1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// SI implementation of the TargetRegisterInfo class.
13 //===----------------------------------------------------------------------===//
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
29 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
30 for (unsigned i = 0; PSets[i] != -1; ++i) {
31 if (PSets[i] == (int)PSetID)
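// Set PSetID in PressureSets if any register unit of Reg belongs to that
// pressure set.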
37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
38 BitVector &PressureSets) const {
39 for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
40 const int *PSets = getRegUnitPressureSets(*U);
41 if (hasPressureSet(PSets, PSetID)) {
42 PressureSets.set(PSetID);
48 static cl::opt<bool> EnableSpillSGPRToSMEM(
49 "amdgpu-spill-sgpr-to-smem",
50 cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
53 static cl::opt<bool> EnableSpillSGPRToVGPR(
54 "amdgpu-spill-sgpr-to-vgpr",
55 cl::desc("Enable spilling VGPRs to SGPRs"),
59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
61 SGPRPressureSets(getNumRegPressureSets()),
62 VGPRPressureSets(getNumRegPressureSets()),
63 SpillSGPRToVGPR(false),
64 SpillSGPRToSMEM(false) {
65 if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
66 SpillSGPRToSMEM = true;
67 else if (EnableSpillSGPRToVGPR)
68 SpillSGPRToVGPR = true;
70 unsigned NumRegPressureSets = getNumRegPressureSets();
72 SGPRSetID = NumRegPressureSets;
73 VGPRSetID = NumRegPressureSets;
75 for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76 classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
77 classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
80 // Determine the number of reg units for each pressure set.
81 std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
82 for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
83 const int *PSets = getRegUnitPressureSets(i);
84 for (unsigned j = 0; PSets[j] != -1; ++j) {
85 ++PressureSetRegUnits[PSets[j]];
89 unsigned VGPRMax = 0, SGPRMax = 0;
90 for (unsigned i = 0; i < NumRegPressureSets; ++i) {
91 if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
93 VGPRMax = PressureSetRegUnits[i];
96 if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
98 SGPRMax = PressureSetRegUnits[i];
102 assert(SGPRSetID < NumRegPressureSets &&
103 VGPRSetID < NumRegPressureSets);
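// Pick the SReg_128 tuple at the highest 4-aligned SGPR index below the
// subtarget's SGPR limit to hold the private segment buffer resource
// descriptor.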
106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
107 const MachineFunction &MF) const {
109 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
111 unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
112 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
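// Pick an SGPR index for the scratch wave byte offset that avoids the SGPRs
// reserved for the private segment buffer.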
115 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
118 // Try to place it in a hole after PrivateSegmentBufferReg.
120 // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
// alignment constraints, so we have a hole where we can put the wave offset.
124 // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
125 // wave offset before it.
132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
133 const MachineFunction &MF) const {
134 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
135 unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
136 return AMDGPU::SGPR_32RegClass.getRegister(Reg);
139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
140 const MachineFunction &MF) const {
141 return AMDGPU::SGPR32;
144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
145 BitVector Reserved(getNumRegs());
// EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
// this seems likely to result in bugs, so I'm marking them as reserved.
149 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
150 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
// M0 has to be reserved so that LLVM accepts it as a live-in to a block.
153 reserveRegisterTuples(Reserved, AMDGPU::M0);
155 // Reserve the memory aperture registers.
156 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
161 // Reserve xnack_mask registers - support is not implemented in Codegen.
162 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
164 // Reserve Trap Handler registers - support is not implemented in Codegen.
165 reserveRegisterTuples(Reserved, AMDGPU::TBA);
166 reserveRegisterTuples(Reserved, AMDGPU::TMA);
167 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
168 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
169 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
170 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
171 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
172 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
173 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
174 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
176 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
178 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182 reserveRegisterTuples(Reserved, Reg);
185 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189 reserveRegisterTuples(Reserved, Reg);
192 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
194 unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195 if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196 // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197 reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
200 unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201 if (ScratchRSrcReg != AMDGPU::NoRegister) {
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
// to spill.
204 // TODO: May need to reserve a VGPR if doing LDS spilling.
205 reserveRegisterTuples(Reserved, ScratchRSrcReg);
206 assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
209 // We have to assume the SP is needed in case there are calls in the function,
210 // which is detected after the function is lowered. If we aren't really going
211 // to need SP, don't bother reserving it.
212 unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
214 if (StackPtrReg != AMDGPU::NoRegister) {
215 reserveRegisterTuples(Reserved, StackPtrReg);
216 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
219 unsigned FrameReg = MFI->getFrameOffsetReg();
220 if (FrameReg != AMDGPU::NoRegister) {
221 reserveRegisterTuples(Reserved, FrameReg);
222 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
229 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
230 if (Info->isEntryFunction()) {
231 const MachineFrameInfo &MFI = Fn.getFrameInfo();
232 return MFI.hasStackObjects() || MFI.hasCalls();
235 // May need scavenger for dealing with callee saved registers.
239 bool SIRegisterInfo::requiresFrameIndexScavenging(
240 const MachineFunction &MF) const {
241 const MachineFrameInfo &MFI = MF.getFrameInfo();
242 if (MFI.hasStackObjects())
245 // May need to deal with callee saved registers.
246 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
247 return !Info->isEntryFunction();
250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
251 const MachineFunction &MF) const {
// m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
// create a virtual register for it during frame index elimination; the
// scavenger is needed directly.
255 return MF.getFrameInfo().hasStackObjects() &&
256 MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
257 MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
261 const MachineFunction &) const {
262 // There are no special dedicated stack or frame pointers.
266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
267 // This helps catch bugs as verifier errors.
271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
272 assert(SIInstrInfo::isMUBUF(*MI));
274 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275 AMDGPU::OpName::offset);
276 return MI->getOperand(OffIdx).getImm();
279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
281 if (!SIInstrInfo::isMUBUF(*MI))
284 assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
285 AMDGPU::OpName::vaddr) &&
286 "Should never see frame index on non-address operand");
288 return getMUBUFInstrOffset(MI);
291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
292 if (!MI->mayLoadOrStore())
295 int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
297 return !isUInt<12>(FullOffset);
300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
303 int64_t Offset) const {
304 MachineBasicBlock::iterator Ins = MBB->begin();
305 DebugLoc DL; // Defaults to "unknown"
307 if (Ins != MBB->end())
308 DL = Ins->getDebugLoc();
310 MachineFunction *MF = MBB->getParent();
311 const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
312 const SIInstrInfo *TII = Subtarget.getInstrInfo();
315 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316 .addFrameIndex(FrameIdx);
320 MachineRegisterInfo &MRI = MF->getRegInfo();
321 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
323 unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
325 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
327 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328 .addFrameIndex(FrameIdx);
330 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331 .addReg(OffsetReg, RegState::Kill)
335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
336 int64_t Offset) const {
338 MachineBasicBlock *MBB = MI.getParent();
339 MachineFunction *MF = MBB->getParent();
340 const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
341 const SIInstrInfo *TII = Subtarget.getInstrInfo();
344 // FIXME: Is it possible to be storing a frame index to itself?
346 for (const MachineOperand &MO: MI.operands()) {
349 llvm_unreachable("should not see multiple frame indices");
356 MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358 assert(TII->isMUBUF(MI));
359 assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360 MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361 "should only be seeing frame offset relative FrameIndex");
364 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365 int64_t NewOffset = OffsetOp->getImm() + Offset;
366 assert(isUInt<12>(NewOffset) && "offset should be legal");
368 FIOp->ChangeToRegister(BaseReg, false);
369 OffsetOp->setImm(NewOffset);
372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
374 int64_t Offset) const {
375 if (!SIInstrInfo::isMUBUF(*MI))
378 int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
380 return isUInt<12>(NewOffset);
383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
384 const MachineFunction &MF, unsigned Kind) const {
385 // This is inaccurate. It depends on the instruction and address space. The
386 // only place where we should hit this is for dealing with frame indexes /
387 // private accesses, so this is correct in that case.
388 return &AMDGPU::VGPR_32RegClass;
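// Map a SI_SPILL_* pseudo opcode to the number of 32-bit sub-registers it
// spills or restores (the 512-bit cases cover 16 dwords, the 32-bit cases a
// single dword).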
391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
394 case AMDGPU::SI_SPILL_S512_SAVE:
395 case AMDGPU::SI_SPILL_S512_RESTORE:
396 case AMDGPU::SI_SPILL_V512_SAVE:
397 case AMDGPU::SI_SPILL_V512_RESTORE:
399 case AMDGPU::SI_SPILL_S256_SAVE:
400 case AMDGPU::SI_SPILL_S256_RESTORE:
401 case AMDGPU::SI_SPILL_V256_SAVE:
402 case AMDGPU::SI_SPILL_V256_RESTORE:
404 case AMDGPU::SI_SPILL_S128_SAVE:
405 case AMDGPU::SI_SPILL_S128_RESTORE:
406 case AMDGPU::SI_SPILL_V128_SAVE:
407 case AMDGPU::SI_SPILL_V128_RESTORE:
409 case AMDGPU::SI_SPILL_V96_SAVE:
410 case AMDGPU::SI_SPILL_V96_RESTORE:
412 case AMDGPU::SI_SPILL_S64_SAVE:
413 case AMDGPU::SI_SPILL_S64_RESTORE:
414 case AMDGPU::SI_SPILL_V64_SAVE:
415 case AMDGPU::SI_SPILL_V64_RESTORE:
417 case AMDGPU::SI_SPILL_S32_SAVE:
418 case AMDGPU::SI_SPILL_S32_RESTORE:
419 case AMDGPU::SI_SPILL_V32_SAVE:
420 case AMDGPU::SI_SPILL_V32_RESTORE:
422 default: llvm_unreachable("Invalid spill opcode");
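// Rewrite an OFFEN MUBUF store opcode to its OFFSET (no VGPR address) form,
// returning -1 when no such form exists.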
426 static int getOffsetMUBUFStore(unsigned Opc) {
428 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
447 static int getOffsetMUBUFLoad(unsigned Opc) {
449 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
483 MachineFrameInfo &MFI,
484 MachineBasicBlock::iterator MI,
487 MachineBasicBlock *MBB = MI->getParent();
488 const DebugLoc &DL = MI->getDebugLoc();
489 bool IsStore = MI->mayStore();
491 unsigned Opc = MI->getOpcode();
492 int LoadStoreOp = IsStore ?
493 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
494 if (LoadStoreOp == -1)
497 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498 MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
500 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
501 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
506 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
508 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
509 AMDGPU::OpName::vdata_in);
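// Expand a VGPR spill or restore into a series of 4-byte MUBUF accesses, one
// per 32-bit sub-register. If the immediate offset does not fit in 12 bits,
// an unused SGPR is scavenged for the scratch offset, or, failing that, the
// scratch offset register itself is adjusted and restored afterwards.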
515 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
516 unsigned LoadStoreOp,
520 unsigned ScratchRsrcReg,
521 unsigned ScratchOffsetReg,
523 MachineMemOperand *MMO,
524 RegScavenger *RS) const {
525 MachineBasicBlock *MBB = MI->getParent();
526 MachineFunction *MF = MI->getParent()->getParent();
527 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
528 const SIInstrInfo *TII = ST.getInstrInfo();
529 const MachineFrameInfo &MFI = MF->getFrameInfo();
531 const MCInstrDesc &Desc = TII->get(LoadStoreOp);
532 const DebugLoc &DL = MI->getDebugLoc();
533 bool IsStore = Desc.mayStore();
535 bool Scavenged = false;
536 unsigned SOffset = ScratchOffsetReg;
538 const unsigned EltSize = 4;
539 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
540 unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
541 unsigned Size = NumSubRegs * EltSize;
542 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
543 int64_t ScratchOffsetRegDelta = 0;
545 unsigned Align = MFI.getObjectAlignment(Index);
546 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
548 assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
550 if (!isUInt<12>(Offset + Size - EltSize)) {
551 SOffset = AMDGPU::NoRegister;
553 // We currently only support spilling VGPRs to EltSize boundaries, meaning
554 // we can simplify the adjustment of Offset here to just scale with
556 Offset *= ST.getWavefrontSize();
558 // We don't have access to the register scavenger if this function is called
559 // during PEI::scavengeFrameVirtualRegs().
561 SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
563 if (SOffset == AMDGPU::NoRegister) {
// There are no free SGPRs, and we are in the process of spilling VGPRs too.
// Since we need a VGPR in order to spill SGPRs (this is true on SI/CI, and on
// VI it is true until we implement spilling using scalar stores), we have no
// way to free up an SGPR. Our solution here is to add the offset directly to
// the ScratchOffset register, and then subtract the offset after the spill to
// return ScratchOffset to its original value.
571 SOffset = ScratchOffsetReg;
572 ScratchOffsetRegDelta = Offset;
577 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
578 .addReg(ScratchOffsetReg)
584 for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
585 unsigned SubReg = NumSubRegs == 1 ?
586 ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
588 unsigned SOffsetRegState = 0;
589 unsigned SrcDstRegState = getDefRegState(!IsStore);
591 SOffsetRegState |= getKillRegState(Scavenged);
592 // The last implicit use carries the "Kill" flag.
593 SrcDstRegState |= getKillRegState(IsKill);
596 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
597 MachineMemOperand *NewMMO
598 = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
599 EltSize, MinAlign(Align, EltSize * i));
601 auto MIB = BuildMI(*MBB, MI, DL, Desc)
602 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
603 .addReg(ScratchRsrcReg)
604 .addReg(SOffset, SOffsetRegState)
609 .addMemOperand(NewMMO);
612 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
615 if (ScratchOffsetRegDelta != 0) {
616 // Subtract the offset we added to the ScratchOffset register.
617 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
618 .addReg(ScratchOffsetReg)
619 .addImm(ScratchOffsetRegDelta);
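// Pick the widest scalar buffer load/store that evenly divides the spilled
// super-register, returning {element size in bytes, opcode}. For example, a
// 256-bit SReg_256 (32 bytes) spills as two 16-byte S_BUFFER_STORE_DWORDX4_SGPR
// stores.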
623 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
625 if (SuperRegSize % 16 == 0) {
626 return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
627 AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
630 if (SuperRegSize % 8 == 0) {
631 return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
632 AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
635 return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
636 AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
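// Spill an SGPR (super-)register: either store each 32-bit piece to memory
// with scalar stores (SMEM), write each piece into a lane of a reserved VGPR
// (V_WRITELANE), or copy it through a temporary VGPR that is then saved to the
// stack slot.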
639 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
642 bool OnlyToVGPR) const {
643 MachineBasicBlock *MBB = MI->getParent();
644 MachineFunction *MF = MBB->getParent();
645 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
646 DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
648 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
649 = MFI->getSGPRToVGPRSpills(Index);
650 bool SpillToVGPR = !VGPRSpills.empty();
651 if (OnlyToVGPR && !SpillToVGPR)
654 MachineRegisterInfo &MRI = MF->getRegInfo();
655 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
656 const SIInstrInfo *TII = ST.getInstrInfo();
658 unsigned SuperReg = MI->getOperand(0).getReg();
659 bool IsKill = MI->getOperand(0).isKill();
660 const DebugLoc &DL = MI->getDebugLoc();
662 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
664 bool SpillToSMEM = spillSGPRToSMEM();
665 if (SpillToSMEM && OnlyToVGPR)
668 assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
669 SuperReg != MFI->getFrameOffsetReg() &&
670 SuperReg != MFI->getScratchWaveOffsetReg()));
672 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
674 unsigned OffsetReg = AMDGPU::M0;
675 unsigned M0CopyReg = AMDGPU::NoRegister;
678 if (RS->isRegUsed(AMDGPU::M0)) {
679 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
680 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
685 unsigned ScalarStoreOp;
686 unsigned EltSize = 4;
687 const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
688 if (SpillToSMEM && isSGPRClass(RC)) {
689 // XXX - if private_element_size is larger than 4 it might be useful to be
690 // able to spill wider vmem spills.
691 std::tie(EltSize, ScalarStoreOp) =
692 getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
695 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
696 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
698 // SubReg carries the "Kill" flag when SubReg == SuperReg.
699 unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
700 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
701 unsigned SubReg = NumSubRegs == 1 ?
702 SuperReg : getSubReg(SuperReg, SplitParts[i]);
705 int64_t FrOffset = FrameInfo.getObjectOffset(Index);
707 // The allocated memory size is really the wavefront size * the frame
708 // index size. The widest register class is 64 bytes, so a 4-byte scratch
709 // allocation is enough to spill this in a single stack object.
711 // FIXME: Frame size/offsets are computed earlier than this, so the extra
712 // space is still unnecessarily allocated.
714 unsigned Align = FrameInfo.getObjectAlignment(Index);
715 MachinePointerInfo PtrInfo
716 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
717 MachineMemOperand *MMO
718 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
719 EltSize, MinAlign(Align, EltSize * i));
// SMEM instructions only support a single offset, so increment the wave
// offset.
724 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
726 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
727 .addReg(MFI->getFrameOffsetReg())
730 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
731 .addReg(MFI->getFrameOffsetReg());
734 BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
735 .addReg(SubReg, getKillRegState(IsKill)) // sdata
736 .addReg(MFI->getScratchRSrcReg()) // sbase
737 .addReg(OffsetReg, RegState::Kill) // soff
745 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
747 // During SGPR spilling to VGPR, determine if the VGPR is defined. The
748 // only circumstance in which we say it is undefined is when it is the
749 // first spill to this VGPR in the first basic block.
750 bool VGPRDefined = true;
751 if (MBB == &MF->front())
752 VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
754 // Mark the "old value of vgpr" input undef only if this is the first sgpr
755 // spill to this specific vgpr in the first basic block.
756 BuildMI(*MBB, MI, DL,
757 TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
759 .addReg(SubReg, getKillRegState(IsKill))
761 .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
// FIXME: Since this spills to another register instead of an actual
// frame index, we should delete the frame index when all references to
// it are fixed.
// XXX - Can a to-VGPR spill fail for some subregisters but not others?
771 // Spill SGPR to a frame index.
772 // TODO: Should VI try to spill to VGPR and then spill to SMEM?
773 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
776 MachineInstrBuilder Mov
777 = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
778 .addReg(SubReg, SubKillState);
781 // There could be undef components of a spilled super register.
782 // TODO: Can we detect this and skip the spill?
783 if (NumSubRegs > 1) {
784 // The last implicit use of the SuperReg carries the "Kill" flag.
785 unsigned SuperKillState = 0;
787 SuperKillState |= getKillRegState(IsKill);
788 Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
791 unsigned Align = FrameInfo.getObjectAlignment(Index);
792 MachinePointerInfo PtrInfo
793 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
794 MachineMemOperand *MMO
795 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
796 EltSize, MinAlign(Align, EltSize * i));
797 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
798 .addReg(TmpReg, RegState::Kill) // src
799 .addFrameIndex(Index) // vaddr
.addReg(MFI->getScratchRSrcReg()) // srsrc
801 .addReg(MFI->getFrameOffsetReg()) // soffset
802 .addImm(i * 4) // offset
807 if (M0CopyReg != AMDGPU::NoRegister) {
808 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
809 .addReg(M0CopyReg, RegState::Kill);
812 MI->eraseFromParent();
813 MFI->addToSpilledSGPRs(NumSubRegs);
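// Reload an SGPR (super-)register, mirroring spillSGPR: scalar loads from
// memory, V_READLANE from the spill VGPR, or a stack reload into a temporary
// VGPR followed by V_READFIRSTLANE.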
817 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
820 bool OnlyToVGPR) const {
821 MachineFunction *MF = MI->getParent()->getParent();
822 MachineRegisterInfo &MRI = MF->getRegInfo();
823 MachineBasicBlock *MBB = MI->getParent();
824 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
826 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
827 = MFI->getSGPRToVGPRSpills(Index);
828 bool SpillToVGPR = !VGPRSpills.empty();
829 if (OnlyToVGPR && !SpillToVGPR)
832 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
833 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
834 const SIInstrInfo *TII = ST.getInstrInfo();
835 const DebugLoc &DL = MI->getDebugLoc();
837 unsigned SuperReg = MI->getOperand(0).getReg();
838 bool SpillToSMEM = spillSGPRToSMEM();
839 if (SpillToSMEM && OnlyToVGPR)
842 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
844 unsigned OffsetReg = AMDGPU::M0;
845 unsigned M0CopyReg = AMDGPU::NoRegister;
848 if (RS->isRegUsed(AMDGPU::M0)) {
849 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
850 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
855 unsigned EltSize = 4;
856 unsigned ScalarLoadOp;
858 const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
859 if (SpillToSMEM && isSGPRClass(RC)) {
860 // XXX - if private_element_size is larger than 4 it might be useful to be
861 // able to spill wider vmem spills.
862 std::tie(EltSize, ScalarLoadOp) =
863 getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
866 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
867 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
869 // SubReg carries the "Kill" flag when SubReg == SuperReg.
870 int64_t FrOffset = FrameInfo.getObjectOffset(Index);
872 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
873 unsigned SubReg = NumSubRegs == 1 ?
874 SuperReg : getSubReg(SuperReg, SplitParts[i]);
// FIXME: Size may be > 4, but the extra bytes are wasted.
878 unsigned Align = FrameInfo.getObjectAlignment(Index);
879 MachinePointerInfo PtrInfo
880 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
881 MachineMemOperand *MMO
882 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
883 EltSize, MinAlign(Align, EltSize * i));
886 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
888 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
889 .addReg(MFI->getFrameOffsetReg())
892 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
893 .addReg(MFI->getFrameOffsetReg());
897 BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
898 .addReg(MFI->getScratchRSrcReg()) // sbase
899 .addReg(OffsetReg, RegState::Kill) // soff
904 MIB.addReg(SuperReg, RegState::ImplicitDefine);
910 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
912 BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
918 MIB.addReg(SuperReg, RegState::ImplicitDefine);
923 // Restore SGPR from a stack slot.
924 // FIXME: We should use S_LOAD_DWORD here for VI.
925 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
926 unsigned Align = FrameInfo.getObjectAlignment(Index);
928 MachinePointerInfo PtrInfo
929 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
931 MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
932 MachineMemOperand::MOLoad, EltSize,
933 MinAlign(Align, EltSize * i));
935 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
936 .addFrameIndex(Index) // vaddr
937 .addReg(MFI->getScratchRSrcReg()) // srsrc
938 .addReg(MFI->getFrameOffsetReg()) // soffset
939 .addImm(i * 4) // offset
943 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
944 .addReg(TmpReg, RegState::Kill);
947 MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
951 if (M0CopyReg != AMDGPU::NoRegister) {
952 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
953 .addReg(M0CopyReg, RegState::Kill);
956 MI->eraseFromParent();
/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
963 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
964 MachineBasicBlock::iterator MI,
966 RegScavenger *RS) const {
967 switch (MI->getOpcode()) {
968 case AMDGPU::SI_SPILL_S512_SAVE:
969 case AMDGPU::SI_SPILL_S256_SAVE:
970 case AMDGPU::SI_SPILL_S128_SAVE:
971 case AMDGPU::SI_SPILL_S64_SAVE:
972 case AMDGPU::SI_SPILL_S32_SAVE:
973 return spillSGPR(MI, FI, RS, true);
974 case AMDGPU::SI_SPILL_S512_RESTORE:
975 case AMDGPU::SI_SPILL_S256_RESTORE:
976 case AMDGPU::SI_SPILL_S128_RESTORE:
977 case AMDGPU::SI_SPILL_S64_RESTORE:
978 case AMDGPU::SI_SPILL_S32_RESTORE:
979 return restoreSGPR(MI, FI, RS, true);
981 llvm_unreachable("not an SGPR spill instruction");
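// Rewrite a frame-index operand. SGPR and VGPR spill pseudos are expanded via
// spillSGPR/restoreSGPR and buildSpillLoadStore. For other users the frame
// offset is either folded into a MUBUF immediate, materialized into a
// register, or emitted as a plain immediate when that is legal.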
985 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
986 int SPAdj, unsigned FIOperandNum,
987 RegScavenger *RS) const {
988 MachineFunction *MF = MI->getParent()->getParent();
989 MachineRegisterInfo &MRI = MF->getRegInfo();
990 MachineBasicBlock *MBB = MI->getParent();
991 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
992 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
993 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
994 const SIInstrInfo *TII = ST.getInstrInfo();
995 DebugLoc DL = MI->getDebugLoc();
997 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
998 int Index = MI->getOperand(FIOperandNum).getIndex();
1000 switch (MI->getOpcode()) {
1001 // SGPR register spill
1002 case AMDGPU::SI_SPILL_S512_SAVE:
1003 case AMDGPU::SI_SPILL_S256_SAVE:
1004 case AMDGPU::SI_SPILL_S128_SAVE:
1005 case AMDGPU::SI_SPILL_S64_SAVE:
1006 case AMDGPU::SI_SPILL_S32_SAVE: {
1007 spillSGPR(MI, Index, RS);
1011 // SGPR register restore
1012 case AMDGPU::SI_SPILL_S512_RESTORE:
1013 case AMDGPU::SI_SPILL_S256_RESTORE:
1014 case AMDGPU::SI_SPILL_S128_RESTORE:
1015 case AMDGPU::SI_SPILL_S64_RESTORE:
1016 case AMDGPU::SI_SPILL_S32_RESTORE: {
1017 restoreSGPR(MI, Index, RS);
1021 // VGPR register spill
1022 case AMDGPU::SI_SPILL_V512_SAVE:
1023 case AMDGPU::SI_SPILL_V256_SAVE:
1024 case AMDGPU::SI_SPILL_V128_SAVE:
1025 case AMDGPU::SI_SPILL_V96_SAVE:
1026 case AMDGPU::SI_SPILL_V64_SAVE:
1027 case AMDGPU::SI_SPILL_V32_SAVE: {
1028 const MachineOperand *VData = TII->getNamedOperand(*MI,
1029 AMDGPU::OpName::vdata);
1030 buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1032 VData->getReg(), VData->isKill(),
1033 TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1034 TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1035 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1036 *MI->memoperands_begin(),
1038 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1039 MI->eraseFromParent();
1042 case AMDGPU::SI_SPILL_V32_RESTORE:
1043 case AMDGPU::SI_SPILL_V64_RESTORE:
1044 case AMDGPU::SI_SPILL_V96_RESTORE:
1045 case AMDGPU::SI_SPILL_V128_RESTORE:
1046 case AMDGPU::SI_SPILL_V256_RESTORE:
1047 case AMDGPU::SI_SPILL_V512_RESTORE: {
1048 const MachineOperand *VData = TII->getNamedOperand(*MI,
1049 AMDGPU::OpName::vdata);
1051 buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1053 VData->getReg(), VData->isKill(),
1054 TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1055 TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1056 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1057 *MI->memoperands_begin(),
1059 MI->eraseFromParent();
1064 const DebugLoc &DL = MI->getDebugLoc();
1065 bool IsMUBUF = TII->isMUBUF(*MI);
1068 MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1069 // Convert to an absolute stack address by finding the offset from the
1070 // scratch wave base and scaling by the wave size.
1072 // In an entry function/kernel the stack address is already the
1073 // absolute address relative to the scratch wave offset.
1076 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1078 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1079 unsigned ResultReg = IsCopy ?
1080 MI->getOperand(0).getReg() :
1081 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1083 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1084 .addReg(MFI->getFrameOffsetReg())
1085 .addReg(MFI->getScratchWaveOffsetReg());
1087 int64_t Offset = FrameInfo.getObjectOffset(Index);
1089 // XXX - This never happens because of emergency scavenging slot at 0?
1090 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1091 .addImm(Log2_32(ST.getWavefrontSize()))
1095 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1097 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1098 .addImm(Log2_32(ST.getWavefrontSize()))
1099 .addReg(DiffReg, RegState::Kill);
1101 // TODO: Fold if use instruction is another add of a constant.
1102 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1103 TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1105 .addReg(ScaledReg, RegState::Kill);
1107 unsigned ConstOffsetReg
1108 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1110 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1112 TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1113 .addReg(ConstOffsetReg, RegState::Kill)
1114 .addReg(ScaledReg, RegState::Kill);
1118 // Don't introduce an extra copy if we're just materializing in a mov.
1120 MI->eraseFromParent();
1122 FIOp.ChangeToRegister(ResultReg, false, false, true);
1127 // Disable offen so we don't need a 0 vgpr base.
1128 assert(static_cast<int>(FIOperandNum) ==
1129 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1130 AMDGPU::OpName::vaddr));
1132 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1133 == MFI->getFrameOffsetReg());
1135 int64_t Offset = FrameInfo.getObjectOffset(Index);
1137 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1138 int64_t NewOffset = OldImm + Offset;
1140 if (isUInt<12>(NewOffset) &&
1141 buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1142 MI->eraseFromParent();
// If the offset is simply too big, don't convert to a scratch wave offset
// relative index.
1150 int64_t Offset = FrameInfo.getObjectOffset(Index);
1151 FIOp.ChangeToImmediate(Offset);
1152 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1153 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1154 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1156 FIOp.ChangeToRegister(TmpReg, false, false, true);
1162 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1163 #define AMDGPU_REG_ASM_NAMES
1164 #include "AMDGPURegAsmNames.inc.cpp"
1166 #define REG_RANGE(BeginReg, EndReg, RegTable) \
1167 if (Reg >= BeginReg && Reg <= EndReg) { \
1168 unsigned Index = Reg - BeginReg; \
1169 assert(Index < array_lengthof(RegTable)); \
1170 return RegTable[Index]; \
1173 REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1174 REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1175 REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1176 REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1177 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1180 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1181 AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1183 REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1184 AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1187 REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1188 AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1192 AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1193 AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1196 REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1197 AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1201 AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1202 AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1208 // FIXME: Rename flat_scr so we don't need to special case this.
1210 case AMDGPU::FLAT_SCR:
1211 return "flat_scratch";
1212 case AMDGPU::FLAT_SCR_LO:
1213 return "flat_scratch_lo";
1214 case AMDGPU::FLAT_SCR_HI:
1215 return "flat_scratch_hi";
1217 // For the special named registers the default is fine.
1218 return TargetRegisterInfo::getRegAsmName(Reg);
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
1224 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1225 assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1227 static const TargetRegisterClass *const BaseClasses[] = {
1228 &AMDGPU::VGPR_32RegClass,
1229 &AMDGPU::SReg_32RegClass,
1230 &AMDGPU::VReg_64RegClass,
1231 &AMDGPU::SReg_64RegClass,
1232 &AMDGPU::VReg_96RegClass,
1233 &AMDGPU::VReg_128RegClass,
1234 &AMDGPU::SReg_128RegClass,
1235 &AMDGPU::VReg_256RegClass,
1236 &AMDGPU::SReg_256RegClass,
1237 &AMDGPU::VReg_512RegClass,
1238 &AMDGPU::SReg_512RegClass,
1239 &AMDGPU::SCC_CLASSRegClass,
1240 &AMDGPU::Pseudo_SReg_32RegClass,
1241 &AMDGPU::Pseudo_SReg_128RegClass,
1244 for (const TargetRegisterClass *BaseClass : BaseClasses) {
1245 if (BaseClass->contains(Reg)) {
1252 // TODO: It might be helpful to have some target specific flags in
1253 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1254 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1255 unsigned Size = getRegSizeInBits(*RC);
1260 return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1262 return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1264 return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1266 return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1268 return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1270 return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1272 llvm_unreachable("Invalid register class size");
1276 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1277 const TargetRegisterClass *SRC) const {
1278 switch (getRegSizeInBits(*SRC)) {
1280 return &AMDGPU::VGPR_32RegClass;
1282 return &AMDGPU::VReg_64RegClass;
1284 return &AMDGPU::VReg_96RegClass;
1286 return &AMDGPU::VReg_128RegClass;
1288 return &AMDGPU::VReg_256RegClass;
1290 return &AMDGPU::VReg_512RegClass;
1292 llvm_unreachable("Invalid register class size");
1296 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1297 const TargetRegisterClass *VRC) const {
1298 switch (getRegSizeInBits(*VRC)) {
1300 return &AMDGPU::SGPR_32RegClass;
1302 return &AMDGPU::SReg_64RegClass;
1304 return &AMDGPU::SReg_128RegClass;
1306 return &AMDGPU::SReg_256RegClass;
1308 return &AMDGPU::SReg_512RegClass;
1310 llvm_unreachable("Invalid register class size");
1314 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1315 const TargetRegisterClass *RC, unsigned SubIdx) const {
1316 if (SubIdx == AMDGPU::NoSubRegister)
1319 // We can assume that each lane corresponds to one 32-bit register.
1320 unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1321 if (isSGPRClass(RC)) {
1324 return &AMDGPU::SGPR_32RegClass;
1326 return &AMDGPU::SReg_64RegClass;
1328 return &AMDGPU::SReg_128RegClass;
1330 return &AMDGPU::SReg_256RegClass;
1331 case 16: /* fall-through */
1333 llvm_unreachable("Invalid sub-register class size");
1338 return &AMDGPU::VGPR_32RegClass;
1340 return &AMDGPU::VReg_64RegClass;
1342 return &AMDGPU::VReg_96RegClass;
1344 return &AMDGPU::VReg_128RegClass;
1346 return &AMDGPU::VReg_256RegClass;
1347 case 16: /* fall-through */
1349 llvm_unreachable("Invalid sub-register class size");
1354 bool SIRegisterInfo::shouldRewriteCopySrc(
1355 const TargetRegisterClass *DefRC,
1357 const TargetRegisterClass *SrcRC,
1358 unsigned SrcSubReg) const {
1359 // We want to prefer the smallest register class possible, so we don't want to
1360 // stop and rewrite on anything that looks like a subregister
1361 // extract. Operations mostly don't care about the super register class, so we
// only want to stop on the most basic of copies between the same register
// class.
1365 // e.g. if we have something like
1368 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1369 // %3 = COPY %2, sub0
// We want to look through the COPY to find:
//  %3 = COPY %0
1375 return getCommonSubClass(DefRC, SrcRC) != nullptr;
/// Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
1382 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1383 const TargetRegisterClass *RC,
1384 const MachineFunction &MF) const {
1386 for (unsigned Reg : *RC)
1387 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1389 return AMDGPU::NoRegister;
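// Return the sub-register indices that split RC into EltSize-byte pieces
// (4, 8, or 16 bytes); an empty list means the register is accessed as a
// single element.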
1392 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1393 unsigned EltSize) const {
1395 static const int16_t Sub0_15[] = {
1396 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1397 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1398 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1399 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1402 static const int16_t Sub0_7[] = {
1403 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1404 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1407 static const int16_t Sub0_3[] = {
1408 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1411 static const int16_t Sub0_2[] = {
1412 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1415 static const int16_t Sub0_1[] = {
1416 AMDGPU::sub0, AMDGPU::sub1,
1419 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1423 return makeArrayRef(Sub0_1);
1425 return makeArrayRef(Sub0_2);
1427 return makeArrayRef(Sub0_3);
1429 return makeArrayRef(Sub0_7);
1431 return makeArrayRef(Sub0_15);
1433 llvm_unreachable("unhandled register size");
1438 static const int16_t Sub0_15_64[] = {
1439 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1440 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1441 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1442 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1445 static const int16_t Sub0_7_64[] = {
1446 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1447 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1451 static const int16_t Sub0_3_64[] = {
1452 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1455 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1459 return makeArrayRef(Sub0_3_64);
1461 return makeArrayRef(Sub0_7_64);
1463 return makeArrayRef(Sub0_15_64);
1465 llvm_unreachable("unhandled register size");
1469 assert(EltSize == 16 && "unhandled register spill split size");
1471 static const int16_t Sub0_15_128[] = {
1472 AMDGPU::sub0_sub1_sub2_sub3,
1473 AMDGPU::sub4_sub5_sub6_sub7,
1474 AMDGPU::sub8_sub9_sub10_sub11,
1475 AMDGPU::sub12_sub13_sub14_sub15
1478 static const int16_t Sub0_7_128[] = {
1479 AMDGPU::sub0_sub1_sub2_sub3,
1480 AMDGPU::sub4_sub5_sub6_sub7
1483 switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1487 return makeArrayRef(Sub0_7_128);
1489 return makeArrayRef(Sub0_15_128);
1491 llvm_unreachable("unhandled register size");
1495 const TargetRegisterClass*
1496 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1497 unsigned Reg) const {
1498 if (TargetRegisterInfo::isVirtualRegister(Reg))
1499 return MRI.getRegClass(Reg);
1501 return getPhysRegClass(Reg);
1504 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1505 unsigned Reg) const {
1506 const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1507 assert(RC && "Register class for the reg not found");
1508 return hasVGPRs(RC);
1511 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1512 const TargetRegisterClass *SrcRC,
1514 const TargetRegisterClass *DstRC,
1516 const TargetRegisterClass *NewRC,
1517 LiveIntervals &LIS) const {
1518 unsigned SrcSize = getRegSizeInBits(*SrcRC);
1519 unsigned DstSize = getRegSizeInBits(*DstRC);
1520 unsigned NewSize = getRegSizeInBits(*NewRC);
// Do not increase the size of registers beyond a dword; we would need to
// allocate adjacent registers and constrain regalloc more than needed.
1525 // Always allow dword coalescing.
1526 if (SrcSize <= 32 || DstSize <= 32)
1529 return NewSize <= DstSize || NewSize <= SrcSize;
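// For VGPR_32 and SGPR_32 the pressure limit is the number of registers
// available at the occupancy implied by the function's LDS usage, capped by
// the function's own register limit.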
1532 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1533 MachineFunction &MF) const {
1535 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1536 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1538 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1540 switch (RC->getID()) {
1542 return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1543 case AMDGPU::VGPR_32RegClassID:
1544 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1545 case AMDGPU::SGPR_32RegClassID:
1546 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1550 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1551 unsigned Idx) const {
1552 if (Idx == getVGPRPressureSet())
1553 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1554 const_cast<MachineFunction &>(MF));
1556 if (Idx == getSGPRPressureSet())
1557 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1558 const_cast<MachineFunction &>(MF));
1560 return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1563 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1564 static const int Empty[] = { -1 };
1566 if (hasRegUnit(AMDGPU::M0, RegUnit))
1568 return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1571 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1572 // Not a callee saved register.
1573 return AMDGPU::SGPR30_SGPR31;
1576 const TargetRegisterClass *
1577 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1578 const MachineRegisterInfo &MRI) const {
1579 unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1580 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1586 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1587 &AMDGPU::SReg_32_XM0RegClass;
1589 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1590 &AMDGPU::SReg_64_XEXECRegClass;
1592 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1595 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1596 &AMDGPU::SReg_128RegClass;
1598 llvm_unreachable("not implemented");