//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
15 #include "SIRegisterInfo.h"
16 #include "SIInstrInfo.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/RegisterScavenging.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"

static cl::opt<bool> EnableSpillSGPRToSMEM(
  "amdgpu-spill-sgpr-to-smem",
  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  cl::init(false));
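
// Note: this flag only gates the experimental SGPR-to-SMEM spill path in
// spillSGPR()/restoreSGPR() below; it has no effect on subtargets without
// scalar stores.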

static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      break;
    }
  }
}

SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
                                   SGPRPressureSets(getNumRegPressureSets()),
                                   VGPRPressureSets(getNumRegPressureSets()) {
  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }
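
  // Pick the pressure set covering the most register units as the canonical
  // SGPR/VGPR pressure set; narrower subsets are ignored.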
  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets);
}

void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
  MCRegAliasIterator R(Reg, this, true);

  for (; R.isValid(); ++R)
    Reserved.set(*R);
}
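
// The scratch resource descriptor occupies an SReg_128, which must start at a
// 4-aligned SGPR; the alignDown(..., 4) below picks the highest such 4-tuple.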
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {
  unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  unsigned RegCount = getMaxNumSGPRs(MF);
  unsigned Reg;

  // Try to place it in a hole after PrivateSegmentBufferReg.
  if (RegCount & 3) {
    // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    // alignment constraints, so we have a hole where we can put the wave offset.
    Reg = RegCount - 1;
  } else {
    // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    // wave offset before it.
    Reg = RegCount - 5;
  }
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);

  unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  return Reserved;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  return Fn.getFrameInfo().hasStackObjects();
}

bool
SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects();
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
  const MachineFunction &MF) const {
  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
  // create a virtual register for it during frame index elimination, so the
  // scavenger is directly needed.
  return MF.getFrameInfo().hasStackObjects() &&
         MF.getSubtarget<SISubtarget>().hasScalarStores() &&
         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
  // This helps catch bugs as verifier errors.
  return true;
}

int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return 0;

  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");

  return getMUBUFInstrOffset(MI);
}
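
// MUBUF instructions encode a 12-bit unsigned immediate offset, which is why
// the frame-index logic below checks candidate offsets with isUInt<12>.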
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!MI->mayLoadOrStore())
    return false;

  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);

  return !isUInt<12>(FullOffset);
}

void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    .addFrameIndex(FrameIdx);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
    .addReg(UnusedCarry, RegState::Define | RegState::Dead)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(FIReg);
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

  assert(TII->isMUBUF(MI));

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return false;

  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);

  return isUInt<12>(NewOffset);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}
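
// The _OFFEN buffer forms take a VGPR address; once a frame index has been
// folded to an immediate, the _OFFSET form can be used instead, freeing the
// address VGPR.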
static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  default:
    return -1;
  }
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();

  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
    .addReg(Reg, getDefRegState(!IsStore))
    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
    .addImm(Offset)
    .addImm(0) // glc
    .addImm(0) // slc
    .addImm(0) // tfe
    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  return true;
}
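
// Splits the value register into 32-bit elements and emits one scratch buffer
// load/store per element. If the folded immediate offset does not fit in the
// MUBUF offset field, an unused SGPR is scavenged to hold the offset; failing
// that, ScratchOffsetReg is adjusted in place and restored afterwards.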
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = Desc.mayStore();

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
  unsigned Size = NumSubRegs * 4;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  const int64_t OriginalImmOffset = Offset;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  if (!isUInt<12>(Offset + Size)) {
    SOffset = AMDGPU::NoRegister;

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true
      // on SI/CI and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      RanOutOfSGPRs = true;
      SOffset = ScratchOffsetReg;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffsetReg)
      .addImm(Offset);

    Offset = 0;
  }

  const unsigned EltSize = 4;

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));

    auto MIB = BuildMI(*MBB, MI, DL, Desc)
      .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addMemOperand(NewMMO);

    if (NumSubRegs > 1)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (RanOutOfSGPRs) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
      .addReg(ScratchOffsetReg)
      .addImm(OriginalImmOffset);
  }
}
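
// Returns the element size in bytes and the matching scalar buffer opcode for
// splitting an SGPR spill of the given width.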
static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                                                     bool Store) {
  if (SuperRegSize % 16 == 0) {
    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
  }

  if (SuperRegSize % 8 == 0) {
    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
  }

  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
}
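
// Spills an SGPR (tuple) either via scalar stores to scratch (the SMEM path),
// via V_WRITELANE into a lane of a reserved spill VGPR, or, when no lane is
// available, through a temporary VGPR stored to a real stack slot.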
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg()) // sbase
        .addReg(OffsetReg, RegState::Kill) // soff
        .addImm(0) // glc
        .addMemOperand(MMO);

      continue;
    }

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill) // src
        .addFrameIndex(Index) // vaddr
        .addReg(MFI->getScratchRSrcReg()) // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4) // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}
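
// Mirror of spillSGPR: reloads an SGPR (tuple) via scalar scratch loads, via
// V_READLANE of the spill VGPR, or via a scratch buffer load followed by
// V_READFIRSTLANE.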
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getScratchWaveOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
        .addReg(MFI->getScratchRSrcReg()) // sbase
        .addReg(OffsetReg, RegState::Kill) // soff
        .addImm(0) // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index) // vaddr
        .addReg(MFI->getScratchRSrcReg()) // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4) // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
}
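
// Expands the SI spill pseudo-instructions and rewrites any remaining frame
// index operands; called by PEI for every frame index left in the function.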
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      spillSGPR(MI, Index, RS);
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      restoreSGPR(MI, Index, RS);
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;
    }
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);

      buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      if (TII->isMUBUF(*MI)) {
        // Disable offen so we don't need a 0 vgpr base.
        assert(static_cast<int>(FIOperandNum) ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::vaddr));

        int64_t Offset = FrameInfo.getObjectOffset(Index);
        int64_t OldImm
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
        int64_t NewOffset = OldImm + Offset;

        if (isUInt<12>(NewOffset) &&
            buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
          MI->eraseFromParent();
          break;
        }
      }

      // Fall back to materializing the frame offset as an immediate or, when
      // the immediate is illegal for this operand, in a temporary VGPR.
      int64_t Offset = FrameInfo.getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  switch (RC->getSize()) {
  case 0: return false;
  case 1: return false;
  case 4:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 8:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 12:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 16:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 32:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
  const TargetRegisterClass *SRC) const {
  switch (SRC->getSize()) {
  case 4:
    return &AMDGPU::VGPR_32RegClass;
  case 8:
    return &AMDGPU::VReg_64RegClass;
  case 12:
    return &AMDGPU::VReg_96RegClass;
  case 16:
    return &AMDGPU::VReg_128RegClass;
  case 32:
    return &AMDGPU::VReg_256RegClass;
  case 64:
    return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
  const TargetRegisterClass *VRC) const {
  switch (VRC->getSize()) {
  case 4:
    return &AMDGPU::SGPR_32RegClass;
  case 8:
    return &AMDGPU::SReg_64RegClass;
  case 16:
    return &AMDGPU::SReg_128RegClass;
  case 32:
    return &AMDGPU::SReg_256RegClass;
  case 64:
    return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
  const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger();
  unsigned Count = countPopulation(Mask);
  if (isSGPRClass(RC)) {
    switch (Count) {
    case 1:
      return &AMDGPU::SGPR_32RegClass;
    case 2:
      return &AMDGPU::SReg_64RegClass;
    case 4:
      return &AMDGPU::SReg_128RegClass;
    case 8:
      return &AMDGPU::SReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  } else {
    switch (Count) {
    case 1:
      return &AMDGPU::VGPR_32RegClass;
    case 2:
      return &AMDGPU::VReg_64RegClass;
    case 3:
      return &AMDGPU::VReg_96RegClass;
    case 4:
      return &AMDGPU::VReg_128RegClass;
    case 8:
      return &AMDGPU::VReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(
  const TargetRegisterClass *DefRC,
  unsigned DefSubReg,
  const TargetRegisterClass *SrcRC,
  unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want to
  // stop and rewrite on anything that looks like a subregister
  // extract. Operations mostly don't care about the super register class, so we
  // only want to stop on the most basic of copies between the same register
  // class.
  //
  // e.g. if we have something like
  // vreg0 = ...
  // vreg1 = ...
  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
  // vreg3 = COPY vreg2, sub0
  //
  // We want to look through the COPY to find:
  //  => vreg3 = COPY vreg0

  // Plain copy.
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

// FIXME: Most of these are flexible with HSA and we don't need to reserve them
// as input registers if unused. Whether the dispatch ptr is necessary should be
// easy to detect from used intrinsics. Scratch setup is harder to know.
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                           enum PreloadedValue Value) const {

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  switch (Value) {
  case SIRegisterInfo::WORKGROUP_ID_X:
    assert(MFI->hasWorkGroupIDX());
    return MFI->WorkGroupIDXSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Y:
    assert(MFI->hasWorkGroupIDY());
    return MFI->WorkGroupIDYSystemSGPR;
  case SIRegisterInfo::WORKGROUP_ID_Z:
    assert(MFI->hasWorkGroupIDZ());
    return MFI->WorkGroupIDZSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
  case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
    if (ST.isAmdCodeObjectV2(MF)) {
      assert(MFI->hasPrivateSegmentBuffer());
      return MFI->PrivateSegmentBufferUserSGPR;
    }
    assert(MFI->hasPrivateMemoryInputPtr());
    return MFI->PrivateMemoryPtrUserSGPR;
  case SIRegisterInfo::KERNARG_SEGMENT_PTR:
    assert(MFI->hasKernargSegmentPtr());
    return MFI->KernargSegmentPtrUserSGPR;
  case SIRegisterInfo::DISPATCH_ID:
    assert(MFI->hasDispatchID());
    return MFI->DispatchIDUserSGPR;
  case SIRegisterInfo::FLAT_SCRATCH_INIT:
    assert(MFI->hasFlatScratchInit());
    return MFI->FlatScratchInitUserSGPR;
  case SIRegisterInfo::DISPATCH_PTR:
    assert(MFI->hasDispatchPtr());
    return MFI->DispatchPtrUserSGPR;
  case SIRegisterInfo::QUEUE_PTR:
    assert(MFI->hasQueuePtr());
    return MFI->QueuePtrUserSGPR;
  case SIRegisterInfo::WORKITEM_ID_X:
    assert(MFI->hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case SIRegisterInfo::WORKITEM_ID_Y:
    assert(MFI->hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case SIRegisterInfo::WORKITEM_ID_Z:
    assert(MFI->hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected preloaded value type");
}

/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                   const TargetRegisterClass *RC,
                                   const MachineFunction &MF) const {

  for (unsigned Reg : *RC)
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      return Reg;
  return AMDGPU::NoRegister;
}

unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 800;
  return 512;
}

unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 102;
  return 104;
}

unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
                                  const SIMachineFunctionInfo &MFI) const {
  if (MFI.hasFlatScratchInit()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)

    if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order)
  }

  if (ST.isXNACKEnabled())
    return 4; // XNACK, VCC (in that order)

  return 2; // VCC.
}
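
// The min/max tables below follow from the hardware allocation granularity:
// SGPRs are allocated in blocks of 8 (16 on VI) out of a 512 (800 on VI)
// register budget shared by the waves resident on an execution unit.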
unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
                                        unsigned WavesPerEU) const {
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    switch (WavesPerEU) {
      case 0:  return 0;
      case 10: return 0;
      case 9:  return 0;
      case 8:  return 81;
      default: return 97;
    }
  } else {
    switch (WavesPerEU) {
      case 0:  return 0;
      case 10: return 0;
      case 9:  return 49;
      case 8:  return 57;
      case 7:  return 65;
      case 6:  return 73;
      case 5:  return 81;
      default: return 97;
    }
  }
}

unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
                                        unsigned WavesPerEU,
                                        bool Addressable) const {
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    switch (WavesPerEU) {
      case 0:  return 80;
      case 10: return 80;
      case 9:  return 80;
      case 8:  return 96;
      default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
    }
  } else {
    switch (WavesPerEU) {
      case 0:  return 48;
      case 10: return 48;
      case 9:  return 56;
      case 8:  return 64;
      case 7:  return 72;
      case 6:  return 80;
      case 5:  return 96;
      default: return getNumAddressableSGPRs(ST);
    }
  }
}

unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
  unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < NumInputSGPRs)
      Requested = NumInputSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (ST.hasSGPRInitBug())
    MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
                  MaxNumAddressableSGPRs);
}

unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
  const SISubtarget &ST) const {
  if (ST.debuggerReserveRegs())
    return 4;
  return 0;
}
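
// VGPRs are allocated in blocks of 4 from a 256-register budget; the tables
// below divide that budget by the wave count and round to the granule.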
unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
  switch (WavesPerEU) {
    case 0:  return 0;
    case 10: return 0;
    case 9:  return 25;
    case 8:  return 29;
    case 7:  return 33;
    case 6:  return 37;
    case 5:  return 41;
    case 4:  return 49;
    case 3:  return 65;
    case 2:  return 85;
    default: return 129;
  }
}

unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
  switch (WavesPerEU) {
    case 0:  return 24;
    case 10: return 24;
    case 9:  return 28;
    case 8:  return 32;
    case 7:  return 36;
    case 6:  return 40;
    case 5:  return 48;
    case 4:  return 64;
    case 3:  return 84;
    case 2:  return 128;
    default: return getTotalNumVGPRs();
  }
}

unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
      Requested = 0;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }

  return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
}
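
// Returns the list of sub-register indices that split RC into EltSize-byte
// pieces; the empty list means the class is a single element of that size.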
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                                   unsigned EltSize) const {
  if (EltSize == 4) {
    static const int16_t Sub0_15[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    };

    static const int16_t Sub0_7[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    };

    static const int16_t Sub0_3[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    };

    static const int16_t Sub0_2[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    };

    static const int16_t Sub0_1[] = {
      AMDGPU::sub0, AMDGPU::sub1,
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 32:
      return {};
    case 64:
      return makeArrayRef(Sub0_1);
    case 96:
      return makeArrayRef(Sub0_2);
    case 128:
      return makeArrayRef(Sub0_3);
    case 256:
      return makeArrayRef(Sub0_7);
    case 512:
      return makeArrayRef(Sub0_15);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  if (EltSize == 8) {
    static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    };

    static const int16_t Sub0_7_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    };

    static const int16_t Sub0_3_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 64:
      return {};
    case 128:
      return makeArrayRef(Sub0_3_64);
    case 256:
      return makeArrayRef(Sub0_7_64);
    case 512:
      return makeArrayRef(Sub0_15_64);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  assert(EltSize == 16 && "unhandled register spill split size");

  static const int16_t Sub0_15_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7,
    AMDGPU::sub8_sub9_sub10_sub11,
    AMDGPU::sub12_sub13_sub14_sub15
  };

  static const int16_t Sub0_7_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7
  };

  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
  case 128:
    return {};
  case 256:
    return makeArrayRef(Sub0_7_128);
  case 512:
    return makeArrayRef(Sub0_15_128);
  default:
    llvm_unreachable("unhandled register size");
  }
}

const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  unsigned Reg) const {
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return MRI.getRegClass(Reg);

  return getPhysRegClass(Reg);
}

bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
                            unsigned Reg) const {
  return hasVGPRs(getRegClassForReg(MRI, Reg));
}