1 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// image instruction.
16 //===----------------------------------------------------------------------===//
20 #include "AMDGPUSubtarget.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "SIInstrInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Target/TargetMachine.h"
30 #define DEBUG_TYPE "si-img-init"
// Machine-function pass that inserts a zero-initialization of the result
// registers of MIMG loads that use TFE or LWE (see file header comment).
// NOTE(review): this excerpt elides several declaration lines (access
// specifier, 'static char ID;', and closing braces); visible code is kept
// byte-for-byte.
class SIAddIMGInit : public MachineFunctionPass {
  // Registers the pass with the global PassRegistry on construction.
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  // Entry point: scans the function for MIMG loads needing result init.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // This pass requires no extra analyses; defer to the base implementation.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
} // End anonymous namespace.
// Register the pass with LLVM's pass infrastructure so it can be referenced
// by name (e.g. via -run-pass=si-img-init).
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

// Out-of-line definition of the pass identity used by the pass framework.
char SIAddIMGInit::ID = 0;

// Exported handle so the target pass config can refer to this pass's ID.
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

// Factory used by the AMDGPU target machine to instantiate the pass.
FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
// Walk every instruction in the function; for each MIMG load with TFE or LWE
// set, insert code that initializes the destination register(s) and tie that
// init value to the instruction's dest so the error-indication lanes are well
// defined on a memory access failure (see file header).
// NOTE(review): this excerpt elides several lines of the original function
// (the '++BI) {' loop increment, LHS declarations such as
// DstIdx/InitIdx/DstSize/PrevDst/SubReg, intermediate braces, and the final
// return); the visible code is kept byte-for-byte.
bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
      // Only image *loads* are relevant: stores write no result register.
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields.
        // There shouldn't be any at this point.
        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        unsigned D16Val = D16 ? D16->getImm() : 0; // d16 operand may be absent

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE are non-zero
          // We have to insert a suitable initialization of the result value and
          // tie this to the dest of the image instruction.

          const DebugLoc &DL = MI.getDebugLoc();
            AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dword we have to initialize to 0.
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // Check that the dmask operand is found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes taking into account the
          // Gather4 special case
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          // Subreg indices are counted from 1
          // When D16 then we want next whole VGPR after write data.
          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");

          // Packed D16 halves the number of data VGPRs (two halves per VGPR).
          bool Packed = !ST.hasUnpackedD16VMem();
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

          // Abandon attempt if the dst size isn't large enough
          // - this is in fact an error but this is picked up elsewhere and
          // reported correctly.
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)

          // Create a register for the initialization value.
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // Final initialized value will be in here

          // If PRTStrictNull feature is enabled (the default) then initialize
          // all the result registers to 0, otherwise just the error indication
          // register (VGPRn+1)
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

            // In this case we can just initialize the result directly
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
            // Zero each required dword and build the result up one subreg at
            // a time via INSERT_SUBREG.
            for (; SizeLeft; SizeLeft--, CurrIdx++) {
                MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
              // Insert into the super-reg
              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)

          // Add the initialized value as an implicit operand of the MIMG...
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // ...and tie the just added implicit operand to the dst, so the
          // register allocator assigns them the same registers.
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);