lib/Target/AMDGPU/SIAddIMGInit.cpp

   1 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
   11 /// Any MIMG instructions that use tfe or lwe require an initialization of the
   12 /// result register that will be written in the case of a memory access failure.
   13 /// The required code is also added to tie this init code to the result of the
   14 /// img instruction.
  15 ///
  16 //===----------------------------------------------------------------------===//
  17 //
  18
  19 #include "AMDGPU.h"
  20 #include "AMDGPUSubtarget.h"
  21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  22 #include "SIInstrInfo.h"
  23 #include "llvm/CodeGen/MachineFunctionPass.h"
  24 #include "llvm/CodeGen/MachineInstrBuilder.h"
  25 #include "llvm/CodeGen/MachineRegisterInfo.h"
  26 #include "llvm/IR/Function.h"
  27 #include "llvm/Support/Debug.h"
  28 #include "llvm/Target/TargetMachine.h"
  29
  30 #define DEBUG_TYPE "si-img-init"
  31
  32 using namespace llvm;
  33
  34 namespace {
  35
  36 class SIAddIMGInit : public MachineFunctionPass {
  37 public:
  38   static char ID;
  39
  40 public:
  41   SIAddIMGInit() : MachineFunctionPass(ID) {
  42     initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  43   }
  44
  45   bool runOnMachineFunction(MachineFunction &MF) override;
  46
  47   void getAnalysisUsage(AnalysisUsage &AU) const override {
  48     AU.setPreservesCFG();
  49     MachineFunctionPass::getAnalysisUsage(AU);
  50   }
  51 };
  52
  53 } // End anonymous namespace.
  54
  55 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
  56
  57 char SIAddIMGInit::ID = 0;
  58
  59 char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
  60
  61 FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
  62
  63 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  64   MachineRegisterInfo &MRI = MF.getRegInfo();
  65   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  66   const SIInstrInfo *TII = ST.getInstrInfo();
  67   const SIRegisterInfo *RI = ST.getRegisterInfo();
  68   bool Changed = false;
  69
  70   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
  71        ++BI) {
  72     MachineBasicBlock &MBB = *BI;
  73     MachineBasicBlock::iterator I, Next;
  74     for (I = MBB.begin(); I != MBB.end(); I = Next) {
  75       Next = std::next(I);
  76       MachineInstr &MI = *I;
  77
  78       auto Opcode = MI.getOpcode();
  79       if (TII->isMIMG(Opcode) && !MI.mayStore()) {
  80         MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
  81         MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
  82         MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
  83
  84         // Check for instructions that don't have tfe or lwe fields
  85         // There shouldn't be any at this point.
  86         assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
  87
  88         unsigned TFEVal = TFE->getImm();
  89         unsigned LWEVal = LWE->getImm();
  90         unsigned D16Val = D16 ? D16->getImm() : 0;
  91
  92         if (TFEVal || LWEVal) {
  93           // At least one of TFE or LWE are non-zero
  94           // We have to insert a suitable initialization of the result value and
  95           // tie this to the dest of the image instruction.
  96
  97           const DebugLoc &DL = MI.getDebugLoc();
  98
  99           int DstIdx =
 100               AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
 101
 102           // Calculate which dword we have to initialize to 0.
 103           MachineOperand *MO_Dmask =
 104               TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
 105
 106           // check that dmask operand is found.
 107           assert(MO_Dmask && "Expected dmask operand in instruction");
 108
 109           unsigned dmask = MO_Dmask->getImm();
 110           // Determine the number of active lanes taking into account the
 111           // Gather4 special case
 112           unsigned ActiveLanes =
 113               TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
 114
 115           // Subreg indices are counted from 1
 116           // When D16 then we want next whole VGPR after write data.
 117           static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
 118
 119           bool Packed = !ST.hasUnpackedD16VMem();
 120
 121           unsigned InitIdx =
 122               D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
 123
 124           // Abandon attempt if the dst size isn't large enough
 125           // - this is in fact an error but this is picked up elsewhere and
 126           // reported correctly.
 127           uint32_t DstSize =
 128               RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
 129           if (DstSize < InitIdx)
 130             continue;
 131
 132           // Create a register for the intialization value.
 133           unsigned PrevDst =
 134               MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
 135           unsigned NewDst = 0; // Final initialized value will be in here
 136
 137           // If PRTStrictNull feature is enabled (the default) then initialize
 138           // all the result registers to 0, otherwise just the error indication
 139           // register (VGPRn+1)
 140           unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
 141           unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
 142
 143           if (DstSize == 1) {
 144             // In this case we can just initialize the result directly
 145             BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
 146                 .addImm(0);
 147             NewDst = PrevDst;
 148           } else {
 149             BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
 150             for (; SizeLeft; SizeLeft--, CurrIdx++) {
 151               NewDst =
 152                   MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
 153               // Initialize dword
 154               unsigned SubReg =
 155                   MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 156               BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
 157                   .addImm(0);
 158               // Insert into the super-reg
 159               BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
 160                   .addReg(PrevDst)
 161                   .addReg(SubReg)
 162                   .addImm(CurrIdx);
 163
 164               PrevDst = NewDst;
 165             }
 166           }
 167
 168           // Add as an implicit operand
 169           MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
 170
 171           // Tie the just added implicit operand to the dst
 172           MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
 173
 174           Changed = true;
 175         }
 176       }
 177     }
 178   }
 179
 180   return Changed;
 181 }