contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

   1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
  12 /// will sometimes generate these illegal copies in situations like this:
  13 ///
  14 ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
  15 ///
  16 /// BB0:
  17 ///   %vreg0 <sgpr> = SCALAR_INST
  18 ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
  19 ///    ...
  20 ///    BRANCH %cond BB1, BB2
  21 ///  BB1:
  22 ///    %vreg2 <vgpr> = VECTOR_INST
  23 ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  24 ///  BB2:
///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  26 ///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
  27 ///
  28 ///
  29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
  30 /// code will look like this:
  31 ///
  32 /// BB0:
  33 ///   %vreg0 <sgpr> = SCALAR_INST
  34 ///    ...
  35 ///    BRANCH %cond BB1, BB2
  36 /// BB1:
  37 ///   %vreg2 <vgpr> = VECTOR_INST
  38 ///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  39 /// BB2:
  40 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  41 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  42 ///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
  46 ///
  47 /// BB0:
  48 ///   %vreg0 <sgpr> = SCALAR_INST
  49 ///    ...
  50 ///    BRANCH %cond BB1, BB2
  51 /// BB1:
  52 ///   %vreg2 <vgpr> = VECTOR_INST
  53 ///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
  54 /// BB2:
  55 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
  56 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  57 ///
  58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
  59 ///
  60 /// In order to avoid this problem, this pass searches for PHI instructions
  61 /// which define a <vsrc> register and constrains its definition class to
  62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
  63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example, which
  65 /// ultimately led to the creation of an illegal COPY.
  66 //===----------------------------------------------------------------------===//
  67
  68 #include "AMDGPU.h"
  69 #include "AMDGPUSubtarget.h"
  70 #include "SIInstrInfo.h"
  71 #include "llvm/CodeGen/MachineFunctionPass.h"
  72 #include "llvm/CodeGen/MachineInstrBuilder.h"
  73 #include "llvm/CodeGen/MachineRegisterInfo.h"
  74 #include "llvm/Support/Debug.h"
  75 #include "llvm/Support/raw_ostream.h"
  76 #include "llvm/Target/TargetMachine.h"
  77
  78 using namespace llvm;
  79
  80 #define DEBUG_TYPE "si-fix-sgpr-copies"
  81
  82 namespace {
  83
  84 class SIFixSGPRCopies : public MachineFunctionPass {
  85 public:
  86   static char ID;
  87
  88   SIFixSGPRCopies() : MachineFunctionPass(ID) { }
  89
  90   bool runOnMachineFunction(MachineFunction &MF) override;
  91
  92   const char *getPassName() const override {
  93     return "SI Fix SGPR copies";
  94   }
  95
  96   void getAnalysisUsage(AnalysisUsage &AU) const override {
  97     AU.setPreservesCFG();
  98     MachineFunctionPass::getAnalysisUsage(AU);
  99   }
 100 };
 101
 102 } // End anonymous namespace
 103
 104 INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
 105                 "SI Fix SGPR copies", false, false)
 106
 107 char SIFixSGPRCopies::ID = 0;
 108
 109 char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
 110
/// Factory for the pass: allocates a fresh SIFixSGPRCopies with `new` and
/// returns it as a raw FunctionPass pointer. Nothing here releases the
/// object, so ownership passes to the caller (presumably a pass manager).
FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}
 114
 115 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
 116   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 117   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 118     if (!MI.getOperand(i).isReg() ||
 119         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
 120       continue;
 121
 122     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
 123       return true;
 124   }
 125   return false;
 126 }
 127
 128 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 129 getCopyRegClasses(const MachineInstr &Copy,
 130                   const SIRegisterInfo &TRI,
 131                   const MachineRegisterInfo &MRI) {
 132   unsigned DstReg = Copy.getOperand(0).getReg();
 133   unsigned SrcReg = Copy.getOperand(1).getReg();
 134
 135   const TargetRegisterClass *SrcRC =
 136     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
 137     MRI.getRegClass(SrcReg) :
 138     TRI.getPhysRegClass(SrcReg);
 139
 140   // We don't really care about the subregister here.
 141   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
 142
 143   const TargetRegisterClass *DstRC =
 144     TargetRegisterInfo::isVirtualRegister(DstReg) ?
 145     MRI.getRegClass(DstReg) :
 146     TRI.getPhysRegClass(DstReg);
 147
 148   return std::make_pair(SrcRC, DstRC);
 149 }
 150
 151 static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
 152                              const TargetRegisterClass *DstRC,
 153                              const SIRegisterInfo &TRI) {
 154   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
 155 }
 156
 157 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
 158                              const TargetRegisterClass *DstRC,
 159                              const SIRegisterInfo &TRI) {
 160   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
 161 }
 162
 163 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
 164 //
 165 // SGPRx = ...
 166 // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 167 // VGPRz = COPY SGPRy
 168 //
 169 // ==>
 170 //
 171 // VGPRx = COPY SGPRx
 172 // VGPRz = REG_SEQUENCE VGPRx, sub0
 173 //
 174 // This exposes immediate folding opportunities when materializing 64-bit
 175 // immediates.
 176 static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
 177                                         const SIRegisterInfo *TRI,
 178                                         const SIInstrInfo *TII,
 179                                         MachineRegisterInfo &MRI) {
 180   assert(MI.isRegSequence());
 181
 182   unsigned DstReg = MI.getOperand(0).getReg();
 183   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
 184     return false;
 185
 186   if (!MRI.hasOneUse(DstReg))
 187     return false;
 188
 189   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
 190   if (!CopyUse.isCopy())
 191     return false;
 192
 193   const TargetRegisterClass *SrcRC, *DstRC;
 194   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
 195
 196   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
 197     return false;
 198
 199   // TODO: Could have multiple extracts?
 200   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
 201   if (SubReg != AMDGPU::NoSubRegister)
 202     return false;
 203
 204   MRI.setRegClass(DstReg, DstRC);
 205
 206   // SGPRx = ...
 207   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 208   // VGPRz = COPY SGPRy
 209
 210   // =>
 211   // VGPRx = COPY SGPRx
 212   // VGPRz = REG_SEQUENCE VGPRx, sub0
 213
 214   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
 215
 216   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
 217     unsigned SrcReg = MI.getOperand(I).getReg();
 218     unsigned SrcSubReg = MI.getOperand(I).getSubReg();
 219
 220     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
 221     assert(TRI->isSGPRClass(SrcRC) &&
 222            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
 223
 224     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
 225     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
 226
 227     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
 228
 229     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
 230       .addOperand(MI.getOperand(I));
 231
 232     MI.getOperand(I).setReg(TmpReg);
 233   }
 234
 235   CopyUse.eraseFromParent();
 236   return true;
 237 }
 238
 239 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 240   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 241   MachineRegisterInfo &MRI = MF.getRegInfo();
 242   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 243   const SIInstrInfo *TII = ST.getInstrInfo();
 244
 245   SmallVector<MachineInstr *, 16> Worklist;
 246
 247   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
 248                                                   BI != BE; ++BI) {
 249
 250     MachineBasicBlock &MBB = *BI;
 251     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
 252          I != E; ++I) {
 253       MachineInstr &MI = *I;
 254
 255       switch (MI.getOpcode()) {
 256       default:
 257         continue;
 258       case AMDGPU::COPY: {
 259         // If the destination register is a physical register there isn't really
 260         // much we can do to fix this.
 261         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
 262           continue;
 263
 264         const TargetRegisterClass *SrcRC, *DstRC;
 265         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
 266         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
 267           DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
 268           TII->moveToVALU(MI);
 269         }
 270
 271         break;
 272       }
 273       case AMDGPU::PHI: {
 274         DEBUG(dbgs() << "Fixing PHI: " << MI);
 275         unsigned Reg = MI.getOperand(0).getReg();
 276         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
 277           break;
 278
 279         // If a PHI node defines an SGPR and any of its operands are VGPRs,
 280         // then we need to move it to the VALU.
 281         //
 282         // Also, if a PHI node defines an SGPR and has all SGPR operands
 283         // we must move it to the VALU, because the SGPR operands will
 284         // all end up being assigned the same register, which means
 285         // there is a potential for a conflict if different threads take
 286         // different control flow paths.
 287         //
 288         // For Example:
 289         //
 290         // sgpr0 = def;
 291         // ...
 292         // sgpr1 = def;
 293         // ...
 294         // sgpr2 = PHI sgpr0, sgpr1
 295         // use sgpr2;
 296         //
 297         // Will Become:
 298         //
 299         // sgpr2 = def;
 300         // ...
 301         // sgpr2 = def;
 302         // ...
 303         // use sgpr2
 304         //
 305         // FIXME: This is OK if the branching decision is made based on an
 306         // SGPR value.
 307         bool SGPRBranch = false;
 308
 309         // The one exception to this rule is when one of the operands
 310         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
 311         // instruction.  In this case, there we know the program will
 312         // never enter the second block (the loop) without entering
 313         // the first block (where the condition is computed), so there
 314         // is no chance for values to be over-written.
 315
 316         bool HasBreakDef = false;
 317         for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
 318           unsigned Reg = MI.getOperand(i).getReg();
 319           if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
 320             TII->moveToVALU(MI);
 321             break;
 322           }
 323           MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
 324           assert(DefInstr);
 325           switch(DefInstr->getOpcode()) {
 326
 327           case AMDGPU::SI_BREAK:
 328           case AMDGPU::SI_IF_BREAK:
 329           case AMDGPU::SI_ELSE_BREAK:
 330           // If we see a PHI instruction that defines an SGPR, then that PHI
 331           // instruction has already been considered and should have
 332           // a *_BREAK as an operand.
 333           case AMDGPU::PHI:
 334             HasBreakDef = true;
 335             break;
 336           }
 337         }
 338
 339         if (!SGPRBranch && !HasBreakDef)
 340           TII->moveToVALU(MI);
 341         break;
 342       }
 343       case AMDGPU::REG_SEQUENCE: {
 344         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
 345             !hasVGPROperands(MI, TRI)) {
 346           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
 347           continue;
 348         }
 349
 350         DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
 351
 352         TII->moveToVALU(MI);
 353         break;
 354       }
 355       case AMDGPU::INSERT_SUBREG: {
 356         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
 357         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
 358         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
 359         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
 360         if (TRI->isSGPRClass(DstRC) &&
 361             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
 362           DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
 363           TII->moveToVALU(MI);
 364         }
 365         break;
 366       }
 367       }
 368     }
 369   }
 370
 371   return true;
 372 }