contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp

   1 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This pass adds amdgpu.uniform metadata to IR values so this information
  11 /// can be used during instruction selection.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPU.h"
  16 #include "llvm/ADT/SetVector.h"
  17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  18 #include "llvm/Analysis/LoopInfo.h"
  19 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  20 #include "llvm/IR/IRBuilder.h"
  21 #include "llvm/IR/InstVisitor.h"
  22 #include "llvm/InitializePasses.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/raw_ostream.h"
  25
  26 #define DEBUG_TYPE "amdgpu-annotate-uniform"
  27
  28 using namespace llvm;
  29
  30 namespace {
  31
  32 class AMDGPUAnnotateUniformValues : public FunctionPass,
  33                        public InstVisitor<AMDGPUAnnotateUniformValues> {
  34   LegacyDivergenceAnalysis *DA;
  35   MemoryDependenceResults *MDR;
  36   LoopInfo *LI;
  37   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
  38   bool isKernelFunc;
  39
  40 public:
  41   static char ID;
  42   AMDGPUAnnotateUniformValues() :
  43     FunctionPass(ID) { }
  44   bool doInitialization(Module &M) override;
  45   bool runOnFunction(Function &F) override;
  46   StringRef getPassName() const override {
  47     return "AMDGPU Annotate Uniform Values";
  48   }
  49   void getAnalysisUsage(AnalysisUsage &AU) const override {
  50     AU.addRequired<LegacyDivergenceAnalysis>();
  51     AU.addRequired<MemoryDependenceWrapperPass>();
  52     AU.addRequired<LoopInfoWrapperPass>();
  53     AU.setPreservesAll();
  54  }
  55
  56   void visitBranchInst(BranchInst &I);
  57   void visitLoadInst(LoadInst &I);
  58   bool isClobberedInFunction(LoadInst * Load);
  59 };
  60
  61 } // End anonymous namespace
  62
  63 INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
  64                       "Add AMDGPU uniform metadata", false, false)
  65 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
  66 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
  67 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  68 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
  69                     "Add AMDGPU uniform metadata", false, false)
  70
  71 char AMDGPUAnnotateUniformValues::ID = 0;
  72
  73 static void setUniformMetadata(Instruction *I) {
  74   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
  75 }
  76 static void setNoClobberMetadata(Instruction *I) {
  77   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
  78 }
  79
  80 static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
  81   for (auto I : predecessors(Root))
  82     if (Set.insert(I))
  83       DFS(I, Set);
  84 }
  85
  86 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
  87   // 1. get Loop for the Load->getparent();
  88   // 2. if it exists, collect all the BBs from the most outer
  89   // loop and check for the writes. If NOT - start DFS over all preds.
  90   // 3. Start DFS over all preds from the most outer loop header.
  91   SetVector<BasicBlock *> Checklist;
  92   BasicBlock *Start = Load->getParent();
  93   Checklist.insert(Start);
  94   const Value *Ptr = Load->getPointerOperand();
  95   const Loop *L = LI->getLoopFor(Start);
  96   if (L) {
  97     const Loop *P = L;
  98     do {
  99       L = P;
 100       P = P->getParentLoop();
 101     } while (P);
 102     Checklist.insert(L->block_begin(), L->block_end());
 103     Start = L->getHeader();
 104   }
 105
 106   DFS(Start, Checklist);
 107   for (auto &BB : Checklist) {
 108     BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
 109       BasicBlock::iterator(Load) : BB->end();
 110     auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
 111                                            StartIt, BB, Load);
 112     if (Q.isClobber() || Q.isUnknown())
 113       return true;
 114   }
 115   return false;
 116 }
 117
 118 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
 119   if (DA->isUniform(&I))
 120     setUniformMetadata(I.getParent()->getTerminator());
 121 }
 122
 123 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
 124   Value *Ptr = I.getPointerOperand();
 125   if (!DA->isUniform(Ptr))
 126     return;
 127   auto isGlobalLoad = [&](LoadInst &Load)->bool {
 128     return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 129   };
 130   // We're tracking up to the Function boundaries
 131   // We cannot go beyond because of FunctionPass restrictions
 132   // Thus we can ensure that memory not clobbered for memory
 133   // operations that live in kernel only.
 134   bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I);
 135   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
 136   if (!PtrI && NotClobbered && isGlobalLoad(I)) {
 137     if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
 138       // Lookup for the existing GEP
 139       if (noClobberClones.count(Ptr)) {
 140         PtrI = noClobberClones[Ptr];
 141       } else {
 142         // Create GEP of the Value
 143         Function *F = I.getParent()->getParent();
 144         Value *Idx = Constant::getIntegerValue(
 145           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
 146         // Insert GEP at the entry to make it dominate all uses
 147         PtrI = GetElementPtrInst::Create(
 148           Ptr->getType()->getPointerElementType(), Ptr,
 149           ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
 150       }
 151       I.replaceUsesOfWith(Ptr, PtrI);
 152     }
 153   }
 154
 155   if (PtrI) {
 156     setUniformMetadata(PtrI);
 157     if (NotClobbered)
 158       setNoClobberMetadata(PtrI);
 159   }
 160 }
 161
 162 bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
 163   return false;
 164 }
 165
 166 bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
 167   if (skipFunction(F))
 168     return false;
 169
 170   DA  = &getAnalysis<LegacyDivergenceAnalysis>();
 171   MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
 172   LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 173   isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
 174
 175   visit(F);
 176   noClobberClones.clear();
 177   return true;
 178 }
 179
 180 FunctionPass *
 181 llvm::createAMDGPUAnnotateUniformValues() {
 182   return new AMDGPUAnnotateUniformValues();
 183 }