//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// that will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//
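
// Example (illustrative only): a kernel containing a call to
// llvm.amdgcn.workgroup.id.y will be annotated with the
// "amdgpu-work-group-id-y" attribute, which calling convention lowering
// uses to enable the corresponding input register.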

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;
  AMDGPUAS AS;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
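// Casting from local or private to flat needs the aperture base for the
// source address space; on subtargets without aperture registers that base
// is read from the queue ptr, which is why such casts require it.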
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;
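
  // Despite the name, the traversal below is iterative: an explicit worklist
  // plus the visited set keeps deeply nested constant expressions from
  // overflowing the call stack.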
  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-kernarg-segment-ptr";
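  // On HSA targets, trap lowering reaches the trap handler through the queue,
  // so llvm.trap and llvm.debugtrap need the queue ptr as well.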
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // The x ids are always enabled for kernels, so copying them to a kernel is
  // unnecessary, but harmless.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}
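
// Note: CallGraphSCCPass processes callees before callers (bottom-up over
// SCCs), so a callee's attributes are normally in place before they are
// copied into its callers here.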

bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
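  // Entry functions (kernels) always have the x ids initialized, so the
  // NonKernelOnly attributes are only added when IsFunc is true.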

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }
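
      // Once the queue ptr is known to be required, or the subtarget has
      // aperture registers, there is no need to inspect the casts below.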
      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  AS = AMDGPU::getAMDGPUAS(CG.getModule());
  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}