//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
///
/// The pass analyzes the call graph and propagates ABI target features through
/// the call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use the correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" #include #define DEBUG_TYPE "amdgpu-propagate-attributes" using namespace llvm; namespace llvm { extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; } namespace { class AMDGPUPropagateAttributes { const FeatureBitset TargetFeatures = { AMDGPU::FeatureWavefrontSize16, AMDGPU::FeatureWavefrontSize32, AMDGPU::FeatureWavefrontSize64 }; class Clone{ public: Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} FeatureBitset FeatureMask; Function *OrigF; Function *NewF; }; const TargetMachine *TM; // Clone functions as needed or just set attributes. bool AllowClone; // Option propagation roots. SmallSet Roots; // Clones of functions with their attributes. SmallVector Clones; // Find a clone with required features. Function *findFunction(const FeatureBitset &FeaturesNeeded, Function *OrigF); // Clone function F and set NewFeatures on the clone. // Cole takes the name of original function. Function *cloneWithFeatures(Function &F, const FeatureBitset &NewFeatures); // Set new function's features in place. void setFeatures(Function &F, const FeatureBitset &NewFeatures); std::string getFeatureString(const FeatureBitset &Features) const; // Propagate attributes from Roots. bool process(); public: AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : TM(TM), AllowClone(AllowClone) {} // Use F as a root and propagate its attributes. bool process(Function &F); // Propagate attributes starting from kernel functions. 
bool process(Module &M); }; // Allows to propagate attributes early, but no clonning is allowed as it must // be a function pass to run before any optimizations. // TODO: We shall only need a one instance of module pass, but that needs to be // in the linker pipeline which is currently not possible. class AMDGPUPropagateAttributesEarly : public FunctionPass { const TargetMachine *TM; public: static char ID; // Pass identification AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : FunctionPass(ID), TM(TM) { initializeAMDGPUPropagateAttributesEarlyPass( *PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; }; // Allows to propagate attributes with clonning but does that late in the // pipeline. class AMDGPUPropagateAttributesLate : public ModulePass { const TargetMachine *TM; public: static char ID; // Pass identification AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) { initializeAMDGPUPropagateAttributesLatePass( *PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; }; } // end anonymous namespace. char AMDGPUPropagateAttributesEarly::ID = 0; char AMDGPUPropagateAttributesLate::ID = 0; INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, "amdgpu-propagate-attributes-early", "Early propagate attributes from kernels to functions", false, false) INITIALIZE_PASS(AMDGPUPropagateAttributesLate, "amdgpu-propagate-attributes-late", "Late propagate attributes from kernels to functions", false, false) Function * AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, Function *OrigF) { // TODO: search for clone's clones. 
for (Clone &C : Clones) if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) return C.NewF; return nullptr; } bool AMDGPUPropagateAttributes::process(Module &M) { for (auto &F : M.functions()) if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) Roots.insert(&F); return process(); } bool AMDGPUPropagateAttributes::process(Function &F) { Roots.insert(&F); return process(); } bool AMDGPUPropagateAttributes::process() { bool Changed = false; SmallSet NewRoots; SmallSet Replaced; if (Roots.empty()) return false; Module &M = *(*Roots.begin())->getParent(); do { Roots.insert(NewRoots.begin(), NewRoots.end()); NewRoots.clear(); for (auto &F : M.functions()) { if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) continue; const FeatureBitset &CalleeBits = TM->getSubtargetImpl(F)->getFeatureBits(); SmallVector, 32> ToReplace; for (User *U : F.users()) { Instruction *I = dyn_cast(U); if (!I) continue; CallBase *CI = dyn_cast(I); if (!CI) continue; Function *Caller = CI->getCaller(); if (!Caller) continue; if (!Roots.count(Caller)) continue; const FeatureBitset &CallerBits = TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; if (CallerBits == (CalleeBits & TargetFeatures)) { NewRoots.insert(&F); continue; } Function *NewF = findFunction(CallerBits, &F); if (!NewF) { FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | CallerBits); if (!AllowClone) { // This may set different features on different iteartions if // there is a contradiction in callers' attributes. In this case // we rely on a second pass running on Module, which is allowed // to clone. 
setFeatures(F, NewFeatures); NewRoots.insert(&F); Changed = true; break; } NewF = cloneWithFeatures(F, NewFeatures); Clones.push_back(Clone(CallerBits, &F, NewF)); NewRoots.insert(NewF); } ToReplace.push_back(std::make_pair(CI, NewF)); Replaced.insert(&F); Changed = true; } while (!ToReplace.empty()) { auto R = ToReplace.pop_back_val(); R.first->setCalledFunction(R.second); } } } while (!NewRoots.empty()); for (Function *F : Replaced) { if (F->use_empty()) F->eraseFromParent(); } return Changed; } Function * AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, const FeatureBitset &NewFeatures) { LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); ValueToValueMapTy dummy; Function *NewF = CloneFunction(&F, dummy); setFeatures(*NewF, NewFeatures); // Swap names. If that is the only clone it will retain the name of now // dead value. if (F.hasName()) { std::string NewName = NewF->getName(); NewF->takeName(&F); F.setName(NewName); // Name has changed, it does not need an external symbol. F.setVisibility(GlobalValue::DefaultVisibility); F.setLinkage(GlobalValue::InternalLinkage); } return NewF; } void AMDGPUPropagateAttributes::setFeatures(Function &F, const FeatureBitset &NewFeatures) { std::string NewFeatureStr = getFeatureString(NewFeatures); LLVM_DEBUG(dbgs() << "Set features " << getFeatureString(NewFeatures & TargetFeatures) << " on " << F.getName() << '\n'); F.removeFnAttr("target-features"); F.addFnAttr("target-features", NewFeatureStr); } std::string AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const { std::string Ret; for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { if (Features[KV.Value]) Ret += (StringRef("+") + KV.Key + ",").str(); else if (TargetFeatures[KV.Value]) Ret += (StringRef("-") + KV.Key + ",").str(); } Ret.pop_back(); // Remove last comma. 
return Ret; } bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; return AMDGPUPropagateAttributes(TM, false).process(F); } bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { if (!TM) return false; return AMDGPUPropagateAttributes(TM, true).process(M); } FunctionPass *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { return new AMDGPUPropagateAttributesEarly(TM); } ModulePass *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { return new AMDGPUPropagateAttributesLate(TM); }