1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
11 // It also builds the data structures and initialization code needed for
12 // updating execution counts and emitting the profile at runtime.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/Attributes.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/Pass.h"
40 #include "llvm/ProfileData/InstrProf.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
56 #define DEBUG_TYPE "instrprof"
// Command-line options that tune how instrprof intrinsics are lowered.
// NOTE(review): the embedded line numbers have gaps inside most of these
// declarations, so option names, defaults and closing parentheses are not all
// visible in this excerpt; the notes below rely only on the visible text.
58 // The start and end values of precise value profile range for memory
60 cl::opt<std::string> MemOPSizeRange(
62 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
63 "precisely, in a format of <start_val>:<end_val>"),
66 // The value that is considered to be a large value in memory intrinsics.
67 cl::opt<unsigned> MemOPSizeLarge(
69 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
70 "Value of 0 disables the large value profiling."),
75 cl::opt<bool> DoNameCompression("enable-name-compression",
76 cl::desc("Enable name string compression"),
79 cl::opt<bool> DoHashBasedCounterSplit(
80 "hash-based-counter-split",
81 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
84 cl::opt<bool> ValueProfileStaticAlloc(
86 cl::desc("Do static counter allocation for value profiler"),
89 cl::opt<double> NumCountersPerValueSite(
90 "vp-counters-per-site",
91 cl::desc("The average number of profile counters allocated "
92 "per value profiling site."),
93 // This is set to a very small value because in real programs, only
94 // a very small percentage of value sites have non-zero targets, e.g., 1/30.
95 // For those sites with a non-zero profile, the average number of targets
96 // is usually smaller than 2.
99 cl::opt<bool> AtomicCounterUpdatePromoted(
100 "atomic-counter-update-promoted", cl::ZeroOrMore,
101 cl::desc("Do counter update using atomic fetch add "
102 " for promoted counters only"),
105 // If the option is not specified, the default behavior about whether
106 // counter promotion is done depends on how the instrumentation lowering
107 // pipeline is set up, i.e., the default value of true of this option
108 // does not mean the promotion will be done by default. Explicitly
109 // setting this option can override the default behavior.
110 cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
111 cl::desc("Do counter register promotion"),
113 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
114 cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
115 cl::desc("Max number counter promotions per loop to avoid"
116 " increasing register pressure too much"));
// PGOCounterPromoter::run only tests this cap when the value is not -1, so
// the default of -1 leaves the running promotion total unchecked.
// NOTE(review): the line declaring this option's value type is not visible.
120 MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
121 cl::desc("Max number of allowed counter promotions"));
// Compared against the number of exiting blocks of a loop when deciding how
// many promotions the loop may receive.
123 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
124 cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
125 cl::desc("The max number of exiting blocks of a loop to allow "
126 " speculative counter promotion"));
// NOTE(review): the tail of this description string is not visible here.
128 cl::opt<bool> SpeculativeCounterPromotionToLoop(
129 cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
130 cl::desc("When the option is false, if the target block is in a loop, "
131 "the promotion will be disallowed unless the promoted counter "
132 " update can be further/iteratively promoted into an acyclic "
// When set, promoted updates are re-registered as candidates of the loop that
// contains the exit block (see PGOCounterPromoterHelper).
135 cl::opt<bool> IterativeCounterPromotion(
136 cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
137 cl::desc("Allow counter promotion across the whole loop nest."));
/// Legacy pass-manager wrapper that owns an InstrProfiling instance and
/// forwards module runs to it.
/// NOTE(review): some member lines (e.g. the declaration of ID and the
/// closing braces) are not visible in this excerpt.
139 class InstrProfilingLegacyPass : public ModulePass {
140 InstrProfiling InstrProf;
// The default constructor leaves InstrProf default-constructed; the second
// constructor forwards the caller's InstrProfOptions to it.
145 InstrProfilingLegacyPass() : ModulePass(ID) {}
146 InstrProfilingLegacyPass(const InstrProfOptions &Options)
147 : ModulePass(ID), InstrProf(Options) {}
149 StringRef getPassName() const override {
150 return "Frontend instrumentation-based coverage lowering";
// Runs the lowering on M using the TargetLibraryInfo supplied by the required
// TargetLibraryInfoWrapperPass; the result of InstrProfiling::run is returned
// unchanged.
153 bool runOnModule(Module &M) override {
154 return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
// Declares that the CFG is preserved and that TargetLibraryInfo is required.
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
158 AU.setPreservesCFG();
159 AU.addRequired<TargetLibraryInfoWrapperPass>();
164 /// A helper class to promote one counter RMW operation in the loop
165 /// into register update.
167 /// The RMW update for the counter will be sunk out of the loop after
168 /// the transformation.
170 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
// L and S are the counter load and store being promoted (both asserted
// below). Init is registered with SSA as the value available in block PH.
// ExitBlocks and InsertPts are indexed in parallel by
// doExtraRewritesBeforeFinalDeletion.
// NOTE(review): the final constructor parameter line is not visible here.
172 PGOCounterPromoterHelper(
173 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
174 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
175 ArrayRef<Instruction *> InsertPts,
176 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
178 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
179 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
180 assert(isa<LoadInst>(L));
181 assert(isa<StoreInst>(S));
182 SSA.AddAvailableValue(PH, Init);
// For every exit block, emits the counter update at the matching insertion
// point, adding the value that is live into that exit block to the counter
// addressed by the promoted store.
185 void doExtraRewritesBeforeFinalDeletion() const override {
186 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
187 BasicBlock *ExitBlock = ExitBlocks[i];
188 Instruction *InsertPos = InsertPts[i];
189 // Get LiveIn value into the ExitBlock. If there are multiple
190 // predecessors, the value is defined by a PHI node in this
192 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
193 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
194 IRBuilder<> Builder(InsertPos);
195 if (AtomicCounterUpdatePromoted)
196 // atomic update currently can only be promoted across the current
197 // loop, not the whole loop nest.
198 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
199 AtomicOrdering::SequentiallyConsistent);
// Non-atomic form: load the counter, add the live-in value, store it back.
// NOTE(review): the line separating this sequence from the atomic branch
// above is not visible in this excerpt.
201 LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
202 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
203 auto *NewStore = Builder.CreateStore(NewVal, Addr);
205 // Now update the parent loop's candidate list:
206 if (IterativeCounterPromotion) {
207 auto *TargetLoop = LI.getLoopFor(ExitBlock);
// The new load/store pair becomes a promotion candidate of the loop that
// contains the exit block.
// NOTE(review): a line between the lookup above and this insertion is not
// visible; confirm how a null TargetLoop is handled.
209 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
217 ArrayRef<BasicBlock *> ExitBlocks;
218 ArrayRef<Instruction *> InsertPts;
219 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
223 /// A helper class to do register promotion for all profile counter
224 /// updates in a loop.
226 class PGOCounterPromoter {
// The constructor records each distinct exit block of CurLoop once, together
// with that block's first insertion point (the two vectors stay parallel).
// NOTE(review): the constructor's opening line and the tail of its
// initializer list are not visible in this excerpt.
229 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
230 Loop &CurLoop, LoopInfo &LI)
231 : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
234 SmallVector<BasicBlock *, 8> LoopExitBlocks;
235 SmallPtrSet<BasicBlock *, 8> BlockSet;
236 L.getExitBlocks(LoopExitBlocks);
238 for (BasicBlock *ExitBlock : LoopExitBlocks) {
// insert(...).second is true only the first time a block is seen.
239 if (BlockSet.insert(ExitBlock).second) {
240 ExitBlocks.push_back(ExitBlock);
241 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
// Promotes the candidates registered for loop L, each with an initial value
// of zero in the loop preheader. Returns true if at least one counter was
// promoted.
// NOTE(review): the statements executed when the limit checks below succeed,
// and the updates of Promoted / *NumPromoted, are not visible here.
246 bool run(int64_t *NumPromoted) {
247 // Skip 'infinite' loops:
248 if (ExitBlocks.size() == 0)
250 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
254 unsigned Promoted = 0;
255 for (auto &Cand : LoopToCandidates[&L]) {
257 SmallVector<PHINode *, 4> NewPHIs;
258 SSAUpdater SSA(&NewPHIs);
259 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
261 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
262 L.getLoopPreheader(), ExitBlocks,
263 InsertPts, LoopToCandidates, LI);
264 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
// Per-loop limit.
266 if (Promoted >= MaxProm)
// Limit on the running total; skipped when MaxNumOfPromotions is -1.
270 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
274 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
275 << L.getLoopDepth() << ")\n");
276 return Promoted != 0;
// NOTE(review): the visible lines query the member loop L, not the LP
// parameter; the return statements are not visible in this excerpt.
280 bool allowSpeculativeCounterPromotion(Loop *LP) {
281 SmallVector<BasicBlock *, 8> ExitingBlocks;
282 L.getExitingBlocks(ExitingBlocks);
283 // Not considered speculative.
284 if (ExitingBlocks.size() == 1)
286 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
291 // Returns the max number of Counter Promotions for LP.
292 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
293 // We can't insert into a catchswitch.
294 SmallVector<BasicBlock *, 8> LoopExitBlocks;
295 LP->getExitBlocks(LoopExitBlocks);
296 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
297 return isa<CatchSwitchInst>(Exit->getTerminator());
// NOTE(review): the values returned by the guards below (catchswitch exit,
// non-dedicated exits, preheader check, too many exiting blocks) are not
// visible in this excerpt.
301 if (!LP->hasDedicatedExits())
304 BasicBlock *PH = LP->getLoopPreheader();
308 SmallVector<BasicBlock *, 8> ExitingBlocks;
309 LP->getExitingBlocks(ExitingBlocks);
310 // Not considered speculative.
311 if (ExitingBlocks.size() == 1)
312 return MaxNumOfPromotionsPerLoop;
314 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
317 // Whether the target block is in a loop does not matter:
318 if (SpeculativeCounterPromotionToLoop)
319 return MaxNumOfPromotionsPerLoop;
321 // Now check the target block:
322 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
323 for (auto *TargetBlock : LoopExitBlocks) {
324 auto *TargetLoop = LI.getLoopFor(TargetBlock);
// Recurses on the loop containing the exit block and subtracts the candidates
// already pending there; max(...) - Pending keeps the unsigned difference
// from going below zero.
327 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
328 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
330 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
331 PendingCandsInTarget);
336 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
337 SmallVector<BasicBlock *, 8> ExitBlocks;
338 SmallVector<Instruction *, 8> InsertPts;
343 } // end anonymous namespace
// New pass-manager entry point: obtains TargetLibraryInfo for M from the
// analysis manager and reports either all or no analyses as preserved.
// NOTE(review): the condition selecting between the two returns is not
// visible in this excerpt.
345 PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
346 auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
348 return PreservedAnalyses::all();
350 return PreservedAnalyses::none();
// Pass identification and legacy pass registration under the name
// "instrprof", with TargetLibraryInfoWrapperPass declared as a dependency.
// NOTE(review): the macro that closes the registration is not visible here.
353 char InstrProfilingLegacyPass::ID = 0;
354 INITIALIZE_PASS_BEGIN(
355 InstrProfilingLegacyPass, "instrprof",
356 "Frontend instrumentation-based coverage lowering.", false, false)
357 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
359 InstrProfilingLegacyPass, "instrprof",
360 "Frontend instrumentation-based coverage lowering.", false, false)
// Factory: returns a newly allocated InstrProfilingLegacyPass configured with
// Options. NOTE(review): the return-type line is not visible in this excerpt.
363 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
364 return new InstrProfilingLegacyPass(Options);
// Tries to view Instr as a counter-increment intrinsic: first as the
// step-carrying form, then as the plain increment form. The final dyn_cast
// yields null when Instr is neither.
// NOTE(review): the lines handling a successful first cast are not visible.
367 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
368 InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
371 return dyn_cast<InstrProfIncrementInst>(Instr);
// Walks every instruction of F, lowering value-profile intrinsics (and, per
// the castToIncrementInst call, increment intrinsics), then runs counter
// promotion over the load/store pairs collected during lowering.
// NOTE(review): iterator advancement, the increment branch and the return
// statement are not visible in this excerpt.
374 bool InstrProfiling::lowerIntrinsics(Function *F) {
375 bool MadeChange = false;
// Candidates are per function, so start from an empty list.
376 PromotionCandidates.clear();
377 for (BasicBlock &BB : *F) {
// The loop header has no increment expression; lowering erases the
// instruction being visited (see lowerIncrement / lowerValueProfileInst).
378 for (auto I = BB.begin(), E = BB.end(); I != E;) {
380 InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
384 } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
385 lowerValueProfileInst(Ind);
394 promoteCounterLoadStores(F);
// Returns whether counter promotion should run: an explicitly given
// -do-counter-promotion flag wins; otherwise the pass option decides.
398 bool InstrProfiling::isCounterPromotionEnabled() const {
// getNumOccurrences() > 0 means the flag appeared on the command line.
399 if (DoCounterPromotion.getNumOccurrences() > 0)
400 return DoCounterPromotion;
402 return Options.DoCounterPromotion;
// Groups the recorded counter load/store pairs by the loop containing the
// load and runs PGOCounterPromoter on every loop of F, innermost loops first.
// NOTE(review): the early return, the construction of LI and any filtering
// of candidates outside loops are not visible in this excerpt.
405 void InstrProfiling::promoteCounterLoadStores(Function *F) {
406 if (!isCounterPromotionEnabled())
409 DominatorTree DT(*F);
411 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
413 for (const auto &LoadStore : PromotionCandidates) {
414 auto *CounterLoad = LoadStore.first;
415 auto *CounterStore = LoadStore.second;
416 BasicBlock *BB = CounterLoad->getParent();
417 Loop *ParentLoop = LI.getLoopFor(BB);
420 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
423 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
425 // Do a post-order traversal of the loops so that counter updates can be
426 // iteratively hoisted outside the loop nest.
427 for (auto *Loop : llvm::reverse(Loops)) {
428 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
// The boolean result of run() is not used here.
429 Promoter.run(&TotalCountersPromoted);
433 /// Check if the module contains uses of any profiling intrinsics.
/// Looks up the declarations of instrprof_increment,
/// instrprof_increment_step and instrprof_value_profile by name.
/// NOTE(review): the test applied to each found declaration and the return
/// statements are not visible in this excerpt.
434 static bool containsProfilingIntrinsics(Module &M) {
435 if (auto *F = M.getFunction(
436 Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
439 if (auto *F = M.getFunction(
440 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
443 if (auto *F = M.getFunction(
444 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
// Main driver: resets per-module state, emits the runtime hook, counts value
// sites and creates per-function counters, lowers the intrinsics of every
// function, lowers the coverage-names variable when present, and emits the
// initialization code.
// NOTE(review): several statements (member setup, the early return, other
// emit* calls and the final return) are not visible in this excerpt.
450 bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
455 ProfileDataMap.clear();
457 getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
459 TT = Triple(M.getTargetTriple());
461 // Emit the runtime hook even if no counters are present.
462 bool MadeChange = emitRuntimeHook();
464 // Improve compile time by avoiding linear scans when there is no work.
465 GlobalVariable *CoverageNamesVar =
466 M.getNamedGlobal(getCoverageUnusedNamesVarName());
467 if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
470 // We did not know how many value sites there would be inside
471 // the instrumented function. This is counting the number of instrumented
472 // target value sites to enter it as field in the profile data variable.
473 for (Function &F : M) {
474 InstrProfIncrementInst *FirstProfIncInst = nullptr;
475 for (BasicBlock &BB : F)
476 for (auto I = BB.begin(), E = BB.end(); I != E; I++)
477 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
478 computeNumValueSiteCounts(Ind);
// Remember only the first increment intrinsic found in the function.
479 else if (FirstProfIncInst == nullptr)
480 FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
482 // Value profiling intrinsic lowering requires per-function profile data
483 // variable to be created first.
484 if (FirstProfIncInst != nullptr)
// Called for its side effect of creating the variables; result discarded.
485 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
488 for (Function &F : M)
489 MadeChange |= lowerIntrinsics(&F);
491 if (CoverageNamesVar) {
492 lowerCoverageData(CoverageNamesVar);
503 emitInitialization();
// Returns the declaration of the runtime value-profiling function, inserting
// it into M if needed. Two function types are built from the parameter lists
// in InstrProfData.inc: the plain one and, with VALUE_RANGE_PROF defined, the
// range one. When the result is a Function, parameter 2 receives the
// extension attribute reported by TLI for i32 parameters, if there is one.
// NOTE(review): the branch on IsRange, the declaration of Res and the return
// statement are not visible in this excerpt.
507 static Constant *getOrInsertValueProfilingCall(Module &M,
508 const TargetLibraryInfo &TLI,
509 bool IsRange = false) {
510 LLVMContext &Ctx = M.getContext();
511 auto *ReturnTy = Type::getVoidTy(M.getContext());
515 Type *ParamTypes[] = {
516 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
517 #include "llvm/ProfileData/InstrProfData.inc"
519 auto *ValueProfilingCallTy =
520 FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
521 Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
522 ValueProfilingCallTy);
524 Type *RangeParamTypes[] = {
525 #define VALUE_RANGE_PROF 1
526 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
527 #include "llvm/ProfileData/InstrProfData.inc"
528 #undef VALUE_RANGE_PROF
530 auto *ValueRangeProfilingCallTy =
531 FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
532 Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
533 ValueRangeProfilingCallTy);
536 if (Function *FunRes = dyn_cast<Function>(Res)) {
537 if (auto AK = TLI.getExtAttrForI32Param(false))
538 FunRes->addParamAttr(2, AK);
// Records, per function name variable and value kind, the number of value
// sites seen so far as (largest site index + 1).
543 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
544 GlobalVariable *Name = Ind->getName();
545 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
546 uint64_t Index = Ind->getIndex()->getZExtValue();
547 auto It = ProfileDataMap.find(Name);
// First site seen for this function: create a fresh entry.
548 if (It == ProfileDataMap.end()) {
549 PerFunctionProfileData PD;
550 PD.NumValueSites[ValueKind] = Index + 1;
551 ProfileDataMap[Name] = PD;
// Otherwise only grow the count, never shrink it.
552 } else if (It->second.NumValueSites[ValueKind] <= Index)
553 It->second.NumValueSites[ValueKind] = Index + 1;
// Replaces a value-profile intrinsic with a call to the runtime profiling
// function. Requires that the function's data variable already exists
// (asserted below).
// NOTE(review): the branch selecting between the plain and the range call,
// and the declaration/assignment lines of the range arguments, are not fully
// visible in this excerpt.
556 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
557 GlobalVariable *Name = Ind->getName();
558 auto It = ProfileDataMap.find(Name);
559 assert(It != ProfileDataMap.end() && It->second.DataVar &&
560 "value profiling detected in function with no counter incerement");
562 GlobalVariable *DataVar = It->second.DataVar;
563 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
564 uint64_t Index = Ind->getIndex()->getZExtValue();
// Offset the site index by the site counts of all value kinds that precede
// this one, giving a single index across kinds.
565 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
566 Index += It->second.NumValueSites[Kind];
568 IRBuilder<> Builder(Ind);
// Memory-op size profiling is the kind flagged as a range.
569 bool IsRange = (Ind->getValueKind()->getZExtValue() ==
570 llvm::InstrProfValueKind::IPVK_MemOPSize);
571 CallInst *Call = nullptr;
// Plain call: target value, data variable as i8*, site index.
573 Value *Args[3] = {Ind->getTargetValue(),
574 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
575 Builder.getInt32(Index)};
576 Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
// Range call: the same three arguments followed by the precise range start,
// the range last value and the large-value threshold; a threshold of 0 is
// passed as INT64_MIN.
579 Ind->getTargetValue(),
580 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
581 Builder.getInt32(Index),
582 Builder.getInt64(MemOPSizeRangeStart),
583 Builder.getInt64(MemOPSizeRangeLast),
584 Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
586 Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
// Mirror on the call site the parameter attribute applied to the declaration.
588 if (auto AK = TLI->getExtAttrForI32Param(false))
589 Call->addParamAttr(2, AK);
590 Ind->replaceAllUsesWith(Call);
591 Ind->eraseFromParent();
// Replaces an increment intrinsic with an explicit load / add / store on the
// function's counter array element selected by the intrinsic's index, then
// erases the intrinsic.
594 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
595 GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
597 IRBuilder<> Builder(Inc);
598 uint64_t Index = Inc->getIndex()->getZExtValue();
599 Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
600 Value *Load = Builder.CreateLoad(Addr, "pgocount");
601 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
602 auto *Store = Builder.CreateStore(Count, Addr);
603 Inc->replaceAllUsesWith(Store);
// Remember the pair so promoteCounterLoadStores can consider it later.
604 if (isCounterPromotionEnabled())
605 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
606 Inc->eraseFromParent();
// Consumes the coverage-names variable: each operand of its initializer must
// resolve (through pointer casts) to a name GlobalVariable, which is made
// private and added to ReferencedNames. The operand's references are dropped
// and the coverage-names variable itself is erased afterwards.
609 void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
610 ConstantArray *Names =
611 cast<ConstantArray>(CoverageNamesVar->getInitializer());
612 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
613 Constant *NC = Names->getOperand(I);
614 Value *V = NC->stripPointerCasts();
615 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
616 GlobalVariable *Name = cast<GlobalVariable>(V);
618 Name->setLinkage(GlobalValue::PrivateLinkage);
619 ReferencedNames.push_back(Name);
620 NC->dropAllReferences();
622 CoverageNamesVar->eraseFromParent();
625 /// Get the name of a profiling variable for a particular function.
/// The result is Prefix followed by the function's name-variable name with
/// the name-variable prefix removed. When hash-based counter splitting is
/// enabled, the module has the IR PGO flag set and the comdat function can
/// be renamed, ".<function hash>" is appended unless the name already ends
/// with that suffix.
626 static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
627 StringRef NamePrefix = getInstrProfNameVarPrefix();
628 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
629 Function *F = Inc->getParent()->getParent();
630 Module *M = F->getParent();
631 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
632 !canRenameComdatFunc(*F))
633 return (Prefix + Name).str();
634 uint64_t FuncHash = Inc->getHash()->getZExtValue();
635 SmallVector<char, 24> HashPostfix;
// Avoid appending the hash twice.
636 if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
637 return (Prefix + Name).str();
638 return (Prefix + Name + "." + Twine(FuncHash)).str();
// Decides whether the address of F should be stored in its profile data.
// NOTE(review): the values returned by the guards below are not visible in
// this excerpt; only the conditions and the final fallback are.
641 static inline bool shouldRecordFunctionAddr(Function *F) {
643 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
// Guard for functions that are neither linkonce, local nor
// available_externally.
644 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
645 !HasAvailableExternallyLinkage)
648 // A function marked 'alwaysinline' with available_externally linkage can't
649 // have its address taken. Doing so would create an undefined external ref to
650 // the function, which would fail to link.
651 if (HasAvailableExternallyLinkage &&
652 F->hasFnAttribute(Attribute::AlwaysInline))
655 // Prohibit function address recording if the function is both internal and
656 // COMDAT. This avoids the profile data variable referencing internal symbols
658 if (F->hasLocalLinkage() && F->hasComdat())
661 // Check uses of this function for other than direct calls or invokes to it.
662 // Inline virtual functions have linkonce_odr linkage. When a key method
663 // exists, the vtable will only be emitted in the TU where the key method
664 // is defined. In a TU where vtable is not available, the function won't
665 // be 'addresstaken'. If its address is not recorded here, the profile data
666 // with missing address may be picked by the linker leading to missing
667 // indirect call target info.
668 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
// Returns the comdat in which the profile variables of F are placed, creating
// it in M on first use. The comdat name is built by getVarName from a prefix
// that depends on the object format (see the comment below).
// NOTE(review): the value returned when no comdat is needed is not visible.
671 static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
672 InstrProfIncrementInst *Inc) {
673 if (!needsComdatForCounter(F, M))
676 // COFF format requires a COMDAT section to have a key symbol with the same
677 // name. The linker targeting COFF also requires that the COMDAT
678 // a section is associated to must precede the associating section. For this
679 // reason, we must choose the counter var's name as the name of the comdat.
680 StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
681 ? getInstrProfCountersVarPrefix()
682 : getInstrProfComdatPrefix());
683 return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
// Tells whether profile sections must be registered with the runtime through
// generated code. Darwin is checked first, then Linux, FreeBSD, Fuchsia and
// PS4 as a group.
// NOTE(review): the return statements are not visible in this excerpt.
686 static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
687 // Don't do this for Darwin. compiler-rt uses linker magic.
688 if (Triple(M.getTargetTriple()).isOSDarwin())
691 // Use linker script magic to get data/cnts/name start/end.
692 if (Triple(M.getTargetTriple()).isOSLinux() ||
693 Triple(M.getTargetTriple()).isOSFreeBSD() ||
694 Triple(M.getTargetTriple()).isOSFuchsia() ||
695 Triple(M.getTargetTriple()).isPS4CPU())
// Returns the counter array for the function that Inc belongs to, creating it
// on first request together with the optional static value-pointer array and
// the per-function data record. All created variables take the linkage and
// visibility of the name variable, are placed in their instrprof sections and
// share one comdat. The name variable is then made private and queued for
// emitNameData.
// NOTE(review): the return-type line, a few declarations (CounterPtr, NS,
// ValuesVar), some assignments into PD and the final return are not visible
// in this excerpt.
702 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
703 GlobalVariable *NamePtr = Inc->getName();
704 auto It = ProfileDataMap.find(NamePtr);
705 PerFunctionProfileData PD;
706 if (It != ProfileDataMap.end()) {
// Counters already exist for this function: reuse them.
707 if (It->second.RegionCounters)
708 return It->second.RegionCounters;
712 // Move the name variable to the right section. Place them in a COMDAT group
713 // if the associated function is a COMDAT. This will make sure that
714 // only one copy of counters of the COMDAT function will be emitted after
716 Function *Fn = Inc->getParent()->getParent();
717 Comdat *ProfileVarsComdat = nullptr;
718 ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
720 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
721 LLVMContext &Ctx = M->getContext();
// One zero-initialized 64-bit slot per counter.
722 ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
724 // Create the counters variable.
726 new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
727 Constant::getNullValue(CounterTy),
728 getVarName(Inc, getInstrProfCountersVarPrefix()));
729 CounterPtr->setVisibility(NamePtr->getVisibility());
730 CounterPtr->setSection(
731 getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
732 CounterPtr->setAlignment(8);
733 CounterPtr->setComdat(ProfileVarsComdat);
735 auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
736 // Allocate statically the array of pointers to value profile nodes for
737 // the current function.
// Defaults to a null pointer when no static array is created.
738 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
739 if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
// NS accumulates the value-site counts of all value kinds.
741 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
742 NS += PD.NumValueSites[Kind];
744 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
747 new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
748 Constant::getNullValue(ValuesTy),
749 getVarName(Inc, getInstrProfValuesVarPrefix()));
750 ValuesVar->setVisibility(NamePtr->getVisibility());
751 ValuesVar->setSection(
752 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
753 ValuesVar->setAlignment(8);
754 ValuesVar->setComdat(ProfileVarsComdat);
756 ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
760 // Create data variable.
761 auto *Int16Ty = Type::getInt16Ty(Ctx);
762 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
// The field types of the data record come from InstrProfData.inc.
763 Type *DataTypes[] = {
764 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
765 #include "llvm/ProfileData/InstrProfData.inc"
767 auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
// The function address is recorded only when shouldRecordFunctionAddr allows
// it; otherwise a null pointer is stored.
769 Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
770 ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
771 : ConstantPointerNull::get(Int8PtrTy);
// Per-kind value-site counts as 16-bit constants.
773 Constant *Int16ArrayVals[IPVK_Last + 1];
774 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
775 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
// The field initializers also come from InstrProfData.inc.
777 Constant *DataVals[] = {
778 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
779 #include "llvm/ProfileData/InstrProfData.inc"
781 auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
782 ConstantStruct::get(DataTy, DataVals),
783 getVarName(Inc, getInstrProfDataVarPrefix()));
784 Data->setVisibility(NamePtr->getVisibility());
785 Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
786 Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
787 Data->setComdat(ProfileVarsComdat);
// Cache the result so later requests for this function return early.
789 PD.RegionCounters = CounterPtr;
791 ProfileDataMap[NamePtr] = PD;
793 // Mark the data variable as used so that it isn't stripped out.
794 UsedVars.push_back(Data);
795 // Now that the linkage set by the FE has been passed to the data and counter
796 // variables, reset Name variable's linkage and visibility to private so that
797 // it can be removed later by the compiler.
798 NamePtr->setLinkage(GlobalValue::PrivateLinkage);
799 // Collect the referenced names to be used by emitNameData.
800 ReferencedNames.push_back(NamePtr);
// Emits a private, zero-initialized array of value-profile nodes in the
// vnodes section and marks it as used. The array length is derived from the
// total number of value sites across all functions.
// NOTE(review): the early returns and the declaration of TotalNS are not
// visible in this excerpt.
805 void InstrProfiling::emitVNodes() {
806 if (!ValueProfileStaticAlloc)
809 // For now only support this on platforms that do
810 // not require runtime registration to discover
811 // named section start/end.
812 if (needsRuntimeRegistrationOfSectionRange(*M))
816 for (auto &PD : ProfileDataMap) {
817 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
818 TotalNS += PD.second.NumValueSites[Kind];
// NumCountersPerValueSite is a floating-point option; the product is
// converted to an integer count here.
824 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
825 // Heuristic for small programs with very few total value sites.
826 // The default value of vp-counters-per-site is chosen based on
827 // the observation that large apps usually have a low percentage
828 // of value sites that actually have any profile data, and thus
829 // the average number of counters per site is low. For small
830 // apps with very few sites, this may not be true. Bump up the
831 // number of counters in this case.
832 #define INSTR_PROF_MIN_VAL_COUNTS 10
// Below the minimum, use the larger of the minimum and twice the count.
833 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
834 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
836 auto &Ctx = M->getContext();
// The node layout comes from InstrProfData.inc.
837 Type *VNodeTypes[] = {
838 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
839 #include "llvm/ProfileData/InstrProfData.inc"
841 auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
843 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
844 auto *VNodesVar = new GlobalVariable(
845 *M, VNodesTy, false, GlobalValue::PrivateLinkage,
846 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
847 VNodesVar->setSection(
848 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
849 UsedVars.push_back(VNodesVar);
// Packs the names of all referenced name variables into one string constant
// (compressed according to DoNameCompression), emits it as a private constant
// in the names section, records its size, and erases the individual name
// variables.
// NOTE(review): the statement executed when ReferencedNames is empty is not
// visible in this excerpt.
852 void InstrProfiling::emitNameData() {
// NOTE(review): not referenced again in the visible lines.
853 std::string UncompressedData;
855 if (ReferencedNames.empty())
858 std::string CompressedNameStr;
// A failure to collect the names is reported as a fatal error.
859 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
860 DoNameCompression)) {
861 report_fatal_error(toString(std::move(E)), false);
864 auto &Ctx = M->getContext();
// The trailing 'false' asks getString not to append a null terminator.
865 auto *NamesVal = ConstantDataArray::getString(
866 Ctx, StringRef(CompressedNameStr), false);
867 NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
868 GlobalValue::PrivateLinkage, NamesVal,
869 getInstrProfNamesVarName());
870 NamesSize = CompressedNameStr.size();
871 NamesVar->setSection(
872 getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
873 UsedVars.push_back(NamesVar);
// The per-function name variables are no longer needed once packed.
875 for (auto *NamePtr : ReferencedNames)
876 NamePtr->eraseFromParent();
// Builds an internal function that calls the runtime's register function once
// for every used variable (skipping the names variable and functions) and
// then registers the names blob with its size.
// NOTE(review): the early return, the guard around the names registration and
// the function's terminator are not visible in this excerpt.
879 void InstrProfiling::emitRegistration() {
880 if (!needsRuntimeRegistrationOfSectionRange(*M))
883 // Construct the function.
884 auto *VoidTy = Type::getVoidTy(M->getContext());
885 auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
886 auto *Int64Ty = Type::getInt64Ty(M->getContext());
887 auto *RegisterFTy = FunctionType::get(VoidTy, false);
888 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
889 getInstrProfRegFuncsName(), M);
890 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
891 if (Options.NoRedZone)
892 RegisterF->addFnAttr(Attribute::NoRedZone);
// External declaration of the runtime entry point taking one i8* argument.
894 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
895 auto *RuntimeRegisterF =
896 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
897 getInstrProfRegFuncName(), M);
899 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
900 for (Value *Data : UsedVars)
901 if (Data != NamesVar && !isa<Function>(Data))
902 IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
// Names are registered through a separate entry point taking (i8*, i64).
905 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
906 auto *NamesRegisterTy =
907 FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
908 auto *NamesRegisterF =
909 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
910 getInstrProfNamesRegFuncName(), M);
911 IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
912 IRB.getInt64(NamesSize)});
// Emits an external declaration of the runtime hook variable and a hidden,
// noinline, linkonce_odr function that loads it, then marks that function as
// used.
// NOTE(review): the early-return values, the declaration of Var, the
// function's return instruction and the final return are not visible in this
// excerpt.
918 bool InstrProfiling::emitRuntimeHook() {
919 // We expect the linker to be invoked with -u<hook_var> flag for linux,
920 // for which case there is no need to emit the user function.
921 if (Triple(M->getTargetTriple()).isOSLinux())
924 // If the module's provided its own runtime, we don't need to do anything.
925 if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
928 // Declare an external variable that will pull in the runtime initialization.
929 auto *Int32Ty = Type::getInt32Ty(M->getContext());
// A null initializer makes this a declaration rather than a definition.
931 new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
932 nullptr, getInstrProfRuntimeHookVarName());
934 // Make a function that uses it.
935 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
936 GlobalValue::LinkOnceODRLinkage,
937 getInstrProfRuntimeHookVarUseFuncName(), M);
938 User->addFnAttr(Attribute::NoInline);
939 if (Options.NoRedZone)
940 User->addFnAttr(Attribute::NoRedZone);
941 User->setVisibility(GlobalValue::HiddenVisibility);
// On targets with COMDAT support the function goes in a comdat named after
// itself.
942 if (Triple(M->getTargetTriple()).supportsCOMDAT())
943 User->setComdat(M->getOrInsertComdat(User->getName()));
945 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
946 auto *Load = IRB.CreateLoad(Var);
949 // Mark the user variable as used so that it isn't stripped out.
950 UsedVars.push_back(User);
// Appends every collected variable to the module's used list; does nothing
// when no variables were collected.
954 void InstrProfiling::emitUses() {
955 if (!UsedVars.empty())
956 appendToUsed(*M, UsedVars);
// When an output file name is configured, emits it as a constant string
// variable. Then creates an internal, noinline initialization function that
// calls the registration function and adds it to the global constructors with
// priority 0.
// NOTE(review): the guards around the registration function lookup and call,
// and the initializer's terminator, are not visible in this excerpt.
959 void InstrProfiling::emitInitialization() {
960 StringRef InstrProfileOutput = Options.InstrProfileOutput;
962 if (!InstrProfileOutput.empty()) {
963 // Create variable for profile name.
964 Constant *ProfileNameConst =
965 ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
966 GlobalVariable *ProfileNameVar = new GlobalVariable(
967 *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
968 ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
// With COMDAT support the variable becomes external and is placed in a comdat
// named after the variable instead of staying weak.
969 if (TT.supportsCOMDAT()) {
970 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
971 ProfileNameVar->setComdat(M->getOrInsertComdat(
972 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
// Looks up the function that emitRegistration creates under this name.
976 Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
980 // Create the initialization function.
981 auto *VoidTy = Type::getVoidTy(M->getContext());
982 auto *F = Function::Create(FunctionType::get(VoidTy, false),
983 GlobalValue::InternalLinkage,
984 getInstrProfInitFuncName(), M);
985 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
986 F->addFnAttr(Attribute::NoInline);
987 if (Options.NoRedZone)
988 F->addFnAttr(Attribute::NoRedZone);
990 // Add the basic block and the necessary calls.
991 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
993 IRB.CreateCall(RegisterF, {});
996 appendToGlobalCtors(*M, F, 0);