1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Instrumentation-based profile-guided optimization
12 //===----------------------------------------------------------------------===//
14 #include "CodeGenPGO.h"
15 #include "CodeGenFunction.h"
16 #include "CoverageMappingGen.h"
17 #include "clang/AST/RecursiveASTVisitor.h"
18 #include "clang/AST/StmtVisitor.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/IR/MDBuilder.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/MD5.h"
25 static llvm::cl::opt<bool> EnableValueProfiling(
26 "enable-value-profiling", llvm::cl::ZeroOrMore,
27 llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));
29 using namespace clang;
30 using namespace CodeGen;
32 void CodeGenPGO::setFuncName(StringRef Name,
33 llvm::GlobalValue::LinkageTypes Linkage) {
34 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
35 FuncName = llvm::getPGOFuncName(
36 Name, Linkage, CGM.getCodeGenOpts().MainFileName,
37 PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
39 // If we're generating a profile, create a variable for the name.
40 if (CGM.getCodeGenOpts().hasProfileClangInstr())
41 FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
44 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
45 setFuncName(Fn->getName(), Fn->getLinkage());
46 // Create PGOFuncName meta data.
47 llvm::createPGOFuncNameMetadata(*Fn, FuncName);
51 /// \brief Stable hasher for PGO region counters.
53 /// PGOHash produces a stable hash of a given function's control flow.
55 /// Changing the output of this hash will invalidate all previously generated
56 /// profiles -- i.e., don't do it.
58 /// \note When this hash does eventually change (years?), we still need to
59 /// support old hashes. We'll need to pull in the version number from the
60 /// profile data format and use the matching hash function.
66 static const int NumBitsPerType = 6;
67 static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
68 static const unsigned TooBig = 1u << NumBitsPerType;
71 /// \brief Hash values for AST nodes.
73 /// Distinct values for AST nodes that have region counters attached.
75 /// These values must be stable. All new members must be added at the end,
76 /// and no members should be removed. Changing the enumeration value for an
77 /// AST node will affect the hash of every function that contains that node.
78 enum HashType : unsigned char {
85 ObjCForCollectionStmt,
95 BinaryConditionalOperator,
97 // Keep this last. It's for the static assert that follows.
100 static_assert(LastHashType <= TooBig, "Too many types in HashType");
102 // TODO: When this format changes, take in a version number here, and use the
103 // old hash calculation for file formats that used the old hash.
104 PGOHash() : Working(0), Count(0) {}
105 void combine(HashType Type);
108 const int PGOHash::NumBitsPerType;
109 const unsigned PGOHash::NumTypesPerWord;
110 const unsigned PGOHash::TooBig;
112 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
113 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
114 /// The next counter value to assign.
115 unsigned NextCounter;
116 /// The function hash.
118 /// The map of statements to counters.
119 llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
121 MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
122 : NextCounter(0), CounterMap(CounterMap) {}
124 // Blocks and lambdas are handled as separate functions, so we need not
125 // traverse them in the parent context.
126 bool TraverseBlockExpr(BlockExpr *BE) { return true; }
127 bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
128 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
130 bool VisitDecl(const Decl *D) {
131 switch (D->getKind()) {
135 case Decl::CXXMethod:
136 case Decl::CXXConstructor:
137 case Decl::CXXDestructor:
138 case Decl::CXXConversion:
139 case Decl::ObjCMethod:
142 CounterMap[D->getBody()] = NextCounter++;
148 bool VisitStmt(const Stmt *S) {
149 auto Type = getHashType(S);
150 if (Type == PGOHash::None)
153 CounterMap[S] = NextCounter++;
157 PGOHash::HashType getHashType(const Stmt *S) {
158 switch (S->getStmtClass()) {
161 case Stmt::LabelStmtClass:
162 return PGOHash::LabelStmt;
163 case Stmt::WhileStmtClass:
164 return PGOHash::WhileStmt;
165 case Stmt::DoStmtClass:
166 return PGOHash::DoStmt;
167 case Stmt::ForStmtClass:
168 return PGOHash::ForStmt;
169 case Stmt::CXXForRangeStmtClass:
170 return PGOHash::CXXForRangeStmt;
171 case Stmt::ObjCForCollectionStmtClass:
172 return PGOHash::ObjCForCollectionStmt;
173 case Stmt::SwitchStmtClass:
174 return PGOHash::SwitchStmt;
175 case Stmt::CaseStmtClass:
176 return PGOHash::CaseStmt;
177 case Stmt::DefaultStmtClass:
178 return PGOHash::DefaultStmt;
179 case Stmt::IfStmtClass:
180 return PGOHash::IfStmt;
181 case Stmt::CXXTryStmtClass:
182 return PGOHash::CXXTryStmt;
183 case Stmt::CXXCatchStmtClass:
184 return PGOHash::CXXCatchStmt;
185 case Stmt::ConditionalOperatorClass:
186 return PGOHash::ConditionalOperator;
187 case Stmt::BinaryConditionalOperatorClass:
188 return PGOHash::BinaryConditionalOperator;
189 case Stmt::BinaryOperatorClass: {
190 const BinaryOperator *BO = cast<BinaryOperator>(S);
191 if (BO->getOpcode() == BO_LAnd)
192 return PGOHash::BinaryOperatorLAnd;
193 if (BO->getOpcode() == BO_LOr)
194 return PGOHash::BinaryOperatorLOr;
198 return PGOHash::None;
202 /// A StmtVisitor that propagates the raw counts through the AST and
203 /// records the count at statements where the value may change.
204 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
208 /// A flag that is set when the current count should be recorded on the
209 /// next statement, such as at the exit of a loop.
210 bool RecordNextStmtCount;
212 /// The count at the current location in the traversal.
213 uint64_t CurrentCount;
215 /// The map of statements to count values.
216 llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
218 /// BreakContinueStack - Keep counts of breaks and continues inside loops.
219 struct BreakContinue {
221 uint64_t ContinueCount;
222 BreakContinue() : BreakCount(0), ContinueCount(0) {}
224 SmallVector<BreakContinue, 8> BreakContinueStack;
226 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
228 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
230 void RecordStmtCount(const Stmt *S) {
231 if (RecordNextStmtCount) {
232 CountMap[S] = CurrentCount;
233 RecordNextStmtCount = false;
237 /// Set and return the current count.
238 uint64_t setCount(uint64_t Count) {
239 CurrentCount = Count;
243 void VisitStmt(const Stmt *S) {
245 for (const Stmt *Child : S->children())
250 void VisitFunctionDecl(const FunctionDecl *D) {
251 // Counter tracks entry to the function body.
252 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
253 CountMap[D->getBody()] = BodyCount;
257 // Skip lambda expressions. We visit these as FunctionDecls when we're
258 // generating them and aren't interested in the body when generating a
260 void VisitLambdaExpr(const LambdaExpr *LE) {}
262 void VisitCapturedDecl(const CapturedDecl *D) {
263 // Counter tracks entry to the capture body.
264 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
265 CountMap[D->getBody()] = BodyCount;
269 void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
270 // Counter tracks entry to the method body.
271 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
272 CountMap[D->getBody()] = BodyCount;
276 void VisitBlockDecl(const BlockDecl *D) {
277 // Counter tracks entry to the block body.
278 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
279 CountMap[D->getBody()] = BodyCount;
283 void VisitReturnStmt(const ReturnStmt *S) {
285 if (S->getRetValue())
286 Visit(S->getRetValue());
288 RecordNextStmtCount = true;
291 void VisitCXXThrowExpr(const CXXThrowExpr *E) {
294 Visit(E->getSubExpr());
296 RecordNextStmtCount = true;
299 void VisitGotoStmt(const GotoStmt *S) {
302 RecordNextStmtCount = true;
305 void VisitLabelStmt(const LabelStmt *S) {
306 RecordNextStmtCount = false;
307 // Counter tracks the block following the label.
308 uint64_t BlockCount = setCount(PGO.getRegionCount(S));
309 CountMap[S] = BlockCount;
310 Visit(S->getSubStmt());
313 void VisitBreakStmt(const BreakStmt *S) {
315 assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
316 BreakContinueStack.back().BreakCount += CurrentCount;
318 RecordNextStmtCount = true;
321 void VisitContinueStmt(const ContinueStmt *S) {
323 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
324 BreakContinueStack.back().ContinueCount += CurrentCount;
326 RecordNextStmtCount = true;
329 void VisitWhileStmt(const WhileStmt *S) {
331 uint64_t ParentCount = CurrentCount;
333 BreakContinueStack.push_back(BreakContinue());
334 // Visit the body region first so the break/continue adjustments can be
335 // included when visiting the condition.
336 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
337 CountMap[S->getBody()] = CurrentCount;
339 uint64_t BackedgeCount = CurrentCount;
341 // ...then go back and propagate counts through the condition. The count
342 // at the start of the condition is the sum of the incoming edges,
343 // the backedge from the end of the loop body, and the edges from
344 // continue statements.
345 BreakContinue BC = BreakContinueStack.pop_back_val();
347 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
348 CountMap[S->getCond()] = CondCount;
350 setCount(BC.BreakCount + CondCount - BodyCount);
351 RecordNextStmtCount = true;
354 void VisitDoStmt(const DoStmt *S) {
356 uint64_t LoopCount = PGO.getRegionCount(S);
358 BreakContinueStack.push_back(BreakContinue());
359 // The count doesn't include the fallthrough from the parent scope. Add it.
360 uint64_t BodyCount = setCount(LoopCount + CurrentCount);
361 CountMap[S->getBody()] = BodyCount;
363 uint64_t BackedgeCount = CurrentCount;
365 BreakContinue BC = BreakContinueStack.pop_back_val();
366 // The count at the start of the condition is equal to the count at the
367 // end of the body, plus any continues.
368 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
369 CountMap[S->getCond()] = CondCount;
371 setCount(BC.BreakCount + CondCount - LoopCount);
372 RecordNextStmtCount = true;
375 void VisitForStmt(const ForStmt *S) {
380 uint64_t ParentCount = CurrentCount;
382 BreakContinueStack.push_back(BreakContinue());
383 // Visit the body region first. (This is basically the same as a while
384 // loop; see further comments in VisitWhileStmt.)
385 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
386 CountMap[S->getBody()] = BodyCount;
388 uint64_t BackedgeCount = CurrentCount;
389 BreakContinue BC = BreakContinueStack.pop_back_val();
391 // The increment is essentially part of the body but it needs to include
392 // the count for all the continue statements.
394 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
395 CountMap[S->getInc()] = IncCount;
399 // ...then go back and propagate counts through the condition.
401 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
403 CountMap[S->getCond()] = CondCount;
406 setCount(BC.BreakCount + CondCount - BodyCount);
407 RecordNextStmtCount = true;
410 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
412 Visit(S->getLoopVarStmt());
413 Visit(S->getRangeStmt());
414 Visit(S->getBeginStmt());
415 Visit(S->getEndStmt());
417 uint64_t ParentCount = CurrentCount;
418 BreakContinueStack.push_back(BreakContinue());
419 // Visit the body region first. (This is basically the same as a while
420 // loop; see further comments in VisitWhileStmt.)
421 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
422 CountMap[S->getBody()] = BodyCount;
424 uint64_t BackedgeCount = CurrentCount;
425 BreakContinue BC = BreakContinueStack.pop_back_val();
427 // The increment is essentially part of the body but it needs to include
428 // the count for all the continue statements.
429 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
430 CountMap[S->getInc()] = IncCount;
433 // ...then go back and propagate counts through the condition.
435 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
436 CountMap[S->getCond()] = CondCount;
438 setCount(BC.BreakCount + CondCount - BodyCount);
439 RecordNextStmtCount = true;
442 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
444 Visit(S->getElement());
445 uint64_t ParentCount = CurrentCount;
446 BreakContinueStack.push_back(BreakContinue());
447 // Counter tracks the body of the loop.
448 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
449 CountMap[S->getBody()] = BodyCount;
451 uint64_t BackedgeCount = CurrentCount;
452 BreakContinue BC = BreakContinueStack.pop_back_val();
454 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
456 RecordNextStmtCount = true;
459 void VisitSwitchStmt(const SwitchStmt *S) {
463 BreakContinueStack.push_back(BreakContinue());
465 // If the switch is inside a loop, add the continue counts.
466 BreakContinue BC = BreakContinueStack.pop_back_val();
467 if (!BreakContinueStack.empty())
468 BreakContinueStack.back().ContinueCount += BC.ContinueCount;
469 // Counter tracks the exit block of the switch.
470 setCount(PGO.getRegionCount(S));
471 RecordNextStmtCount = true;
474 void VisitSwitchCase(const SwitchCase *S) {
475 RecordNextStmtCount = false;
476 // Counter for this particular case. This counts only jumps from the
477 // switch header and does not include fallthrough from the case before
479 uint64_t CaseCount = PGO.getRegionCount(S);
480 setCount(CurrentCount + CaseCount);
481 // We need the count without fallthrough in the mapping, so it's more useful
482 // for branch probabilities.
483 CountMap[S] = CaseCount;
484 RecordNextStmtCount = true;
485 Visit(S->getSubStmt());
488 void VisitIfStmt(const IfStmt *S) {
490 uint64_t ParentCount = CurrentCount;
493 // Counter tracks the "then" part of an if statement. The count for
494 // the "else" part, if it exists, will be calculated from this counter.
495 uint64_t ThenCount = setCount(PGO.getRegionCount(S));
496 CountMap[S->getThen()] = ThenCount;
498 uint64_t OutCount = CurrentCount;
500 uint64_t ElseCount = ParentCount - ThenCount;
503 CountMap[S->getElse()] = ElseCount;
505 OutCount += CurrentCount;
507 OutCount += ElseCount;
509 RecordNextStmtCount = true;
512 void VisitCXXTryStmt(const CXXTryStmt *S) {
514 Visit(S->getTryBlock());
515 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
516 Visit(S->getHandler(I));
517 // Counter tracks the continuation block of the try statement.
518 setCount(PGO.getRegionCount(S));
519 RecordNextStmtCount = true;
522 void VisitCXXCatchStmt(const CXXCatchStmt *S) {
523 RecordNextStmtCount = false;
524 // Counter tracks the catch statement's handler block.
525 uint64_t CatchCount = setCount(PGO.getRegionCount(S));
526 CountMap[S] = CatchCount;
527 Visit(S->getHandlerBlock());
530 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
532 uint64_t ParentCount = CurrentCount;
535 // Counter tracks the "true" part of a conditional operator. The
536 // count in the "false" part will be calculated from this counter.
537 uint64_t TrueCount = setCount(PGO.getRegionCount(E));
538 CountMap[E->getTrueExpr()] = TrueCount;
539 Visit(E->getTrueExpr());
540 uint64_t OutCount = CurrentCount;
542 uint64_t FalseCount = setCount(ParentCount - TrueCount);
543 CountMap[E->getFalseExpr()] = FalseCount;
544 Visit(E->getFalseExpr());
545 OutCount += CurrentCount;
548 RecordNextStmtCount = true;
551 void VisitBinLAnd(const BinaryOperator *E) {
553 uint64_t ParentCount = CurrentCount;
555 // Counter tracks the right hand side of a logical and operator.
556 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
557 CountMap[E->getRHS()] = RHSCount;
559 setCount(ParentCount + RHSCount - CurrentCount);
560 RecordNextStmtCount = true;
563 void VisitBinLOr(const BinaryOperator *E) {
565 uint64_t ParentCount = CurrentCount;
567 // Counter tracks the right hand side of a logical or operator.
568 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
569 CountMap[E->getRHS()] = RHSCount;
571 setCount(ParentCount + RHSCount - CurrentCount);
572 RecordNextStmtCount = true;
575 } // end anonymous namespace
577 void PGOHash::combine(HashType Type) {
578 // Check that we never combine 0 and only have six bits.
579 assert(Type && "Hash is invalid: unexpected type 0");
580 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
582 // Pass through MD5 if enough work has built up.
583 if (Count && Count % NumTypesPerWord == 0) {
584 using namespace llvm::support;
585 uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
586 MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
590 // Accumulate the current type.
592 Working = Working << NumBitsPerType | Type;
595 uint64_t PGOHash::finalize() {
596 // Use Working as the hash directly if we never used MD5.
597 if (Count <= NumTypesPerWord)
598 // No need to byte swap here, since none of the math was endian-dependent.
599 // This number will be byte-swapped as required on endianness transitions,
600 // so we will see the same value on the other side.
603 // Check for remaining work in Working.
607 // Finalize the MD5 and return the hash.
608 llvm::MD5::MD5Result Result;
610 using namespace llvm::support;
611 return endian::read<uint64_t, little, unaligned>(Result);
614 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
615 const Decl *D = GD.getDecl();
616 bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
617 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
618 if (!InstrumentRegions && !PGOReader)
622 // Constructors and destructors may be represented by several functions in IR.
623 // If so, instrument only base variant, others are implemented by delegation
624 // to the base one, it would be counted twice otherwise.
625 if (CGM.getTarget().getCXXABI().hasConstructorVariants() &&
626 ((isa<CXXConstructorDecl>(GD.getDecl()) &&
627 GD.getCtorType() != Ctor_Base) ||
628 (isa<CXXDestructorDecl>(GD.getDecl()) &&
629 GD.getDtorType() != Dtor_Base))) {
632 CGM.ClearUnusedCoverageMapping(D);
635 mapRegionCounters(D);
636 if (CGM.getCodeGenOpts().CoverageMapping)
637 emitCounterRegionMapping(D);
639 SourceManager &SM = CGM.getContext().getSourceManager();
640 loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
641 computeRegionCounts(D);
642 applyFunctionAttributes(PGOReader, Fn);
646 void CodeGenPGO::mapRegionCounters(const Decl *D) {
647 RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
648 MapRegionCounters Walker(*RegionCounterMap);
649 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
650 Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
651 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
652 Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
653 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
654 Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
655 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
656 Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
657 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
658 NumRegionCounters = Walker.NextCounter;
659 FunctionHash = Walker.Hash.finalize();
662 bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
663 if (SkipCoverageMapping)
666 // Don't map the functions in system headers.
667 const auto &SM = CGM.getContext().getSourceManager();
668 auto Loc = D->getBody()->getLocStart();
669 return SM.isInSystemHeader(Loc);
672 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
673 if (skipRegionMappingForDecl(D))
676 std::string CoverageMapping;
677 llvm::raw_string_ostream OS(CoverageMapping);
678 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
679 CGM.getContext().getSourceManager(),
680 CGM.getLangOpts(), RegionCounterMap.get());
681 MappingGen.emitCounterMapping(D, OS);
684 if (CoverageMapping.empty())
687 CGM.getCoverageMapping()->addFunctionMappingRecord(
688 FuncNameVar, FuncName, FunctionHash, CoverageMapping);
692 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
693 llvm::GlobalValue::LinkageTypes Linkage) {
694 if (skipRegionMappingForDecl(D))
697 std::string CoverageMapping;
698 llvm::raw_string_ostream OS(CoverageMapping);
699 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
700 CGM.getContext().getSourceManager(),
702 MappingGen.emitEmptyMapping(D, OS);
705 if (CoverageMapping.empty())
708 setFuncName(Name, Linkage);
709 CGM.getCoverageMapping()->addFunctionMappingRecord(
710 FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
713 void CodeGenPGO::computeRegionCounts(const Decl *D) {
714 StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
715 ComputeRegionCounts Walker(*StmtCountMap, *this);
716 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
717 Walker.VisitFunctionDecl(FD);
718 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
719 Walker.VisitObjCMethodDecl(MD);
720 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
721 Walker.VisitBlockDecl(BD);
722 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
723 Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
727 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
728 llvm::Function *Fn) {
729 if (!haveRegionCounts())
732 uint64_t FunctionCount = getRegionCount(nullptr);
733 Fn->setEntryCount(FunctionCount);
736 void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
737 if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
739 if (!Builder.GetInsertBlock())
742 unsigned Counter = (*RegionCounterMap)[S];
743 auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
744 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
745 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
746 Builder.getInt64(FunctionHash),
747 Builder.getInt32(NumRegionCounters),
748 Builder.getInt32(Counter)});
751 // This method either inserts a call to the profile run-time during
752 // instrumentation or puts profile data into metadata for PGO use.
753 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
754 llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
756 if (!EnableValueProfiling)
759 if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
762 if (isa<llvm::Constant>(ValuePtr))
765 bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
766 if (InstrumentValueSites && RegionCounterMap) {
767 auto BuilderInsertPoint = Builder.saveIP();
768 Builder.SetInsertPoint(ValueSite);
769 llvm::Value *Args[5] = {
770 llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
771 Builder.getInt64(FunctionHash),
772 Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
773 Builder.getInt32(ValueKind),
774 Builder.getInt32(NumValueSites[ValueKind]++)
777 CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
778 Builder.restoreIP(BuilderInsertPoint);
782 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
783 if (PGOReader && haveRegionCounts()) {
784 // We record the top most called three functions at each call site.
785 // Profile metadata contains "VP" string identifying this metadata
786 // as value profiling data, then a uint32_t value for the value profiling
787 // kind, a uint64_t value for the total number of times the call is
788 // executed, followed by the function hash and execution count (uint64_t)
789 // pairs for each function.
790 if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
793 llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
794 (llvm::InstrProfValueKind)ValueKind,
795 NumValueSites[ValueKind]);
797 NumValueSites[ValueKind]++;
801 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
803 CGM.getPGOStats().addVisited(IsInMainFile);
804 RegionCounts.clear();
805 llvm::Expected<llvm::InstrProfRecord> RecordExpected =
806 PGOReader->getInstrProfRecord(FuncName, FunctionHash);
807 if (auto E = RecordExpected.takeError()) {
808 auto IPE = llvm::InstrProfError::take(std::move(E));
809 if (IPE == llvm::instrprof_error::unknown_function)
810 CGM.getPGOStats().addMissing(IsInMainFile);
811 else if (IPE == llvm::instrprof_error::hash_mismatch)
812 CGM.getPGOStats().addMismatched(IsInMainFile);
813 else if (IPE == llvm::instrprof_error::malformed)
814 // TODO: Consider a more specific warning for this case.
815 CGM.getPGOStats().addMismatched(IsInMainFile);
819 llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
820 RegionCounts = ProfRecord->Counts;
823 /// \brief Calculate what to divide by to scale weights.
825 /// Given the maximum weight, calculate a divisor that will scale all the
826 /// weights to strictly less than UINT32_MAX.
827 static uint64_t calculateWeightScale(uint64_t MaxWeight) {
828 return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
831 /// \brief Scale an individual branch weight (and add 1).
833 /// Scale a 64-bit weight down to 32-bits using \c Scale.
835 /// According to Laplace's Rule of Succession, it is better to compute the
836 /// weight based on the count plus 1, so universally add 1 to the value.
838 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
839 /// greater than \c Weight.
840 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
841 assert(Scale && "scale by 0?");
842 uint64_t Scaled = Weight / Scale + 1;
843 assert(Scaled <= UINT32_MAX && "overflow 32-bits");
847 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
848 uint64_t FalseCount) {
849 // Check for empty weights.
850 if (!TrueCount && !FalseCount)
853 // Calculate how to scale down to 32-bits.
854 uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
856 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
857 return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
858 scaleBranchWeight(FalseCount, Scale));
862 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
863 // We need at least two elements to create meaningful weights.
864 if (Weights.size() < 2)
867 // Check for empty weights.
868 uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
872 // Calculate how to scale down to 32-bits.
873 uint64_t Scale = calculateWeightScale(MaxWeight);
875 SmallVector<uint32_t, 16> ScaledWeights;
876 ScaledWeights.reserve(Weights.size());
877 for (uint64_t W : Weights)
878 ScaledWeights.push_back(scaleBranchWeight(W, Scale));
880 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
881 return MDHelper.createBranchWeights(ScaledWeights);
884 llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
885 uint64_t LoopCount) {
886 if (!PGO.haveRegionCounts())
888 Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
889 assert(CondCount.hasValue() && "missing expected loop condition count");
892 return createProfileWeights(LoopCount,
893 std::max(*CondCount, LoopCount) - LoopCount);