1 //===- MergeFunctions.cpp - Merge identical functions ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass looks for equivalent functions that are mergable and folds them.
12 // Order relation is defined on set of functions. It was made through
13 // special function comparison procedure that returns
14 // 0 when functions are equal,
15 // -1 when Left function is less than right function, and
16 // 1 for opposite case. We need total-ordering, so we need to maintain
17 // four properties on the functions set:
18 // a <= a (reflexivity)
19 // if a <= b and b <= a then a = b (antisymmetry)
20 // if a <= b and b <= c then a <= c (transitivity).
21 // for all a and b: a <= b or b <= a (totality).
23 // Comparison iterates through each instruction in each basic block.
24 // Functions are kept on binary tree. For each new function F we perform
25 // lookup in binary tree.
26 // In practice it works the following way:
27 // -- We define Function* container class with custom "operator<" (FunctionPtr).
28 // -- "FunctionPtr" instances are stored in std::set collection, so every
29 // std::set::insert operation will give you result in log(N) time.
31 // As an optimization, a hash of the function structure is calculated first, and
32 // two functions are only compared if they have the same hash. This hash is
33 // cheap to compute, and has the property that if function F == G according to
34 // the comparison function, then hash(F) == hash(G). This consistency property
35 // is critical to ensuring all possible merging opportunities are exploited.
36 // Collisions in the hash affect the speed of the pass but not the correctness
37 // or determinism of the resulting transformation.
39 // When a match is found the functions are folded. If both functions are
40 // overridable, we move the functionality into a new internal function and
41 // leave two overridable thunks to it.
43 //===----------------------------------------------------------------------===//
47 // * virtual functions.
49 // Many functions have their address taken by the virtual function table for
50 // the object they belong to. However, as long as it's only used for a lookup
51 // and call, this is irrelevant, and we'd like to fold such functions.
53 // * be smarter about bitcasts.
55 // In order to fold functions, we will sometimes add either bitcast instructions
56 // or bitcast constant expressions. Unfortunately, this can confound further
57 // analysis since the two functions differ where one has a bitcast and the
58 // other doesn't. We should learn to look through bitcasts.
60 // * Compare complex types with pointer types inside.
61 // * Compare cross-reference cases.
62 // * Compare complex expressions.
64 // All the three issues above could be described as ability to prove that
65 // fA == fB == fC == fE == fF == fG in example below:
84 // Simplest cross-reference case (fA <--> fB) was implemented in previous
85 // versions of MergeFunctions, though it presented only in two function pairs
86 // in test-suite (that counts >50k functions)
87 // Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A)
88 // could cover much more cases.
90 //===----------------------------------------------------------------------===//
92 #include "llvm/ADT/Hashing.h"
93 #include "llvm/ADT/STLExtras.h"
94 #include "llvm/ADT/SmallSet.h"
95 #include "llvm/ADT/Statistic.h"
96 #include "llvm/IR/CallSite.h"
97 #include "llvm/IR/Constants.h"
98 #include "llvm/IR/DataLayout.h"
99 #include "llvm/IR/IRBuilder.h"
100 #include "llvm/IR/Instructions.h"
101 #include "llvm/IR/LLVMContext.h"
102 #include "llvm/IR/Module.h"
103 #include "llvm/IR/ValueHandle.h"
104 #include "llvm/IR/ValueMap.h"
105 #include "llvm/Pass.h"
106 #include "llvm/Support/CommandLine.h"
107 #include "llvm/Support/Debug.h"
108 #include "llvm/Support/ErrorHandling.h"
109 #include "llvm/Support/raw_ostream.h"
110 #include "llvm/Transforms/IPO.h"
111 #include "llvm/Transforms/Utils/FunctionComparator.h"
114 using namespace llvm;
116 #define DEBUG_TYPE "mergefunc"
118 STATISTIC(NumFunctionsMerged, "Number of functions merged");
119 STATISTIC(NumThunksWritten, "Number of thunks generated");
120 STATISTIC(NumAliasesWritten, "Number of aliases generated");
121 STATISTIC(NumDoubleWeak, "Number of new functions created");
123 static cl::opt<unsigned> NumFunctionsForSanityCheck(
125 cl::desc("How many functions in module could be used for "
126 "MergeFunctions pass sanity check. "
127 "'0' disables this check. Works only with '-debug' key."),
128 cl::init(0), cl::Hidden);
133 mutable AssertingVH<Function> F;
134 FunctionComparator::FunctionHash Hash;
136 // Note the hash is recalculated potentially multiple times, but it is cheap.
137 FunctionNode(Function *F)
138 : F(F), Hash(FunctionComparator::functionHash(*F)) {}
139 Function *getFunc() const { return F; }
140 FunctionComparator::FunctionHash getHash() const { return Hash; }
142 /// Replace the reference to the function F by the function G, assuming their
143 /// implementations are equal.
144 void replaceBy(Function *G) const {
148 void release() { F = nullptr; }
151 /// MergeFunctions finds functions which will generate identical machine code,
152 /// by considering all pointer types to be equivalent. Once identified,
153 /// MergeFunctions will fold them by replacing a call to one to a call to a
154 /// bitcast of the other.
156 class MergeFunctions : public ModulePass {
160 : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
161 HasGlobalAliases(false) {
162 initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
165 bool runOnModule(Module &M) override;
168 // The function comparison operator is provided here so that FunctionNodes do
169 // not need to become larger with another pointer.
170 class FunctionNodeCmp {
171 GlobalNumberState* GlobalNumbers;
173 FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
174 bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
175 // Order first by hashes, then full function comparison.
176 if (LHS.getHash() != RHS.getHash())
177 return LHS.getHash() < RHS.getHash();
178 FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
179 return FCmp.compare() == -1;
182 typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
184 GlobalNumberState GlobalNumbers;
186 /// A work queue of functions that may have been modified and should be
188 std::vector<WeakVH> Deferred;
190 /// Checks the rules of order relation introduced among functions set.
191 /// Returns true, if sanity check has been passed, and false if failed.
192 bool doSanityCheck(std::vector<WeakVH> &Worklist);
194 /// Insert a ComparableFunction into the FnTree, or merge it away if it's
195 /// equal to one that's already present.
196 bool insert(Function *NewFunction);
198 /// Remove a Function from the FnTree and queue it up for a second sweep of
200 void remove(Function *F);
202 /// Find the functions that use this Value and remove them from FnTree and
203 /// queue the functions.
204 void removeUsers(Value *V);
206 /// Replace all direct calls of Old with calls of New. Will bitcast New if
207 /// necessary to make types match.
208 void replaceDirectCallers(Function *Old, Function *New);
210 /// Merge two equivalent functions. Upon completion, G may be deleted, or may
211 /// be converted into a thunk. In either case, it should never be visited
213 void mergeTwoFunctions(Function *F, Function *G);
215 /// Replace G with a thunk or an alias to F. Deletes G.
216 void writeThunkOrAlias(Function *F, Function *G);
218 /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
219 /// of G with bitcast(F). Deletes G.
220 void writeThunk(Function *F, Function *G);
222 /// Replace G with an alias to F. Deletes G.
223 void writeAlias(Function *F, Function *G);
225 /// Replace function F with function G in the function tree.
226 void replaceFunctionInTree(const FunctionNode &FN, Function *G);
228 /// The set of all distinct functions. Use the insert() and remove() methods
229 /// to modify it. The map allows efficient lookup and deferring of Functions.
231 // Map functions to the iterators of the FunctionNode which contains them
232 // in the FnTree. This must be updated carefully whenever the FnTree is
233 // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
234 // dangling iterators into FnTree. The invariant that preserves this is that
235 // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
236 ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
238 /// Whether or not the target supports global aliases.
239 bool HasGlobalAliases;
242 } // end anonymous namespace
244 char MergeFunctions::ID = 0;
245 INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
247 ModulePass *llvm::createMergeFunctionsPass() {
248 return new MergeFunctions();
251 bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
252 if (const unsigned Max = NumFunctionsForSanityCheck) {
253 unsigned TripleNumber = 0;
256 dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
259 for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
260 I != E && i < Max; ++I, ++i) {
262 for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
263 Function *F1 = cast<Function>(*I);
264 Function *F2 = cast<Function>(*J);
265 int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
266 int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
268 // If F1 <= F2, then F2 >= F1, otherwise report failure.
270 dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
281 for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
282 ++k, ++K, ++TripleNumber) {
286 Function *F3 = cast<Function>(*K);
287 int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
288 int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
290 bool Transitive = true;
292 if (Res1 != 0 && Res1 == Res4) {
293 // F1 > F2, F2 > F3 => F1 > F3
294 Transitive = Res3 == Res1;
295 } else if (Res3 != 0 && Res3 == -Res4) {
296 // F1 > F3, F3 > F2 => F1 > F2
297 Transitive = Res3 == Res1;
298 } else if (Res4 != 0 && -Res3 == Res4) {
299 // F2 > F3, F3 > F1 => F2 > F1
300 Transitive = Res4 == -Res1;
304 dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
305 << TripleNumber << "\n";
306 dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
317 dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
323 bool MergeFunctions::runOnModule(Module &M) {
327 bool Changed = false;
329 // All functions in the module, ordered by hash. Functions with a unique
330 // hash value are easily eliminated.
331 std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
333 for (Function &Func : M) {
334 if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
335 HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
340 HashedFuncs.begin(), HashedFuncs.end(),
341 [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
342 const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
343 return a.first < b.first;
346 auto S = HashedFuncs.begin();
347 for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
348 // If the hash value matches the previous value or the next one, we must
349 // consider merging it. Otherwise it is dropped and never considered again.
350 if ((I != S && std::prev(I)->first == I->first) ||
351 (std::next(I) != IE && std::next(I)->first == I->first) ) {
352 Deferred.push_back(WeakVH(I->second));
357 std::vector<WeakVH> Worklist;
358 Deferred.swap(Worklist);
360 DEBUG(doSanityCheck(Worklist));
362 DEBUG(dbgs() << "size of module: " << M.size() << '\n');
363 DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
365 // Insert functions and merge them.
366 for (WeakVH &I : Worklist) {
369 Function *F = cast<Function>(I);
370 if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) {
371 Changed |= insert(F);
374 DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
375 } while (!Deferred.empty());
378 GlobalNumbers.clear();
383 // Replace direct callers of Old with New.
384 void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
385 Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
386 for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
389 CallSite CS(U->getUser());
390 if (CS && CS.isCallee(U)) {
391 // Transfer the called function's attributes to the call site. Due to the
392 // bitcast we will 'lose' ABI changing attributes because the 'called
393 // function' is no longer a Function* but the bitcast. Code that looks up
394 // the attributes from the called function will fail.
396 // FIXME: This is not actually true, at least not anymore. The callsite
397 // will always have the same ABI affecting attributes as the callee,
398 // because otherwise the original input has UB. Note that Old and New
399 // always have matching ABI, so no attributes need to be changed.
400 // Transferring other attributes may help other optimizations, but that
401 // should be done uniformly and not in this ad-hoc way.
402 auto &Context = New->getContext();
403 auto NewFuncAttrs = New->getAttributes();
404 auto CallSiteAttrs = CS.getAttributes();
406 CallSiteAttrs = CallSiteAttrs.addAttributes(
407 Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
409 for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
410 AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
411 if (Attrs.getNumSlots())
412 CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
415 CS.setAttributes(CallSiteAttrs);
417 remove(CS.getInstruction()->getParent()->getParent());
423 // Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
424 void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
425 if (HasGlobalAliases && G->hasGlobalUnnamedAddr()) {
426 if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
427 G->hasWeakLinkage()) {
436 // Helper for writeThunk,
437 // Selects proper bitcast operation,
438 // but a bit simpler then CastInst::getCastOpcode.
439 static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
440 Type *SrcTy = V->getType();
441 if (SrcTy->isStructTy()) {
442 assert(DestTy->isStructTy());
443 assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
444 Value *Result = UndefValue::get(DestTy);
445 for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
446 Value *Element = createCast(
447 Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
448 DestTy->getStructElementType(I));
451 Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
455 assert(!DestTy->isStructTy());
456 if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
457 return Builder.CreateIntToPtr(V, DestTy);
458 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
459 return Builder.CreatePtrToInt(V, DestTy);
461 return Builder.CreateBitCast(V, DestTy);
464 // Replace G with a simple tail call to bitcast(F). Also replace direct uses
465 // of G with bitcast(F). Deletes G.
466 void MergeFunctions::writeThunk(Function *F, Function *G) {
467 if (!G->isInterposable()) {
468 // Redirect direct callers of G to F.
469 replaceDirectCallers(G, F);
472 // If G was internal then we may have replaced all uses of G with F. If so,
473 // stop here and delete G. There's no need for a thunk.
474 if (G->hasLocalLinkage() && G->use_empty()) {
475 G->eraseFromParent();
479 Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
481 BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
482 IRBuilder<> Builder(BB);
484 SmallVector<Value *, 16> Args;
486 FunctionType *FFTy = F->getFunctionType();
487 for (Argument & AI : NewG->args()) {
488 Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
492 CallInst *CI = Builder.CreateCall(F, Args);
494 CI->setCallingConv(F->getCallingConv());
495 CI->setAttributes(F->getAttributes());
496 if (NewG->getReturnType()->isVoidTy()) {
497 Builder.CreateRetVoid();
499 Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
502 NewG->copyAttributesFrom(G);
505 G->replaceAllUsesWith(NewG);
506 G->eraseFromParent();
508 DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
512 // Replace G with an alias to F and delete G.
513 void MergeFunctions::writeAlias(Function *F, Function *G) {
514 auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
515 F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
517 GA->setVisibility(G->getVisibility());
519 G->replaceAllUsesWith(GA);
520 G->eraseFromParent();
522 DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
526 // Merge two equivalent functions. Upon completion, Function G is deleted.
527 void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
528 if (F->isInterposable()) {
529 assert(G->isInterposable());
531 // Make them both thunks to the same internal function.
532 Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
534 H->copyAttributesFrom(F);
537 F->replaceAllUsesWith(H);
539 unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
541 if (HasGlobalAliases) {
549 F->setAlignment(MaxAlignment);
550 F->setLinkage(GlobalValue::PrivateLinkage);
553 writeThunkOrAlias(F, G);
556 ++NumFunctionsMerged;
559 /// Replace function F by function G.
560 void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
562 Function *F = FN.getFunc();
563 assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
564 "The two functions must be equal");
566 auto I = FNodesInTree.find(F);
567 assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
568 assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
570 FnTreeType::iterator IterToFNInFnTree = I->second;
571 assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
572 // Remove F -> FN and insert G -> FN
573 FNodesInTree.erase(I);
574 FNodesInTree.insert({G, IterToFNInFnTree});
575 // Replace F with G in FN, which is stored inside the FnTree.
579 // Insert a ComparableFunction into the FnTree, or merge it away if equal to one
580 // that was already inserted.
581 bool MergeFunctions::insert(Function *NewFunction) {
582 std::pair<FnTreeType::iterator, bool> Result =
583 FnTree.insert(FunctionNode(NewFunction));
586 assert(FNodesInTree.count(NewFunction) == 0);
587 FNodesInTree.insert({NewFunction, Result.first});
588 DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
592 const FunctionNode &OldF = *Result.first;
594 // Don't merge tiny functions, since it can just end up making the function
596 // FIXME: Should still merge them if they are unnamed_addr and produce an
598 if (NewFunction->size() == 1) {
599 if (NewFunction->front().size() <= 2) {
600 DEBUG(dbgs() << NewFunction->getName()
601 << " is to small to bother merging\n");
606 // Impose a total order (by name) on the replacement of functions. This is
607 // important when operating on more than one module independently to prevent
608 // cycles of thunks calling each other when the modules are linked together.
610 // First of all, we process strong functions before weak functions.
611 if ((OldF.getFunc()->isInterposable() && !NewFunction->isInterposable()) ||
612 (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() &&
613 OldF.getFunc()->getName() > NewFunction->getName())) {
614 // Swap the two functions.
615 Function *F = OldF.getFunc();
616 replaceFunctionInTree(*Result.first, NewFunction);
618 assert(OldF.getFunc() != F && "Must have swapped the functions.");
621 DEBUG(dbgs() << " " << OldF.getFunc()->getName()
622 << " == " << NewFunction->getName() << '\n');
624 Function *DeleteF = NewFunction;
625 mergeTwoFunctions(OldF.getFunc(), DeleteF);
629 // Remove a function from FnTree. If it was already in FnTree, add
630 // it to Deferred so that we'll look at it in the next round.
631 void MergeFunctions::remove(Function *F) {
632 auto I = FNodesInTree.find(F);
633 if (I != FNodesInTree.end()) {
634 DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n");
635 FnTree.erase(I->second);
636 // I->second has been invalidated, remove it from the FNodesInTree map to
637 // preserve the invariant.
638 FNodesInTree.erase(I);
639 Deferred.emplace_back(F);
643 // For each instruction used by the value, remove() the function that contains
644 // the instruction. This should happen right before a call to RAUW.
645 void MergeFunctions::removeUsers(Value *V) {
646 std::vector<Value *> Worklist;
647 Worklist.push_back(V);
648 SmallSet<Value*, 8> Visited;
650 while (!Worklist.empty()) {
651 Value *V = Worklist.back();
654 for (User *U : V->users()) {
655 if (Instruction *I = dyn_cast<Instruction>(U)) {
656 remove(I->getParent()->getParent());
657 } else if (isa<GlobalValue>(U)) {
659 } else if (Constant *C = dyn_cast<Constant>(U)) {
660 for (User *UU : C->users()) {
661 if (!Visited.insert(UU).second)
662 Worklist.push_back(UU);