1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass removes the computation of provably redundant expressions that have
11 // been computed earlier in a previous iteration. It relies on the use of PHIs
12 // to identify loop carried dependences. This is scalar replacement for vector
15 //-----------------------------------------------------------------------------
16 // Motivation: Consider the case where we have the following loop structure.
29 // This can be converted to
42 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
43 // Such a loop comes to this pass in the following form.
48 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
58 // In this pass, we look for PHIs such as X2 whose incoming values come only
59 // from the Loop Preheader and over the backedge and additionally, both these
60 // values are the results of the same operation in terms of opcode. We call such
61 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2
62 // over X1 is carried over only one iteration and so the DepChain is only one
65 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the
66 // PHI coming over the backedge (X1). We stop at the first pair of such users
67 // I1 (of X2) and I2 (of X1) that meet the following conditions.
68 // 1. I1 and I2 are the same operation, but with different operands.
69 // 2. X2 and X1 are used at the same operand number in the two instructions.
70 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
71 // DepChain from Op1 to Op2 of the same length as that between X2 and X1.
73 // We then make the following transformation
78 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
79 // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
80 // t1 = f(X2) <-- Will be removed by DCE.
89 // We proceed until we cannot find any more such instructions I1 and I2.
91 // --- DepChains & Loop carried dependences ---
92 // Consider a single basic block loop such as
98 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
99 // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
103 // cond_branch <Loop>
105 // Then there is a dependence between X2 and X1 that goes back one iteration,
106 // i.e. X1 is used as X2 in the very next iteration. We represent this as a
107 // DepChain from X2 to X1 (X2->X1).
108 // Similarly, there is a dependence between Y2 and X1 that goes back two
109 // iterations. X1 is used as Y2 two iterations after it is computed. This is
110 // represented by a DepChain as (Y2->X2->X1).
112 // A DepChain has the following properties.
113 // 1. Number of Instructions in DepChain = Num of edges in DepChain + 1 = Number
114 // of iterations of carried dependence + 1.
115 // 2. All instructions in the DepChain except the last are PHIs.
117 //===----------------------------------------------------------------------===//
119 #include "llvm/ADT/SetVector.h"
120 #include "llvm/ADT/SmallVector.h"
121 #include "llvm/ADT/Statistic.h"
122 #include "llvm/Analysis/LoopInfo.h"
123 #include "llvm/Analysis/LoopPass.h"
124 #include "llvm/IR/BasicBlock.h"
125 #include "llvm/IR/DerivedTypes.h"
126 #include "llvm/IR/IRBuilder.h"
127 #include "llvm/IR/Instruction.h"
128 #include "llvm/IR/Instructions.h"
129 #include "llvm/IR/IntrinsicInst.h"
130 #include "llvm/IR/Intrinsics.h"
131 #include "llvm/IR/Use.h"
132 #include "llvm/IR/User.h"
133 #include "llvm/IR/Value.h"
134 #include "llvm/Pass.h"
135 #include "llvm/Support/Casting.h"
136 #include "llvm/Support/CommandLine.h"
137 #include "llvm/Support/Compiler.h"
138 #include "llvm/Support/Debug.h"
139 #include "llvm/Support/raw_ostream.h"
140 #include "llvm/Transforms/Scalar.h"
141 #include "llvm/Transforms/Utils.h"
149 using namespace llvm;
// Debug category used by the LLVM_DEBUG output in this file.
151 #define DEBUG_TYPE "hexagon-vlcr"
// Statistic counter; incremented once per replaced instruction in reuseValue().
153 STATISTIC(HexagonNumVectorLoopCarriedReuse,
154 "Number of values that were reused from a previous iteration.");
// Command-line option (initial value 2). findValueToReuse() compares each
// DepChain's iterations() against this limit before considering it.
156 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
158 cl::desc("Maximum distance of loop carried dependences that are handled"),
159 cl::init(2), cl::ZeroOrMore);
// Declarations of the pass initializer and the pass factory function.
163 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
164 Pass *createHexagonVectorLoopCarriedReusePass();
166 } // end namespace llvm
170 // See info about DepChain in the comments at the top of this file.
// Underlying storage for a chain: an ordered list of instruction pointers.
171 using ChainOfDependences = SmallVector<Instruction *, 4>;
// NOTE(review): the class header and most member bodies are not visible in
// this excerpt; only the visible statements are described below.
174 ChainOfDependences Chain;
// Compares this chain with Other: the sizes are checked first, then the
// instruction pointers are compared position by position.
177 bool isIdentical(DepChain &Other) const {
178 if (Other.size() != size())
180 ChainOfDependences &OtherChain = Other.getChain();
181 for (int i = 0; i < size(); ++i) {
182 if (Chain[i] != OtherChain[i])
// Accessor for the underlying instruction list (returned by reference).
188 ChainOfDependences &getChain() {
// Adds instruction I to the chain (body not visible here).
200 void push_back(Instruction *I) {
// Length of the carried dependence; callers compare it against
// HexagonVLCRIterationLim (body not visible here).
204 int iterations() const {
// First instruction of the chain.
208 Instruction *front() const {
209 return Chain.front();
// Last instruction of the chain (body not visible here).
212 Instruction *back() const {
// Indexed access to an element of the chain, returned by reference.
216 Instruction *&operator[](const int index) {
// The stream operator reads the private Chain member directly.
220 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
// Debug printer for a DepChain: prints a start banner, then every instruction
// except the last followed by " -->", then the last instruction on its own.
// NOTE(review): CD[ChainSize-1] assumes the chain is non-empty - confirm that
// callers never print an empty DepChain.
223 LLVM_ATTRIBUTE_UNUSED
224 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
225 const ChainOfDependences &CD = D.Chain;
226 int ChainSize = CD.size();
227 OS << "**DepChain Start::**\n";
228 for (int i = 0; i < ChainSize -1; ++i) {
229 OS << *(CD[i]) << " -->\n";
231 OS << *CD[ChainSize-1] << "\n";
// The instruction whose uses are replaced by the new PHI in reuseValue().
236 Instruction *Inst2Replace = nullptr;
238 // In the new PHI node that we'll construct this is the value that'll be
239 // used over the backedge. This is the value that gets reused from a
240 // previous iteration.
241 Instruction *BackedgeInst = nullptr;
243 ReuseValue() = default;
// Clears both fields so that isDefined() reports false again.
245 void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
// A candidate counts as defined once Inst2Replace has been set.
246 bool isDefined() { return Inst2Replace != nullptr; }
// Debug printer for a ReuseValue: prints the instruction to replace and the
// backedge instruction. Both pointers are dereferenced unconditionally, so
// this must only be called on a defined candidate with both fields set.
249 LLVM_ATTRIBUTE_UNUSED
250 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
251 OS << "** ReuseValue ***\n";
252 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
253 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
// Legacy-pass-manager loop pass implementing the transformation described in
// the comment at the top of this file.
// NOTE(review): some member declarations are not visible in this excerpt.
257 class HexagonVectorLoopCarriedReuse : public LoopPass {
// The constructor registers the pass with the global PassRegistry.
261 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
262 PassRegistry *PR = PassRegistry::getPassRegistry();
263 initializeHexagonVectorLoopCarriedReusePass(*PR);
266 StringRef getPassName() const override {
267 return "Hexagon-specific loop carried reuse for HVX vectors";
// Requires LoopInfo, LoopSimplify and LCSSA; declares that LCSSA and the CFG
// are preserved.
270 void getAnalysisUsage(AnalysisUsage &AU) const override {
271 AU.addRequired<LoopInfoWrapperPass>();
272 AU.addRequiredID(LoopSimplifyID);
273 AU.addRequiredID(LCSSAID);
274 AU.addPreservedID(LCSSAID);
275 AU.setPreservesCFG();
278 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
// DepChains allocated by findLoopCarriedDeps() and deleted in doVLCR().
281 SetVector<DepChain *> Dependences;
// Instructions already replaced by reuseValue(); findValueToReuse() skips them.
282 std::set<Instruction *> ReplacedInsts;
// Candidate filled in by findValueToReuse() and consumed by reuseValue().
284 ReuseValue ReuseCandidate;
// Collects a DepChain for each vector-typed PHI at the top of the loop header.
287 void findLoopCarriedDeps();
// Searches Dependences for a pair of equivalent users and records it in
// ReuseCandidate.
288 void findValueToReuse();
// Builds DepChain D starting from instruction I.
289 void findDepChainFromPHI(Instruction *I, DepChain &D);
// Looks up the value that PHI Op receives from block BB.
291 Value *findValueInBlock(Value *Op, BasicBlock *BB);
// Tests for a recorded DepChain from I1 to I2 spanning exactly Iters iterations.
292 bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
// Looks up the recorded DepChain whose front is I1 and whose back is I2.
293 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
// Tests whether I1 and I2 perform the same operation (same callee for calls,
// same integer-constant operands for vector-typed instructions).
294 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
// Filters out instructions that should not be considered for reuse.
295 bool canReplace(Instruction *I);
298 } // end anonymous namespace
// Pass identification object; passed to the LoopPass base-class constructor.
300 char HexagonVectorLoopCarriedReuse::ID = 0;
// Registers the pass under the argument "hexagon-vlcr" together with its
// dependencies on LoopInfo, LoopSimplify and LCSSA.
// NOTE(review): the description string used here differs from the one
// returned by getPassName().
302 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
303 "Hexagon-specific predictive commoning for HVX vectors", false, false)
304 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
305 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
306 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
307 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
308 "Hexagon-specific predictive commoning for HVX vectors", false, false)
// Pass entry point for loop L. Checks that L has a preheader, has no
// sub-loops and consists of exactly one basic block.
// NOTE(review): the statements taken when a check fails, and the rest of the
// body, are not visible in this excerpt.
310 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
// The transformation needs a preheader (reuseValue() inserts clones there).
314 if (!L->getLoopPreheader())
317 // Work only on innermost loops.
318 if (!L->getSubLoops().empty())
321 // Work only on single basic blocks loops.
322 if (L->getNumBlocks() != 1)
// Decides whether I1 and I2 perform the same operation. On top of
// isSameOperationAs(), calls are compared by callee and, for vector-typed
// instructions, integer-constant operands are compared by value.
// NOTE(review): the return statements and the guard before the constant
// comparison are not visible in this excerpt.
330 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
332 if (!I1->isSameOperationAs(I2))
334 // This check is in place specifically for intrinsics. isSameOperationAs will
335 // return true for any two hexagon intrinsics because they are essentially the
336 // same instruction (CallInst). We need to scratch the surface to see if they
337 // are calls to the same function.
338 if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
339 if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
340 if (C1->getCalledFunction() != C2->getCalledFunction())
345 // If both the Instructions are of Vector Type and any of the element
346 // is integer constant, check their values too for equivalence.
347 if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
// The operand count is taken from I1 and used to index the operands of both
// instructions.
348 unsigned NumOperands = I1->getNumOperands();
349 for (unsigned i = 0; i < NumOperands; ++i) {
350 ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
351 ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
// Constants are compared through their sign-extended 64-bit values.
354 if (C1->getSExtValue() != C2->getSExtValue())
// Filter applied to users of a PHI in findValueToReuse(). Calls to the
// hexagon_V6_hi and hexagon_V6_lo intrinsics are reported as
// "Not considering for reuse".
// NOTE(review): the return statements are not visible in this excerpt.
362 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
363 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
365 (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
366 II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
367 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
// Searches the recorded DepChains for a reuse opportunity. For each chain it
// collects the users of the leading PHI in the PHI's block, then looks among
// the users of the chain's last instruction for one that performs an
// equivalent operation and whose instruction operands are linked to the
// corresponding operands by DepChains of the same length. A match is stored
// in ReuseCandidate (Inst2Replace = PHI user, BackedgeInst = backedge user).
// NOTE(review): many lines of this function (loop exits, continue/return
// statements, closing braces) are not visible in this excerpt, so the exact
// control flow between the statements below is not documented here.
372 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
373 for (auto *D : Dependences) {
374 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
// Chains longer than the command-line limit are reported as skipped.
375 if (D->iterations() > HexagonVLCRIterationLim) {
378 << ".. Skipping because number of iterations > than the limit\n");
// The front of a chain is cast to a PHI; the back is the value that comes
// over the backedge.
382 PHINode *PN = cast<PHINode>(D->front());
383 Instruction *BEInst = D->back();
384 int Iters = D->iterations();
385 BasicBlock *BB = PN->getParent();
386 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
387 << " can be reused\n");
// Gather the candidate users of the PHI. The checks below look at the user's
// block, whether it was already replaced, whether it is a PHI, whether it may
// have side effects, and canReplace().
389 SmallVector<Instruction *, 4> PNUsers;
390 for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
392 Instruction *User = cast<Instruction>(U.getUser());
394 if (User->getParent() != BB)
396 if (ReplacedInsts.count(User)) {
397 LLVM_DEBUG(dbgs() << *User
398 << " has already been replaced. Skipping...\n");
401 if (isa<PHINode>(User))
403 if (User->mayHaveSideEffects())
405 if (!canReplace(User))
408 PNUsers.push_back(User);
410 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
412 // For each interesting use I of PN, find an Instruction BEUser that
413 // performs the same operation as I on BEInst and whose other operands,
414 // if any, can also be rematerialized in OtherBB. We stop when we find the
415 // first such Instruction BEUser. This is because once BEUser is
416 // rematerialized in OtherBB, we may find more such "fixup" opportunities
417 // in this block. So, we'll start over again.
418 for (Instruction *I : PNUsers) {
419 for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
422 Instruction *BEUser = cast<Instruction>(U.getUser());
424 if (BEUser->getParent() != BB)
426 if (!isEquivalentOperation(I, BEUser))
// Compare the operands of I and BEUser pairwise, at the same operand index.
429 int NumOperands = I->getNumOperands();
431 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
432 Value *Op = I->getOperand(OpNo);
433 Instruction *OpInst = dyn_cast<Instruction>(Op);
437 Value *BEOp = BEUser->getOperand(OpNo);
438 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
// Each operand pair must be linked by a recorded DepChain of length Iters.
440 if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
446 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
447 ReuseCandidate.Inst2Replace = I;
448 ReuseCandidate.BackedgeInst = BEUser;
451 ReuseCandidate.reset();
455 ReuseCandidate.reset();
// Looks up the value that Op, treated as a PHI, receives from predecessor
// block BB via getIncomingValueForBlock().
// NOTE(review): the handling of a non-PHI Op and the return statement are not
// visible in this excerpt.
458 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
460 PHINode *PN = dyn_cast<PHINode>(Op);
462 Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
// Applies the transformation for the current ReuseCandidate: clones
// Inst2Replace into the loop preheader once per carried iteration (with
// operands rewritten to the preheader incoming values of the corresponding
// DepChain PHIs), creates PHIs at the top of the loop block that merge those
// clones with the value coming over the backedge, and finally replaces all
// uses of Inst2Replace with the new PHI.
// NOTE(review): several lines (declarations of Iterations, IRB and NewPhi,
// some guards and closing braces) are not visible in this excerpt.
466 void HexagonVectorLoopCarriedReuse::reuseValue() {
467 LLVM_DEBUG(dbgs() << ReuseCandidate);
468 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
469 Instruction *BEInst = ReuseCandidate.BackedgeInst;
470 int NumOperands = Inst2Replace->getNumOperands();
// DepChain lookup table, indexed by operand instruction of Inst2Replace.
471 std::map<Instruction *, DepChain *> DepChains;
473 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
// Find the DepChain linking each instruction operand of Inst2Replace to the
// operand of BEInst at the same index; all chains must agree on the
// iteration count.
475 for (int i = 0; i < NumOperands; ++i) {
476 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
480 Instruction *J = cast<Instruction>(BEInst->getOperand(i));
481 DepChain *D = getDepChainBtwn(I, J);
484 "No DepChain between corresponding operands in ReuseCandidate\n");
485 if (Iterations == -1)
486 Iterations = D->iterations();
487 assert(Iterations == D->iterations() && "Iterations mismatch");
492 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
// Create one clone of Inst2Replace per iteration and insert it before the
// preheader's terminator.
494 SmallVector<Instruction *, 4> InstsInPreheader;
495 for (int i = 0; i < Iterations; ++i) {
496 Instruction *InstInPreheader = Inst2Replace->clone();
497 SmallVector<Value *, 4> Ops;
498 for (int j = 0; j < NumOperands; ++j) {
499 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
502 // Get the DepChain corresponding to this operand.
503 DepChain &D = *DepChains[I];
504 // Get the PHI for the iteration number and find
505 // the incoming value from the Loop Preheader for
507 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
508 InstInPreheader->setOperand(j, ValInPreheader);
510 InstsInPreheader.push_back(InstInPreheader);
511 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
512 InstInPreheader->insertBefore(LoopPH->getTerminator());
513 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
514 << LoopPH->getName() << "\n");
// New PHIs are created at the first non-PHI position of BEInst's block.
516 BasicBlock *BB = BEInst->getParent();
518 IRB.SetInsertPoint(BB->getFirstNonPHI());
519 Value *BEVal = BEInst;
// Walk the preheader clones from last to first; each new PHI takes the clone
// from the preheader and BEVal from the loop block.
521 for (int i = Iterations-1; i >=0 ; --i) {
522 Instruction *InstInPreheader = InstsInPreheader[i];
523 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
524 NewPhi->addIncoming(InstInPreheader, LoopPH);
525 NewPhi->addIncoming(BEVal, BB);
526 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
530 // We are in LCSSA form. So, a value defined inside the Loop is used only
531 // inside the loop. So, the following is safe.
532 Inst2Replace->replaceAllUsesWith(NewPhi);
// Remember the replaced instruction so findValueToReuse() skips it later,
// and bump the statistic.
533 ReplacedInsts.insert(Inst2Replace);
534 ++HexagonNumVectorLoopCarriedReuse;
// Driver for the transformation on CurLoop. Asserts that the loop is
// innermost and single-block, collects the loop-carried dependences, checks
// whether a reuse candidate was found, and deletes the DepChain objects
// allocated by findLoopCarriedDeps().
// NOTE(review): the iteration structure, the calls made when a candidate is
// defined and the return statement are not visible in this excerpt.
537 bool HexagonVectorLoopCarriedReuse::doVLCR() {
538 assert(CurLoop->getSubLoops().empty() &&
539 "Can do VLCR on the innermost loop only");
540 assert((CurLoop->getNumBlocks() == 1) &&
541 "Can do VLCR only on single block loops");
543 bool Changed = false;
546 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
548 // Reset datastructures.
552 findLoopCarriedDeps();
554 if (ReuseCandidate.isDefined()) {
// Dependences holds pointers created with new; delete each of them.
559 llvm::for_each(Dependences, std::default_delete<DepChain>());
// Builds DepChain D starting at I. When I is a PHI, the function examines its
// number of incoming values, its parent block, the value incoming over the
// backedge and the value incoming from the preheader, and then recurses on
// the backedge value.
// NOTE(review): the statements executed when each check fails, and the
// places where instructions are appended to D, are not visible in this
// excerpt.
564 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
566 PHINode *PN = dyn_cast<PHINode>(I);
571 auto NumIncomingValues = PN->getNumIncomingValues();
572 if (NumIncomingValues != 2) {
577 BasicBlock *BB = PN->getParent();
578 if (BB != CurLoop->getHeader()) {
// In a single-block loop the PHI's own block is also the backedge
// predecessor, so this fetches the value carried over the backedge.
583 Value *BEVal = PN->getIncomingValueForBlock(BB);
584 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
585 // This is a single block loop with a preheader, so at least
586 // one value should come over the backedge.
587 assert(BEInst && "There should be a value over the backedge");
590 PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
// The preheader value is checked for being a non-null Instruction.
591 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
// Continue the chain from the instruction that feeds the PHI over the backedge.
596 findDepChainFromPHI(BEInst, D);
// Scans Dependences for a DepChain whose front is I1, whose back is I2 and
// whose iterations() equals Iters.
// NOTE(review): the return statements are not visible in this excerpt.
600 bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
603 for (auto *D : Dependences) {
604 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
// Scans Dependences for a DepChain whose front is I1 and whose back is I2.
// Unlike isDepChainBtwn(), the chain length is not part of the match.
// NOTE(review): the return statements are not visible in this excerpt.
610 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
612 for (auto *D : Dependences) {
613 if (D->front() == I1 && D->back() == I2)
// Walks the leading PHIs of the loop header and, for each vector-typed PHI,
// allocates a DepChain and fills it via findDepChainFromPHI(). Chains are
// stored in Dependences; they are heap-allocated here and deleted in doVLCR().
// NOTE(review): the condition guarding the insert, and what happens to a
// chain that is not inserted, are not visible in this excerpt.
619 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
620 BasicBlock *BB = CurLoop->getHeader();
// Iteration stops at the first non-PHI instruction.
621 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
622 auto *PN = cast<PHINode>(I);
// Only PHIs of vector type are of interest.
623 if (!isa<VectorType>(PN->getType()))
626 DepChain *D = new DepChain();
627 findDepChainFromPHI(PN, *D);
629 Dependences.insert(D);
633 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
634 LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
635 ++i) { dbgs() << *Dependences[i] << "\n"; });
638 Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
639 return new HexagonVectorLoopCarriedReuse();