1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //==-----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <deque>
#include <iterator>
#include <map>
#include <utility>
#include <vector>
49 #define DEBUG_TYPE "structcfg"
51 #define DEFAULT_VEC_SLOTS 8
55 //===----------------------------------------------------------------------===//
57 // Statistics for CFGStructurizer.
59 //===----------------------------------------------------------------------===//
61 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
63 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
65 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
66 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
70 void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
72 } // end namespace llvm
76 //===----------------------------------------------------------------------===//
78 // Miscellaneous utility for CFGStructurizer.
80 //===----------------------------------------------------------------------===//
82 #define SHOWNEWINSTR(i) \
83 DEBUG(dbgs() << "New instr: " << *i << "\n");
85 #define SHOWNEWBLK(b, msg) \
87 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
91 #define SHOWBLK_DETAIL(b, msg) \
94 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
100 #define INVALIDSCCNUM -1
102 //===----------------------------------------------------------------------===//
104 // supporting data structure for CFGStructurizer
106 //===----------------------------------------------------------------------===//
108 class BlockInformation {
110 bool IsRetired = false;
111 int SccNum = INVALIDSCCNUM;
113 BlockInformation() = default;
116 //===----------------------------------------------------------------------===//
120 //===----------------------------------------------------------------------===//
122 class AMDGPUCFGStructurizer : public MachineFunctionPass {
124 typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
125 typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
126 typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
130 SinglePath_InPath = 1,
131 SinglePath_NotInPath = 2
136 AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
137 initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
140 StringRef getPassName() const override {
141 return "AMDGPU Control Flow Graph structurizer Pass";
144 void getAnalysisUsage(AnalysisUsage &AU) const override {
145 AU.addRequired<MachineDominatorTree>();
146 AU.addRequired<MachinePostDominatorTree>();
147 AU.addRequired<MachineLoopInfo>();
148 MachineFunctionPass::getAnalysisUsage(AU);
151 /// Perform the CFG structurization
154 /// Perform the CFG preparation
155 /// This step will remove every unconditionnal/dead jump instructions and make
156 /// sure all loops have an exit block
159 bool runOnMachineFunction(MachineFunction &MF) override {
160 TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
161 TRI = &TII->getRegisterInfo();
166 MLI = &getAnalysis<MachineLoopInfo>();
167 DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
168 MDT = &getAnalysis<MachineDominatorTree>();
169 DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
170 PDT = &getAnalysis<MachinePostDominatorTree>();
171 DEBUG(PDT->print(dbgs()););
179 MachineDominatorTree *MDT;
180 MachinePostDominatorTree *PDT;
181 MachineLoopInfo *MLI;
182 const R600InstrInfo *TII = nullptr;
183 const R600RegisterInfo *TRI = nullptr;
186 /// Print the ordered Blocks.
187 void printOrderedBlocks() const {
189 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
190 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
191 dbgs() << "BB" << (*iterBlk)->getNumber();
192 dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
193 if (i != 0 && i % 10 == 0) {
201 static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
202 for (MachineLoop::iterator iter = LoopInfo.begin(),
203 iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
204 (*iter)->print(dbgs(), 0);
209 int getSCCNum(MachineBasicBlock *MBB) const;
210 MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
211 bool hasBackEdge(MachineBasicBlock *MBB) const;
212 bool isRetiredBlock(MachineBasicBlock *MBB) const;
213 bool isActiveLoophead(MachineBasicBlock *MBB) const;
214 PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
215 bool AllowSideEntry = true) const;
216 int countActiveBlock(MBBVector::const_iterator It,
217 MBBVector::const_iterator E) const;
218 bool needMigrateBlock(MachineBasicBlock *MBB) const;
221 void reversePredicateSetter(MachineBasicBlock::iterator I,
222 MachineBasicBlock &MBB);
223 /// Compute the reversed DFS post order of Blocks
224 void orderBlocks(MachineFunction *MF);
226 // Function originally from CFGStructTraits
227 void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
228 const DebugLoc &DL = DebugLoc());
229 MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
230 const DebugLoc &DL = DebugLoc());
231 MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
232 void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
234 void insertCondBranchBefore(MachineBasicBlock *MBB,
235 MachineBasicBlock::iterator I, int NewOpcode,
236 int RegNum, const DebugLoc &DL);
237 static int getBranchNzeroOpcode(int OldOpcode);
238 static int getBranchZeroOpcode(int OldOpcode);
239 static int getContinueNzeroOpcode(int OldOpcode);
240 static int getContinueZeroOpcode(int OldOpcode);
241 static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
242 static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
243 static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
245 static bool isCondBranch(MachineInstr *MI);
246 static bool isUncondBranch(MachineInstr *MI);
247 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
248 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
249 /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
251 /// BB with backward-edge could have move instructions after the branch
252 /// instruction. Such move instruction "belong to" the loop backward-edge.
253 MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
254 static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
255 static bool isReturnBlock(MachineBasicBlock *MBB);
256 static void cloneSuccessorList(MachineBasicBlock *DstMBB,
257 MachineBasicBlock *SrcMBB) ;
258 static MachineBasicBlock *clone(MachineBasicBlock *MBB);
259 /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
260 /// because the AMDGPU instruction is not recognized as terminator fix this
261 /// and retire this routine
262 void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
263 MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
264 static void wrapup(MachineBasicBlock *MBB);
266 int patternMatch(MachineBasicBlock *MBB);
267 int patternMatchGroup(MachineBasicBlock *MBB);
268 int serialPatternMatch(MachineBasicBlock *MBB);
269 int ifPatternMatch(MachineBasicBlock *MBB);
270 int loopendPatternMatch();
271 int mergeLoop(MachineLoop *LoopRep);
273 /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
274 /// the same loop with LoopLandInfo without explicitly keeping track of
275 /// loopContBlks and loopBreakBlks, this is a method to get the information.
276 bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
277 MachineBasicBlock *Src2MBB);
278 int handleJumpintoIf(MachineBasicBlock *HeadMBB,
279 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
280 int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
281 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
282 int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
283 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
284 MachineBasicBlock **LandMBBPtr);
285 void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
286 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
287 MachineBasicBlock *LandMBB, bool Detail = false);
288 int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
289 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
290 void mergeSerialBlock(MachineBasicBlock *DstMBB,
291 MachineBasicBlock *SrcMBB);
293 void mergeIfthenelseBlock(MachineInstr *BranchMI,
294 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
295 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
296 void mergeLooplandBlock(MachineBasicBlock *DstMBB,
297 MachineBasicBlock *LandMBB);
298 void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
299 MachineBasicBlock *LandMBB);
300 void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
301 MachineBasicBlock *ContMBB);
302 /// normalizeInfiniteLoopExit change
304 /// uncond_br LoopHeader
308 /// cond_br 1 LoopHeader dummyExit
309 /// and return the newly added dummy exit block
310 MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
311 void removeUnconditionalBranch(MachineBasicBlock *MBB);
312 /// Remove duplicate branches instructions in a block.
317 /// is transformed to
320 void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
321 void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
322 void removeSuccessor(MachineBasicBlock *MBB);
323 MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
324 MachineBasicBlock *PredMBB);
325 void migrateInstruction(MachineBasicBlock *SrcMBB,
326 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
327 void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
328 void retireBlock(MachineBasicBlock *MBB);
331 MBBInfoMap BlockInfoMap;
332 LoopLandInfoMap LLInfoMap;
333 std::map<MachineLoop *, bool> Visited;
334 MachineFunction *FuncRep;
335 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
338 char AMDGPUCFGStructurizer::ID = 0;
340 } // end anonymous namespace
342 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
343 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
344 if (It == BlockInfoMap.end())
345 return INVALIDSCCNUM;
346 return (*It).second->SccNum;
349 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
351 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
352 if (It == LLInfoMap.end())
357 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
358 MachineLoop *LoopRep = MLI->getLoopFor(MBB);
361 MachineBasicBlock *LoopHeader = LoopRep->getHeader();
362 return MBB->isSuccessor(LoopHeader);
365 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
366 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
367 if (It == BlockInfoMap.end())
369 return (*It).second->IsRetired;
372 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
373 MachineLoop *LoopRep = MLI->getLoopFor(MBB);
374 while (LoopRep && LoopRep->getHeader() == MBB) {
375 MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
378 if (!isRetiredBlock(LoopLand))
380 LoopRep = LoopRep->getParentLoop();
385 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
386 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
387 bool AllowSideEntry) const {
389 if (SrcMBB == DstMBB)
390 return SinglePath_InPath;
391 while (SrcMBB && SrcMBB->succ_size() == 1) {
392 SrcMBB = *SrcMBB->succ_begin();
393 if (SrcMBB == DstMBB)
394 return SinglePath_InPath;
395 if (!AllowSideEntry && SrcMBB->pred_size() > 1)
396 return Not_SinglePath;
398 if (SrcMBB && SrcMBB->succ_size()==0)
399 return SinglePath_NotInPath;
400 return Not_SinglePath;
403 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
404 MBBVector::const_iterator E) const {
407 if (!isRetiredBlock(*It))
414 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
415 unsigned BlockSizeThreshold = 30;
416 unsigned CloneInstrThreshold = 100;
417 bool MultiplePreds = MBB && (MBB->pred_size() > 1);
421 unsigned BlkSize = MBB->size();
422 return ((BlkSize > BlockSizeThreshold) &&
423 (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
426 void AMDGPUCFGStructurizer::reversePredicateSetter(
427 MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
428 assert(I.isValid() && "Expected valid iterator");
432 if (I->getOpcode() == AMDGPU::PRED_X) {
433 switch (I->getOperand(2).getImm()) {
434 case AMDGPU::PRED_SETE_INT:
435 I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
437 case AMDGPU::PRED_SETNE_INT:
438 I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
440 case AMDGPU::PRED_SETE:
441 I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
443 case AMDGPU::PRED_SETNE:
444 I->getOperand(2).setImm(AMDGPU::PRED_SETE);
447 llvm_unreachable("PRED_X Opcode invalid!");
453 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
454 int NewOpcode, const DebugLoc &DL) {
456 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
458 //assume the instruction doesn't take any reg operand ...
462 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
464 const DebugLoc &DL) {
466 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
467 if (MBB->begin() != MBB->end())
468 MBB->insert(MBB->begin(), MI);
475 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
476 MachineBasicBlock::iterator I, int NewOpcode) {
477 MachineInstr *OldMI = &(*I);
478 MachineBasicBlock *MBB = OldMI->getParent();
479 MachineInstr *NewMBB =
480 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
481 MBB->insert(I, NewMBB);
482 //assume the instruction doesn't take any reg operand ...
483 SHOWNEWINSTR(NewMBB);
487 void AMDGPUCFGStructurizer::insertCondBranchBefore(
488 MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
489 MachineInstr *OldMI = &(*I);
490 MachineBasicBlock *MBB = OldMI->getParent();
491 MachineFunction *MF = MBB->getParent();
492 MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
493 MBB->insert(I, NewMI);
494 MachineInstrBuilder MIB(*MF, NewMI);
495 MIB.addReg(OldMI->getOperand(1).getReg(), false);
497 //erase later oldInstr->eraseFromParent();
500 void AMDGPUCFGStructurizer::insertCondBranchBefore(
501 MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
502 int RegNum, const DebugLoc &DL) {
503 MachineFunction *MF = blk->getParent();
504 MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
506 blk->insert(I, NewInstr);
507 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
508 SHOWNEWINSTR(NewInstr);
511 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
513 case AMDGPU::JUMP_COND:
514 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
515 case AMDGPU::BRANCH_COND_i32:
516 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
517 default: llvm_unreachable("internal error");
522 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
524 case AMDGPU::JUMP_COND:
525 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
526 case AMDGPU::BRANCH_COND_i32:
527 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
528 default: llvm_unreachable("internal error");
533 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
535 case AMDGPU::JUMP_COND:
536 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
537 default: llvm_unreachable("internal error");
542 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
544 case AMDGPU::JUMP_COND:
545 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
546 default: llvm_unreachable("internal error");
551 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
552 return MI->getOperand(0).getMBB();
555 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
556 MachineBasicBlock *MBB) {
557 MI->getOperand(0).setMBB(MBB);
561 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
563 assert(MBB->succ_size() == 2);
564 MachineBasicBlock *TrueBranch = getTrueBranch(MI);
565 MachineBasicBlock::succ_iterator It = MBB->succ_begin();
566 MachineBasicBlock::succ_iterator Next = It;
568 return (*It == TrueBranch) ? *Next : *It;
571 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
572 switch (MI->getOpcode()) {
573 case AMDGPU::JUMP_COND:
574 case AMDGPU::BRANCH_COND_i32:
575 case AMDGPU::BRANCH_COND_f32: return true;
582 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
583 switch (MI->getOpcode()) {
593 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
594 //get DebugLoc from the first MachineBasicBlock instruction with debug info
596 for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
598 MachineInstr *instr = &(*It);
599 if (instr->getDebugLoc())
600 DL = instr->getDebugLoc();
605 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
606 MachineBasicBlock *MBB) {
607 MachineBasicBlock::reverse_iterator It = MBB->rbegin();
608 MachineInstr *MI = &*It;
609 if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
614 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
615 MachineBasicBlock *MBB) {
616 for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
619 MachineInstr *MI = &*It;
621 if (isCondBranch(MI) || isUncondBranch(MI))
623 else if (!TII->isMov(MI->getOpcode()))
630 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
631 MachineBasicBlock::reverse_iterator It = MBB->rbegin();
632 if (It != MBB->rend()) {
633 MachineInstr *instr = &(*It);
634 if (instr->getOpcode() == AMDGPU::RETURN)
640 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
641 MachineInstr *MI = getReturnInstr(MBB);
642 bool IsReturn = (MBB->succ_size() == 0);
647 dbgs() << "BB" << MBB->getNumber()
648 <<" is return block without RETURN instr\n";);
652 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
653 MachineBasicBlock *SrcMBB) {
654 for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
655 iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
656 DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
659 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
660 MachineFunction *Func = MBB->getParent();
661 MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
662 Func->push_back(NewMBB); //insert to function
663 for (const MachineInstr &It : *MBB)
664 NewMBB->push_back(Func->CloneMachineInstr(&It));
668 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
669 MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
670 MachineBasicBlock *NewBlk) {
671 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
672 if (BranchMI && isCondBranch(BranchMI) &&
673 getTrueBranch(BranchMI) == OldMBB)
674 setTrueBranch(BranchMI, NewBlk);
677 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
678 assert((!MBB->getParent()->getJumpTableInfo()
679 || MBB->getParent()->getJumpTableInfo()->isEmpty())
680 && "found a jump table");
682 //collect continue right before endloop
683 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
684 MachineBasicBlock::iterator Pre = MBB->begin();
685 MachineBasicBlock::iterator E = MBB->end();
686 MachineBasicBlock::iterator It = Pre;
688 if (Pre->getOpcode() == AMDGPU::CONTINUE
689 && It->getOpcode() == AMDGPU::ENDLOOP)
690 ContInstr.push_back(&*Pre);
695 //delete continue right before endloop
696 for (unsigned i = 0; i < ContInstr.size(); ++i)
697 ContInstr[i]->eraseFromParent();
699 // TODO to fix up jump table so later phase won't be confused. if
700 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
701 // there isn't such an interface yet. alternatively, replace all the other
702 // blocks in the jump table with the entryBlk //}
705 bool AMDGPUCFGStructurizer::prepare() {
706 bool Changed = false;
708 //FIXME: if not reducible flow graph, make it so ???
710 DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
712 orderBlocks(FuncRep);
714 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
716 // Add an ExitBlk to loop that don't have one
717 for (MachineLoopInfo::iterator It = MLI->begin(),
718 E = MLI->end(); It != E; ++It) {
719 MachineLoop *LoopRep = (*It);
720 MBBVector ExitingMBBs;
721 LoopRep->getExitingBlocks(ExitingMBBs);
723 if (ExitingMBBs.size() == 0) {
724 MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
726 RetBlks.push_back(DummyExitBlk);
730 // Remove unconditional branch instr.
731 // Add dummy exit block iff there are multiple returns.
732 for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
733 It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
734 MachineBasicBlock *MBB = *It;
735 removeUnconditionalBranch(MBB);
736 removeRedundantConditionalBranch(MBB);
737 if (isReturnBlock(MBB)) {
738 RetBlks.push_back(MBB);
740 assert(MBB->succ_size() <= 2);
743 if (RetBlks.size() >= 2) {
744 addDummyExitBlock(RetBlks);
751 bool AMDGPUCFGStructurizer::run() {
752 //Assume reducible CFG...
753 DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
756 //Use the worse block ordering to test the algorithm.
757 ReverseVector(orderedBlks);
760 DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
763 MachineBasicBlock *MBB;
764 bool MakeProgress = false;
765 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
771 dbgs() << "numIter = " << NumIter
772 << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
775 SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
777 SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
780 SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
782 MachineBasicBlock *SccBeginMBB = nullptr;
783 int SccNumBlk = 0; // The number of active blocks, init to a
784 // maximum possible number.
785 int SccNumIter; // Number of iteration in this SCC.
794 SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
796 dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
801 if (!isRetiredBlock(MBB))
806 bool ContNextScc = true;
808 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
809 // Just finish one scc.
811 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
812 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
814 dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
815 << ", sccNumIter = " << SccNumIter;
816 dbgs() << "doesn't make any progress\n";
819 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
820 SccNumBlk = sccRemainedNumBlk;
824 dbgs() << "repeat processing SCC" << getSCCNum(MBB)
825 << "sccNumIter = " << SccNumIter << '\n';
828 // Finish the current scc.
832 // Continue on next component in the current scc.
837 SccBeginMBB = nullptr;
838 } //while, "one iteration" over the function.
840 MachineBasicBlock *EntryMBB =
841 *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
842 if (EntryMBB->succ_size() == 0) {
845 dbgs() << "Reduce to one block\n";
848 int NewnumRemainedBlk
849 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
850 // consider cloned blocks ??
851 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
853 NumRemainedBlk = NewnumRemainedBlk;
855 MakeProgress = false;
857 dbgs() << "No progress\n";
861 } while (!Finish && MakeProgress);
863 // Misc wrap up to maintain the consistency of the Function representation.
864 wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
866 // Detach retired Block, release memory.
867 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
869 if ((*It).second && (*It).second->IsRetired) {
870 assert(((*It).first)->getNumber() != -1);
872 dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
874 (*It).first->eraseFromParent(); //Remove from the parent Function.
878 BlockInfoMap.clear();
882 DEBUG(FuncRep->viewCFG());
883 report_fatal_error("IRREDUCIBLE_CFG");
889 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
891 MachineBasicBlock *MBB;
892 for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
894 const std::vector<MachineBasicBlock *> &SccNext = *It;
895 for (std::vector<MachineBasicBlock *>::const_iterator
896 blockIter = SccNext.begin(), blockEnd = SccNext.end();
897 blockIter != blockEnd; ++blockIter) {
899 OrderedBlks.push_back(MBB);
900 recordSccnum(MBB, SccNum);
904 // walk through all the block in func to check for unreachable
905 for (auto *MBB : nodes(MF)) {
906 SccNum = getSCCNum(MBB);
907 if (SccNum == INVALIDSCCNUM)
908 dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
912 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
917 dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
920 while ((CurMatch = patternMatchGroup(MBB)) > 0)
921 NumMatch += CurMatch;
924 dbgs() << "End patternMatch BB" << MBB->getNumber()
925 << ", numMatch = " << NumMatch << "\n";
931 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
933 NumMatch += loopendPatternMatch();
934 NumMatch += serialPatternMatch(MBB);
935 NumMatch += ifPatternMatch(MBB);
939 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
940 if (MBB->succ_size() != 1)
943 MachineBasicBlock *childBlk = *MBB->succ_begin();
944 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
947 mergeSerialBlock(MBB, childBlk);
948 ++numSerialPatternMatch;
952 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
954 if (MBB->succ_size() != 2)
956 if (hasBackEdge(MBB))
958 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
962 assert(isCondBranch(BranchMI));
965 MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
966 NumMatch += serialPatternMatch(TrueMBB);
967 NumMatch += ifPatternMatch(TrueMBB);
968 MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
969 NumMatch += serialPatternMatch(FalseMBB);
970 NumMatch += ifPatternMatch(FalseMBB);
971 MachineBasicBlock *LandBlk;
974 assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
976 if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
977 && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
979 LandBlk = *TrueMBB->succ_begin();
980 } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
981 // Triangle pattern, false is empty
984 } else if (FalseMBB->succ_size() == 1
985 && *FalseMBB->succ_begin() == TrueMBB) {
986 // Triangle pattern, true is empty
987 // We reverse the predicate to make a triangle, empty false pattern;
988 std::swap(TrueMBB, FalseMBB);
989 reversePredicateSetter(MBB->end(), *MBB);
992 } else if (FalseMBB->succ_size() == 1
993 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
994 LandBlk = *FalseMBB->succ_begin();
995 } else if (TrueMBB->succ_size() == 1
996 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
997 LandBlk = *TrueMBB->succ_begin();
999 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1002 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
1003 // new BB created for landBlk==NULL may introduce new challenge to the
1004 // reduction process.
1006 ((TrueMBB && TrueMBB->pred_size() > 1)
1007 || (FalseMBB && FalseMBB->pred_size() > 1))) {
1008 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1011 if (TrueMBB && TrueMBB->pred_size() > 1) {
1012 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1016 if (FalseMBB && FalseMBB->pred_size() > 1) {
1017 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1021 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1023 ++numIfPatternMatch;
1025 numClonedBlock += Cloned;
1027 return 1 + Cloned + NumMatch;
1030 int AMDGPUCFGStructurizer::loopendPatternMatch() {
1031 std::deque<MachineLoop *> NestedLoops;
1032 for (auto &It: *MLI)
1033 for (MachineLoop *ML : depth_first(It))
1034 NestedLoops.push_front(ML);
1036 if (NestedLoops.empty())
1039 // Process nested loop outside->inside (we did push_front),
1040 // so "continue" to a outside loop won't be mistaken as "break"
1041 // of the current loop.
1043 for (MachineLoop *ExaminedLoop : NestedLoops) {
1044 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1046 DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1047 int NumBreak = mergeLoop(ExaminedLoop);
1055 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1056 MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1057 MBBVector ExitingMBBs;
1058 LoopRep->getExitingBlocks(ExitingMBBs);
1059 assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1060 DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1061 // We assume a single ExitBlk
1063 LoopRep->getExitBlocks(ExitBlks);
1064 SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1065 for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1066 ExitBlkSet.insert(ExitBlks[i]);
1067 assert(ExitBlkSet.size() == 1);
1068 MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1069 assert(ExitBlk && "Loop has several exit block");
1070 MBBVector LatchBlks;
1071 for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1072 if (LoopRep->contains(LB))
1073 LatchBlks.push_back(LB);
1075 for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1076 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1077 for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1078 settleLoopcontBlock(LatchBlks[i], LoopHeader);
1082 Match += serialPatternMatch(LoopHeader);
1083 Match += ifPatternMatch(LoopHeader);
1084 } while (Match > 0);
1085 mergeLooplandBlock(LoopHeader, ExitBlk);
1086 MachineLoop *ParentLoop = LoopRep->getParentLoop();
1088 MLI->changeLoopFor(LoopHeader, ParentLoop);
1090 MLI->removeBlock(LoopHeader);
1091 Visited[LoopRep] = true;
1095 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1096 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1097 if (Src1MBB->succ_size() == 0) {
1098 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1099 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1100 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1103 dbgs() << "isLoopContBreakBlock yes src1 = BB"
1104 << Src1MBB->getNumber()
1105 << " src2 = BB" << Src2MBB->getNumber() << "\n";
1114 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1115 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1116 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1119 dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1121 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
// Implementation of handleJumpintoIf: walk down the post-dominator chain from
// a candidate DownBlk; when FalseMBB reaches DownBlk along a single path,
// clone side entries of both arms onto DownBlk and re-run the serial/if
// pattern matchers on HeadMBB's successors.  Returns the accumulated number
// of matches (declaration/initialization of Num not visible here).
int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
  MachineBasicBlock *DownBlk;
  //trueBlk could be the common post dominator
    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
           << " true = BB" << TrueMBB->getNumber()
           << ", numSucc=" << TrueMBB->succ_size()
           << " false = BB" << FalseMBB->getNumber() << "\n";
      dbgs() << "check down = BB" << DownBlk->getNumber();
    // The candidate works only if FalseMBB funnels into DownBlk on one path.
    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
        dbgs() << " working\n";
      // Duplicate any side-entered blocks between each arm and DownBlk.
      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
      numClonedBlock += Num;
      // Re-run pattern matching now that the CFG has been simplified.
      Num += serialPatternMatch(*HeadMBB->succ_begin());
      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
      Num += ifPatternMatch(HeadMBB);
        dbgs() << " not working\n";
    // Advance to the unique successor, or stop if DownBlk branches.
    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
  } // walk down the postDomTree
// Debug-only dump of the four blocks involved in improveSimpleJumpintoIf.
// Prints block number/size (and predecessor counts) for head/true/false/land;
// when Detail is set the full block bodies are printed (the guards testing
// Detail are on lines elided here).
void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
  dbgs() << "head = BB" << HeadMBB->getNumber()
         << " size = " << HeadMBB->size();
    HeadMBB->print(dbgs());
  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
         << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
    TrueMBB->print(dbgs());
  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
         << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
    FalseMBB->print(dbgs());
  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
         << LandMBB->size() << " numPred = " << LandMBB->pred_size();
    LandMBB->print(dbgs());
// Rewrite a "jump into if" region so it becomes structurizable: when one (or
// both) of the if-arms is entered from outside the diamond, the arm's body is
// migrated into the landing block behind an initReg-guarded if/else, and the
// original arms only set initReg.  *LandMBBPtr is updated to the (possibly
// rebuilt) landing block.  Several unhandled CFG shapes bail out via
// report_fatal_error ("Extra register needed...") because this pass runs
// after register allocation and cannot create the registers it would need.
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    MachineBasicBlock **LandMBBPtr) {
  bool MigrateTrue = false;
  bool MigrateFalse = false;
  MachineBasicBlock *LandBlk = *LandMBBPtr;
  // Each arm, when present, must funnel into at most one successor.
  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
         && (!FalseMBB || FalseMBB->succ_size() <= 1));
  if (TrueMBB == FalseMBB)
  MigrateTrue = needMigrateBlock(TrueMBB);
  MigrateFalse = needMigrateBlock(FalseMBB);
  if (!MigrateTrue && !MigrateFalse)
  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
  // have more than one predecessors. without doing this, its predecessor
  // rather than headBlk will have undefined value in initReg.
  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
    MigrateFalse = true;
    dbgs() << "before improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
  //
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
  //      {initReg = 0; org falseBlk branch }
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
  //
  // if landBlk->pred_size() > 2, put the about if-else inside
  // if (initReg !=2) {...}
  //
  // add initReg = initVal to headBlk
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
  if (!MigrateTrue || !MigrateFalse) {
    // XXX: We have an opportunity here to optimize the "branch into if" case
    // here. Branch into if looks like this:
    //                        diamond_head branch_from
    //                        diamond_false diamond_true
    // The diamond_head block begins the "if" and the diamond_true block
    // is the block being "branched into".
    //
    // If MigrateTrue is true, then TrueBB is the block being "branched into"
    // and if MigrateFalse is true, then FalseBB is the block being
    //
    // Here is the pseudo code for how I think the optimization should work:
    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
    // 3. Move the branch instruction from diamond_head into its own basic
    //    block (new_block).
    // 4. Add an unconditional branch from diamond_head to new_block
    // 5. Replace the branch instruction in branch_from with an unconditional
    //    branch to new_block. If branch_from has multiple predecessors, then
    //    we need to replace the True/False block in the branch
    //    instruction instead of replacing it.
    // 6. Change the condition of the branch instruction in new_block from
    //    COND to (COND || GPR0)
    //
    // In order insert these MOV instruction, we will need to use the
    // RegisterScavenger. Usually liveness stops being tracked during
    // the late machine optimization passes, however if we implement
    // bool TargetRegisterInfo::requiresRegisterScavenging(
    //     const MachineFunction &MF)
    // and have it return true, liveness will be tracked correctly
    // by generic optimization passes. We will also need to make sure that
    // all of our target-specific passes that run after regalloc and before
    // the CFGStructurizer track liveness and we will need to modify this pass
    // to correctly track liveness.
    //
    // After the above changes, the new CFG should look like this:
    //                        diamond_head branch_from
    //                        diamond_false diamond_true
    //
    // Without this optimization, we are forced to duplicate the diamond_true
    // block and we will end up with a CFG like this:
    //                        diamond_head branch_from
    //                        diamond_false diamond_true diamond_true (duplicate)
    //                        done --------------------|
    //
    // Duplicating diamond_true can be very costly especially if it has a
    // lot of instructions.
  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
  if (LandBlkHasOtherPred) {
    // Unsupported shape: bail.  report_fatal_error does not return, so the
    // code below this call in the same branch is effectively dead.
    report_fatal_error("Extra register needed to handle CFG");
    unsigned CmpResReg =
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    report_fatal_error("Extra compare instruction needed to handle CFG");
    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
        CmpResReg, DebugLoc());
  // XXX: We are running this after RA, so creating virtual registers will
  // cause an assertion failure in the PostRA scheduling pass.
    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
    // Move the true arm's body into the landing block ahead of I.
    migrateInstruction(TrueMBB, LandBlk, I);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    report_fatal_error("Extra register needed to handle CFG");
  insertInstrBefore(I, AMDGPU::ELSE);
    // Move the false arm's body into the landing block ahead of I.
    migrateInstruction(FalseMBB, LandBlk, I);
    // need to uncondionally insert the assignment to ensure a path from its
    // predecessor rather than headBlk has valid value in initReg if
    report_fatal_error("Extra register needed to handle CFG");
  if (LandBlkHasOtherPred) {
    insertInstrBefore(I, AMDGPU::ENDIF);
    // put initReg = 2 to other predecessors of landBlk
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
         PE = LandBlk->pred_end(); PI != PE; ++PI) {
      MachineBasicBlock *MBB = *PI;
      if (MBB != TrueMBB && MBB != FalseMBB)
        report_fatal_error("Extra register needed to handle CFG");
    dbgs() << "result from improveSimpleJumpintoIf: ";
    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
  // Publish the (possibly new) landing block back to the caller.
  *LandMBBPtr = LandBlk;
// Collapse a serial pattern: append SrcMBB's instructions onto DstMBB, take
// over SrcMBB's successor list, then detach and retire SrcMBB (removing it
// from MachineLoopInfo as well).
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
    MachineBasicBlock *SrcMBB) {
    dbgs() << "serialPattern BB" << DstMBB->getNumber()
           << " <= BB" << SrcMBB->getNumber() << "\n";
  // Move every instruction of SrcMBB to the end of DstMBB.
  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
  // Drop the Dst->Src edge, then inherit Src's outgoing edges.
  DstMBB->removeSuccessor(SrcMBB, true);
  cloneSuccessorList(DstMBB, SrcMBB);
  // SrcMBB is now empty and unreachable: strip its edges and retire it.
  removeSuccessor(SrcMBB);
  MLI->removeBlock(SrcMBB);
  retireBlock(SrcMBB);
// Flatten an if-then-else diamond into structured form inside MBB: replace
// the conditional branch with AMDGPU IF/ELSE/ENDIF markers, splice the true
// and false arms inline, retire the arm blocks, and make LandMBB the single
// successor.  TrueMBB/FalseMBB/LandMBB may each be null for degenerate
// diamonds (guard lines for some of the debug output are elided here).
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
    dbgs() << "ifPattern BB" << MBB->getNumber();
    dbgs() << "BB" << TrueMBB->getNumber();
    dbgs() << " } else ";
    dbgs() << "BB" << FalseMBB->getNumber();
    dbgs() << "landBlock: ";
    dbgs() << "BB" << LandMBB->getNumber();
  // Remember the branch's opcode and location before it is replaced.
  int OldOpcode = BranchMI->getOpcode();
  DebugLoc BranchDL = BranchMI->getDebugLoc();
  MachineBasicBlock::iterator I = BranchMI;
  // Structured "if (cond != 0)" marker takes the branch's place.
  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
  // Inline the true arm and disconnect it from the CFG.
  MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
  MBB->removeSuccessor(TrueMBB, true);
  if (LandMBB && TrueMBB->succ_size()!=0)
    TrueMBB->removeSuccessor(LandMBB, true);
  retireBlock(TrueMBB);
  MLI->removeBlock(TrueMBB);
  // Inline the false arm behind an ELSE marker, same treatment.
  insertInstrBefore(I, AMDGPU::ELSE);
  MBB->splice(I, FalseMBB, FalseMBB->begin(),
  MBB->removeSuccessor(FalseMBB, true);
  if (LandMBB && FalseMBB->succ_size() != 0)
    FalseMBB->removeSuccessor(LandMBB, true);
  retireBlock(FalseMBB);
  MLI->removeBlock(FalseMBB);
  insertInstrBefore(I, AMDGPU::ENDIF);
  // The original conditional branch is now redundant.
  BranchMI->eraseFromParent();
  if (LandMBB && TrueMBB && FalseMBB)
    MBB->addSuccessor(LandMBB);
// Turn DstBlk into a structured loop body: bracket it with WHILELOOP at the
// top and ENDLOOP at the bottom, and retarget its self-edge to LandMBB.
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
    MachineBasicBlock *LandMBB) {
  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
               << " land = BB" << LandMBB->getNumber() << "\n";);
  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
  // The back-edge to itself now flows to the landing block instead.
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
// Convert the conditional exit branch of ExitingMBB into a structured
// IF_PREDICATE_SET / BREAK / ENDIF sequence targeting LandMBB, inverting the
// predicate first when the branch's true side is not the exit.  Finally the
// original branch and the Exiting->Land CFG edge are removed.
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
    MachineBasicBlock *LandMBB) {
  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
               << " land = BB" << LandMBB->getNumber() << "\n";);
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DebugLoc DL = BranchMI->getDebugLoc();
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
  MachineBasicBlock::iterator I = BranchMI;
  // Make "predicate true" mean "take the break" before structuring.
  if (TrueBranch != LandMBB)
    reversePredicateSetter(I, *I->getParent());
  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
  insertInstrBefore(I, AMDGPU::BREAK);
  insertInstrBefore(I, AMDGPU::ENDIF);
  // Now the branch instruction can be erased safely.
  BranchMI->eraseFromParent();
  // Now take care of successors: the explicit break edge goes away.
  ExitingMBB->removeSuccessor(LandMBB, true);
// Rewrite the conditional branch of a latch block (ContingMBB) that jumps
// back to the loop header (ContMBB) into structured CONTINUE form.  If the
// branch is the last instruction of the block a single CONTINUE_LOGICAL*
// opcode suffices; otherwise an IF/CONTINUE/ENDIF bracket is emitted.  The
// branch-opcode selection (the "int BranchOpcode =" declarations) sits on
// lines elided from this excerpt.
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
    MachineBasicBlock *ContMBB) {
  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
               << ContingMBB->getNumber()
               << ", cont = BB" << ContMBB->getNumber() << "\n";);
  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
  assert(isCondBranch(MI));
  MachineBasicBlock::iterator I = MI;
  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
  int OldOpcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  // The continue can be folded into one logical op only when the branch is
  // the block's final instruction.
  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
  if (!UseContinueLogical) {
      // Pick nzero/zero form depending on which arm re-enters the loop.
      TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
        getBranchZeroOpcode(OldOpcode);
    insertCondBranchBefore(I, BranchOpcode, DL);
    // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
    insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
      TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
        getContinueZeroOpcode(OldOpcode);
    insertCondBranchBefore(I, BranchOpcode, DL);
  MI->eraseFromParent();
  // If we've arrived here then we've already erased the branch instruction.
  // Travel back up the basic block to see the last reference of our debug
  // location we've just inserted; that reference here should be
  // representative. insertEnd ensures phi-moves, if they exist, go before
  // the continue instruction.
  insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
                 getLastDebugLocInBB(ContingMBB));
// Walk the single-successor chain from SrcMBB (a successor of PreMBB) toward
// DstMBB, cloning every block on the path that has more than one predecessor
// so that PreMBB gets a private copy.  Returns a count accumulated in Num
// (its declaration and the return statement are on elided lines).
int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
  assert(PreMBB->isSuccessor(SrcMBB));
  while (SrcMBB && SrcMBB != DstMBB) {
    // Chain property: every block on the path has exactly one successor.
    assert(SrcMBB->succ_size() == 1);
    if (SrcMBB->pred_size() > 1) {
      // Shared block: give PreMBB its own clone and continue from it.
      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
    SrcMBB = *SrcMBB->succ_begin();
// Clone MBB for one particular predecessor PredMBB: copy the instructions,
// repoint PredMBB's branch operands and successor edge to the clone, and give
// the clone MBB's successor list.  The return statement (presumably the
// clone) is on an elided line -- TODO(review): confirm.
AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
    MachineBasicBlock *PredMBB) {
  // NOTE(review): "prececessor" typo lives in the assert text; message also
  // reads backwards (PredMBB is the predecessor, MBB the successor).
  assert(PredMBB->isSuccessor(MBB) &&
         "succBlk is not a prececessor of curBlk");
  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
  // Rewrite branch operands in PredMBB that referenced MBB.
  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
  //srcBlk, oldBlk, newBlk
  PredMBB->replaceSuccessor(MBB, CloneMBB);
  // add all successor to cloneBlk
  cloneSuccessorList(CloneMBB, MBB);
  // Statistics: count every instruction duplicated by the clone.
  numClonedInstr += MBB->size();
    dbgs() << "Cloned block: " << "BB"
           << MBB->getNumber() << "size " << MBB->size() << "\n";
  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
// Move SrcMBB's instructions into DstMBB just before iterator I.  Everything
// up to (but excluding) SrcMBB's original branch instruction is spliced; if
// SrcMBB has no recognizable branch, the whole block is moved.
void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator SpliceEnd;
  //look for the input branchinstr, not the AMDGPU branchinstr
  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
    dbgs() << "migrateInstruction don't see branch instr\n" ;
    // No branch: migrate the entire block.
    SpliceEnd = SrcMBB->end();
    DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
    // Stop the splice at the branch so it stays behind in SrcMBB.
    SpliceEnd = BranchMI;
    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
           << "srcSize = " << SrcMBB->size() << "\n";
  //splice insert before insertPos
  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
           << "srcSize = " << SrcMBB->size() << '\n';
// Give an infinite loop (latch ends in an unconditional branch back to the
// header) a synthetic exit block so the structurizer has a landing pad.
// Bails out early when there is no header/latch or the latch branch is
// conditional (the early returns are on elided lines).  Creating the exit
// would need a fresh register, which is unavailable post-RA, so the current
// code reports an error via the LLVMContext instead.
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
  if (!LoopHeader || !LoopLatch)
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
  // Is LoopRep an infinite loop ?
  if (!BranchMI || !isUncondBranch(BranchMI))
  // Synthesize the dummy exit block and register it with the function.
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk); //insert to function
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
  LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
  Ctx.emitError("Extra register needed to handle CFG");
1640 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1641 MachineInstr *BranchMI;
1643 // I saw two unconditional branch in one basic block in example
1644 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1645 while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1646 && isUncondBranch(BranchMI)) {
1647 DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1648 BranchMI->eraseFromParent();
// Remove a conditional branch whose two successor edges lead to the same
// block, leaving a single fall-through edge.  The guard comparing MBB1 and
// MBB2 (and the early return when they differ) is on elided lines -- the
// visible erase is only reached for the duplicate-successor case.
void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
    MachineBasicBlock *MBB) {
  if (MBB->succ_size() != 2)
  // The two successor edges of the conditional branch.
  MachineBasicBlock *MBB1 = *MBB->succ_begin();
  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
  BranchMI->eraseFromParent();
  SHOWNEWBLK(MBB1, "Removing redundant successor");
  // Drop one of the duplicate edges (and its branch operands).
  MBB->removeSuccessor(MBB1, true);
// Create one synthetic RETURN-terminated exit block and make every block in
// RetMBB branch to it, erasing each block's own return instruction (the
// null-check guarding the erase is on an elided line).  Gives the function a
// single exit for structurization.
void AMDGPUCFGStructurizer::addDummyExitBlock(
    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk); //insert to function
  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
       E = RetMBB.end(); It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    MachineInstr *MI = getReturnInstr(MBB);
      // Replace the block's own return with an edge to the shared exit.
      MI->eraseFromParent();
    MBB->addSuccessor(DummyExitBlk);
      dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1690 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1691 while (MBB->succ_size())
1692 MBB->removeSuccessor(*MBB->succ_begin());
// Record the SCC number for MBB in BlockInfoMap, allocating the
// BlockInformation entry lazily (the null-check before the allocation, and
// the SccNum parameter declaration, sit on elided lines).
// NOTE(review): the entry is raw-new'd; ownership/cleanup presumably happens
// when BlockInfoMap is torn down -- confirm against the full pass.
void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
    srcBlkInfo = new BlockInformation();
  srcBlkInfo->SccNum = SccNum;
// Mark MBB as retired in BlockInfoMap (allocating its BlockInformation entry
// lazily; the null-check is on an elided line).  A block may only be retired
// once fully detached, which the trailing assert enforces.
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
    dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
    SrcBlkInfo = new BlockInformation();
  SrcBlkInfo->IsRetired = true;
  // Retirement requires the block to be fully disconnected from the CFG.
  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
         && "can't retire block yet");
// Pass registration: declares the structurizer under the "amdgpustructurizer"
// command-line name and records its required analyses (dominators,
// post-dominators, loop info).
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
                      "AMDGPU CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
                    "AMDGPU CFG Structurizer", false, false)
1726 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1727 return new AMDGPUCFGStructurizer();