1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This is the LLVM vectorization plan. It represents a candidate for
12 /// vectorization, allowing to plan and optimize how to vectorize a given loop
13 /// before generating LLVM-IR.
14 /// The vectorizer uses vectorization plans to estimate the costs of potential
15 /// candidates and if profitable to execute the desired plan, generating vector
18 //===----------------------------------------------------------------------===//
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/CFG.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/InstrTypes.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/GraphWriter.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
47 #define DEBUG_TYPE "vplan"
49 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
50 if (const VPInstruction *Instr = dyn_cast<VPInstruction>(&V))
57 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
58 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
59 const VPBlockBase *Block = this;
60 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
61 Block = Region->getEntry();
62 return cast<VPBasicBlock>(Block);
65 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
66 VPBlockBase *Block = this;
67 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
68 Block = Region->getEntry();
69 return cast<VPBasicBlock>(Block);
72 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
73 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
74 const VPBlockBase *Block = this;
75 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
76 Block = Region->getExit();
77 return cast<VPBasicBlock>(Block);
80 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
81 VPBlockBase *Block = this;
82 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
83 Block = Region->getExit();
84 return cast<VPBasicBlock>(Block);
87 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
88 if (!Successors.empty() || !Parent)
90 assert(Parent->getExit() == this &&
91 "Block w/o successors not the exit of its parent.");
92 return Parent->getEnclosingBlockWithSuccessors();
95 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
96 if (!Predecessors.empty() || !Parent)
98 assert(Parent->getEntry() == this &&
99 "Block w/o predecessors not the entry of its parent.");
100 return Parent->getEnclosingBlockWithPredecessors();
103 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
104 SmallVector<VPBlockBase *, 8> Blocks;
105 for (VPBlockBase *Block : depth_first(Entry))
106 Blocks.push_back(Block);
108 for (VPBlockBase *Block : Blocks)
113 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
114 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
115 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
116 BasicBlock *PrevBB = CFG.PrevBB;
117 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
118 PrevBB->getParent(), CFG.LastBB);
119 DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
121 // Hook up the new basic block to its predecessors.
122 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
123 VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
124 auto &PredVPSuccessors = PredVPBB->getSuccessors();
125 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
126 assert(PredBB && "Predecessor basic-block not found building successor.");
127 auto *PredBBTerminator = PredBB->getTerminator();
128 DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
129 if (isa<UnreachableInst>(PredBBTerminator)) {
130 assert(PredVPSuccessors.size() == 1 &&
131 "Predecessor ending w/o branch must have single successor.");
132 PredBBTerminator->eraseFromParent();
133 BranchInst::Create(NewBB, PredBB);
135 assert(PredVPSuccessors.size() == 2 &&
136 "Predecessor ending with branch must have two successors.");
137 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
138 assert(!PredBBTerminator->getSuccessor(idx) &&
139 "Trying to reset an existing successor block.");
140 PredBBTerminator->setSuccessor(idx, NewBB);
146 void VPBasicBlock::execute(VPTransformState *State) {
147 bool Replica = State->Instance &&
148 !(State->Instance->Part == 0 && State->Instance->Lane == 0);
149 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
150 VPBlockBase *SingleHPred = nullptr;
151 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
153 // 1. Create an IR basic block, or reuse the last one if possible.
154 // The last IR basic block is reused, as an optimization, in three cases:
155 // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
156 // B. when the current VPBB has a single (hierarchical) predecessor which
157 // is PrevVPBB and the latter has a single (hierarchical) successor; and
158 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
159 // is the exit of this region from a previous instance, or the predecessor
161 if (PrevVPBB && /* A */
162 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
163 SingleHPred->getExitBasicBlock() == PrevVPBB &&
164 PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
165 !(Replica && getPredecessors().empty())) { /* C */
166 NewBB = createEmptyBasicBlock(State->CFG);
167 State->Builder.SetInsertPoint(NewBB);
168 // Temporarily terminate with unreachable until CFG is rewired.
169 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
170 State->Builder.SetInsertPoint(Terminator);
171 // Register NewBB in its loop. In innermost loops its the same for all BB's.
172 Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
173 L->addBasicBlockToLoop(NewBB, *State->LI);
174 State->CFG.PrevBB = NewBB;
177 // 2. Fill the IR basic block with IR instructions.
178 DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
179 << " in BB:" << NewBB->getName() << '\n');
181 State->CFG.VPBB2IRBB[this] = NewBB;
182 State->CFG.PrevVPBB = this;
184 for (VPRecipeBase &Recipe : Recipes)
185 Recipe.execute(*State);
187 DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
190 void VPRegionBlock::execute(VPTransformState *State) {
191 ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
193 if (!isReplicator()) {
194 // Visit the VPBlocks connected to "this", starting from it.
195 for (VPBlockBase *Block : RPOT) {
196 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
197 Block->execute(State);
202 assert(!State->Instance && "Replicating a Region with non-null instance.");
204 // Enter replicating mode.
205 State->Instance = {0, 0};
207 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
208 State->Instance->Part = Part;
209 for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
210 State->Instance->Lane = Lane;
211 // Visit the VPBlocks connected to \p this, starting from it.
212 for (VPBlockBase *Block : RPOT) {
213 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
214 Block->execute(State);
219 // Exit replicating mode.
220 State->Instance.reset();
223 void VPInstruction::generateInstruction(VPTransformState &State,
225 IRBuilder<> &Builder = State.Builder;
227 if (Instruction::isBinaryOp(getOpcode())) {
228 Value *A = State.get(getOperand(0), Part);
229 Value *B = State.get(getOperand(1), Part);
230 Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
231 State.set(this, V, Part);
235 switch (getOpcode()) {
236 case VPInstruction::Not: {
237 Value *A = State.get(getOperand(0), Part);
238 Value *V = Builder.CreateNot(A);
239 State.set(this, V, Part);
243 llvm_unreachable("Unsupported opcode for instruction");
247 void VPInstruction::execute(VPTransformState &State) {
248 assert(!State.Instance && "VPInstruction executing an Instance");
249 for (unsigned Part = 0; Part < State.UF; ++Part)
250 generateInstruction(State, Part);
253 void VPInstruction::print(raw_ostream &O, const Twine &Indent) const {
254 O << " +\n" << Indent << "\"EMIT ";
259 void VPInstruction::print(raw_ostream &O) const {
263 switch (getOpcode()) {
264 case VPInstruction::Not:
268 O << Instruction::getOpcodeName(getOpcode());
271 for (const VPValue *Operand : operands()) {
273 Operand->printAsOperand(O);
277 /// Generate the code inside the body of the vectorized loop. Assumes a single
278 /// LoopVectorBody basic-block was created for this. Introduce additional
279 /// basic-blocks as needed, and fill them all.
280 void VPlan::execute(VPTransformState *State) {
281 // 0. Set the reverse mapping from VPValues to Values for code generation.
282 for (auto &Entry : Value2VPValue)
283 State->VPValue2Value[Entry.second] = Entry.first;
285 BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
286 BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
287 assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
288 BasicBlock *VectorLatchBB = VectorHeaderBB;
290 // 1. Make room to generate basic-blocks inside loop body if needed.
291 VectorLatchBB = VectorHeaderBB->splitBasicBlock(
292 VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
293 Loop *L = State->LI->getLoopFor(VectorHeaderBB);
294 L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
295 // Remove the edge between Header and Latch to allow other connections.
296 // Temporarily terminate with unreachable until CFG is rewired.
297 // Note: this asserts the generated code's assumption that
298 // getFirstInsertionPt() can be dereferenced into an Instruction.
299 VectorHeaderBB->getTerminator()->eraseFromParent();
300 State->Builder.SetInsertPoint(VectorHeaderBB);
301 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
302 State->Builder.SetInsertPoint(Terminator);
304 // 2. Generate code in loop body.
305 State->CFG.PrevVPBB = nullptr;
306 State->CFG.PrevBB = VectorHeaderBB;
307 State->CFG.LastBB = VectorLatchBB;
309 for (VPBlockBase *Block : depth_first(Entry))
310 Block->execute(State);
312 // 3. Merge the temporary latch created with the last basic-block filled.
313 BasicBlock *LastBB = State->CFG.PrevBB;
314 // Connect LastBB to VectorLatchBB to facilitate their merge.
315 assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
316 "Expected VPlan CFG to terminate with unreachable");
317 LastBB->getTerminator()->eraseFromParent();
318 BranchInst::Create(VectorLatchBB, LastBB);
320 // Merge LastBB with Latch.
321 bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
323 assert(Merged && "Could not merge last basic block with latch.");
324 VectorLatchBB = LastBB;
326 updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
329 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
330 BasicBlock *LoopLatchBB) {
331 BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
332 assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
333 DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB);
334 // The vector body may be more than a single basic-block by this point.
335 // Update the dominator tree information inside the vector body by propagating
336 // it from header to latch, expecting only triangular control-flow, if any.
337 BasicBlock *PostDomSucc = nullptr;
338 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
339 // Get the list of successors of this block.
340 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
341 assert(Succs.size() <= 2 &&
342 "Basic block in vector loop has more than 2 successors.");
343 PostDomSucc = Succs[0];
344 if (Succs.size() == 1) {
345 assert(PostDomSucc->getSinglePredecessor() &&
346 "PostDom successor has more than one predecessor.");
347 DT->addNewBlock(PostDomSucc, BB);
350 BasicBlock *InterimSucc = Succs[1];
351 if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
352 PostDomSucc = Succs[1];
353 InterimSucc = Succs[0];
355 assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
356 "One successor of a basic block does not lead to the other.");
357 assert(InterimSucc->getSinglePredecessor() &&
358 "Interim successor has more than one predecessor.");
359 assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 &&
360 "PostDom successor has more than two predecessors.");
361 DT->addNewBlock(InterimSucc, BB);
362 DT->addNewBlock(PostDomSucc, BB);
366 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
367 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
368 Twine(getOrCreateBID(Block));
371 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
372 const std::string &Name = Block->getName();
375 return "VPB" + Twine(getOrCreateBID(Block));
378 void VPlanPrinter::dump() {
381 OS << "digraph VPlan {\n";
382 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
383 if (!Plan.getName().empty())
384 OS << "\\n" << DOT::EscapeString(Plan.getName());
385 if (!Plan.Value2VPValue.empty()) {
387 for (auto Entry : Plan.Value2VPValue) {
388 OS << "\\n" << *Entry.second;
389 OS << DOT::EscapeString(" := ");
390 Entry.first->printAsOperand(OS, false);
394 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
395 OS << "edge [fontname=Courier, fontsize=30]\n";
396 OS << "compound=true\n";
398 for (VPBlockBase *Block : depth_first(Plan.getEntry()))
404 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
405 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
406 dumpBasicBlock(BasicBlock);
407 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
410 llvm_unreachable("Unsupported kind of VPBlock.");
413 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
414 bool Hidden, const Twine &Label) {
415 // Due to "dot" we print an edge between two regions as an edge between the
416 // exit basic block and the entry basic of the respective regions.
417 const VPBlockBase *Tail = From->getExitBasicBlock();
418 const VPBlockBase *Head = To->getEntryBasicBlock();
419 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
420 OS << " [ label=\"" << Label << '\"';
422 OS << " ltail=" << getUID(From);
424 OS << " lhead=" << getUID(To);
426 OS << "; splines=none";
430 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
431 auto &Successors = Block->getSuccessors();
432 if (Successors.size() == 1)
433 drawEdge(Block, Successors.front(), false, "");
434 else if (Successors.size() == 2) {
435 drawEdge(Block, Successors.front(), false, "T");
436 drawEdge(Block, Successors.back(), false, "F");
438 unsigned SuccessorNumber = 0;
439 for (auto *Successor : Successors)
440 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
444 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
445 OS << Indent << getUID(BasicBlock) << " [label =\n";
447 OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
449 for (const VPRecipeBase &Recipe : *BasicBlock)
450 Recipe.print(OS, Indent);
452 OS << "\n" << Indent << "]\n";
453 dumpEdges(BasicBlock);
456 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
457 OS << Indent << "subgraph " << getUID(Region) << " {\n";
459 OS << Indent << "fontname=Courier\n"
460 << Indent << "label=\""
461 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
462 << DOT::EscapeString(Region->getName()) << "\"\n";
463 // Dump the blocks of the region.
464 assert(Region->getEntry() && "Region contains no inner blocks.");
465 for (const VPBlockBase *Block : depth_first(Region->getEntry()))
468 OS << Indent << "}\n";
472 void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
473 std::string IngredientString;
474 raw_string_ostream RSO(IngredientString);
475 if (auto *Inst = dyn_cast<Instruction>(V)) {
476 if (!Inst->getType()->isVoidTy()) {
477 Inst->printAsOperand(RSO, false);
480 RSO << Inst->getOpcodeName() << " ";
481 unsigned E = Inst->getNumOperands();
483 Inst->getOperand(0)->printAsOperand(RSO, false);
484 for (unsigned I = 1; I < E; ++I)
485 Inst->getOperand(I)->printAsOperand(RSO << ", ", false);
488 V->printAsOperand(RSO, false);
490 O << DOT::EscapeString(IngredientString);
493 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const {
494 O << " +\n" << Indent << "\"WIDEN\\l\"";
495 for (auto &Instr : make_range(Begin, End))
496 O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\"";
499 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O,
500 const Twine &Indent) const {
501 O << " +\n" << Indent << "\"WIDEN-INDUCTION";
504 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
505 O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc) << "\\l\"";
507 O << " " << VPlanIngredient(IV) << "\\l\"";
510 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
511 O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\"";
514 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const {
515 O << " +\n" << Indent << "\"BLEND ";
516 Phi->printAsOperand(O, false);
519 // Not a User of any mask: not really blending, this is a
520 // single-predecessor phi.
522 Phi->getIncomingValue(0)->printAsOperand(O, false);
524 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
526 Phi->getIncomingValue(I)->printAsOperand(O, false);
528 User->getOperand(I)->printAsOperand(O);
534 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const {
536 << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ")
537 << VPlanIngredient(Ingredient);
543 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
545 << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst)
549 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
550 const Twine &Indent) const {
551 O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
554 User->getOperand(0)->printAsOperand(O);