1 //===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
9 #define DEBUG_TYPE "SLP"
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/SmallPtrSet.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/Analysis/AliasAnalysis.h"
17 #include "llvm/Analysis/ScalarEvolution.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/Analysis/TargetTransformInfo.h"
20 #include "llvm/Analysis/Verifier.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Type.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Target/TargetLibraryInfo.h"
34 #include "llvm/Transforms/Scalar.h"
35 #include "llvm/Transforms/Utils/Local.h"
41 static const unsigned MinVecRegSize = 128;
43 static const unsigned RecursionMaxDepth = 6;
47 BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
48 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
49 BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
53 void BoUpSLP::numberInstructions() {
57 // Number the instructions in the block.
58 for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
60 InstrVec.push_back(it);
61 assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
65 Value *BoUpSLP::getPointerOperand(Value *I) {
66 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
67 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
71 unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
72 if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
73 if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
77 bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
78 Value *PtrA = getPointerOperand(A);
79 Value *PtrB = getPointerOperand(B);
80 unsigned ASA = getAddressSpaceOperand(A);
81 unsigned ASB = getAddressSpaceOperand(B);
83 // Check that the address spaces match and that the pointers are valid.
84 if (!PtrA || !PtrB || (ASA != ASB)) return false;
86 // Check that A and B are of the same type.
87 if (PtrA->getType() != PtrB->getType()) return false;
89 // Calculate the distance.
90 const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
91 const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
92 const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
93 const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
95 // Non constant distance.
96 if (!ConstOffSCEV) return false;
98 int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
99 Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
100 // The Instructions are connsecutive if the size of the first load/store is
101 // the same as the offset.
102 int64_t Sz = DL->getTypeStoreSize(Ty);
103 return ((-Offset) == Sz);
106 bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
107 Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
108 unsigned Sz = DL->getTypeSizeInBits(StoreTy);
109 unsigned VF = MinVecRegSize / Sz;
111 if (!isPowerOf2_32(Sz) || VF < 2) return false;
113 bool Changed = false;
114 // Look for profitable vectorizable trees at all offsets, starting at zero.
115 for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
116 if (i + VF > e) return Changed;
117 DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
118 ArrayRef<Value *> Operands = Chain.slice(i, VF);
120 int Cost = getTreeCost(Operands);
121 DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
122 if (Cost < CostThreshold) {
123 DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
124 vectorizeTree(Operands, VF);
133 bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
134 ValueSet Heads, Tails;
135 SmallDenseMap<Value*, Value*> ConsecutiveChain;
137 // We may run into multiple chains that merge into a single chain. We mark the
138 // stores that we vectorized so that we don't visit the same store twice.
139 ValueSet VectorizedStores;
140 bool Changed = false;
142 // Do a quadratic search on all of the given stores and find
143 // all of the pairs of loads that follow each other.
144 for (unsigned i = 0, e = Stores.size(); i < e; ++i)
145 for (unsigned j = 0; j < e; ++j) {
146 if (i == j) continue;
147 if (isConsecutiveAccess(Stores[i], Stores[j])) {
148 Tails.insert(Stores[j]);
149 Heads.insert(Stores[i]);
150 ConsecutiveChain[Stores[i]] = Stores[j];
154 // For stores that start but don't end a link in the chain:
155 for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
156 if (Tails.count(*it)) continue;
158 // We found a store instr that starts a chain. Now follow the chain and try
162 // Collect the chain into a list.
163 while (Tails.count(I) || Heads.count(I)) {
164 if (VectorizedStores.count(I)) break;
165 Operands.push_back(I);
166 // Move to the next value in the chain.
167 I = ConsecutiveChain[I];
170 bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
172 // Mark the vectorized stores so that we don't vectorize them again.
174 VectorizedStores.insert(Operands.begin(), Operands.end());
175 Changed |= Vectorized;
181 int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
182 // Find the type of the operands in VL.
183 Type *ScalarTy = VL[0]->getType();
184 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
185 ScalarTy = SI->getValueOperand()->getType();
186 VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
187 // Find the cost of inserting/extracting values from the vector.
188 return getScalarizationCost(VecTy);
191 int BoUpSLP::getScalarizationCost(Type *Ty) {
193 for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
194 Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
198 AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
199 if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
200 if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
201 return AliasAnalysis::Location();
204 Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
205 assert(Src->getParent() == Dst->getParent() && "Not the same BB");
206 BasicBlock::iterator I = Src, E = Dst;
207 /// Scan all of the instruction from SRC to DST and check if
208 /// the source may alias.
209 for (++I; I != E; ++I) {
210 // Ignore store instructions that are marked as 'ignore'.
211 if (MemBarrierIgnoreList.count(I)) continue;
212 if (Src->mayWriteToMemory()) /* Write */ {
213 if (!I->mayReadOrWriteMemory()) continue;
215 if (!I->mayWriteToMemory()) continue;
217 AliasAnalysis::Location A = getLocation(&*I);
218 AliasAnalysis::Location B = getLocation(Src);
220 if (!A.Ptr || !B.Ptr || AA->alias(A, B))
226 void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
227 Value *Vec = vectorizeTree(Operands, Operands.size());
228 BasicBlock::iterator Loc = cast<Instruction>(Vec);
229 IRBuilder<> Builder(++Loc);
230 // After vectorizing the operands we need to generate extractelement
231 // instructions and replace all of the uses of the scalar values with
232 // the values that we extracted from the vectorized tree.
233 for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
234 Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
235 Operands[i]->replaceAllUsesWith(S);
239 int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
240 // Get rid of the list of stores that were removed, and from the
241 // lists of instructions with multiple users.
242 MemBarrierIgnoreList.clear();
244 MultiUserVals.clear();
245 MustScalarize.clear();
247 // Scan the tree and find which value is used by which lane, and which values
248 // must be scalarized.
249 getTreeUses_rec(VL, 0);
251 // Check that instructions with multiple users can be vectorized. Mark unsafe
253 for (ValueSet::iterator it = MultiUserVals.begin(),
254 e = MultiUserVals.end(); it != e; ++it) {
255 // Check that all of the users of this instr are within the tree
256 // and that they are all from the same lane.
258 for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
260 if (LaneMap.find(*I) == LaneMap.end()) {
261 MustScalarize.insert(*it);
262 DEBUG(dbgs()<<"SLP: Adding " << **it <<
263 " to MustScalarize because of an out of tree usage.\n");
266 if (Lane == -1) Lane = LaneMap[*I];
267 if (Lane != LaneMap[*I]) {
268 MustScalarize.insert(*it);
269 DEBUG(dbgs()<<"Adding " << **it <<
270 " to MustScalarize because multiple lane use it: "
271 << Lane << " and " << LaneMap[*I] << ".\n");
277 // Now calculate the cost of vectorizing the tree.
278 return getTreeCost_rec(VL, 0);
281 void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
282 if (Depth == RecursionMaxDepth) return;
284 // Don't handle vectors.
285 if (VL[0]->getType()->isVectorTy()) return;
286 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
287 if (SI->getValueOperand()->getType()->isVectorTy()) return;
289 // Check if all of the operands are constants.
290 bool AllConst = true;
291 bool AllSameScalar = true;
292 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
293 AllConst &= isa<Constant>(VL[i]);
294 AllSameScalar &= (VL[0] == VL[i]);
295 Instruction *I = dyn_cast<Instruction>(VL[i]);
296 // If one of the instructions is out of this BB, we need to scalarize all.
297 if (I && I->getParent() != BB) return;
300 // If all of the operands are identical or constant we have a simple solution.
301 if (AllConst || AllSameScalar) return;
303 // Scalarize unknown structures.
304 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
307 unsigned Opcode = VL0->getOpcode();
308 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
309 Instruction *I = dyn_cast<Instruction>(VL[i]);
310 // If not all of the instructions are identical then we have to scalarize.
311 if (!I || Opcode != I->getOpcode()) return;
314 // Mark instructions with multiple users.
315 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
316 Instruction *I = dyn_cast<Instruction>(VL[i]);
317 // Remember to check if all of the users of this instr are vectorized
319 if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
322 for (int i = 0, e = VL.size(); i < e; ++i) {
323 // Check that the instruction is only used within
325 if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
326 // Make this instruction as 'seen' and remember the lane.
331 case Instruction::ZExt:
332 case Instruction::SExt:
333 case Instruction::FPToUI:
334 case Instruction::FPToSI:
335 case Instruction::FPExt:
336 case Instruction::PtrToInt:
337 case Instruction::IntToPtr:
338 case Instruction::SIToFP:
339 case Instruction::UIToFP:
340 case Instruction::Trunc:
341 case Instruction::FPTrunc:
342 case Instruction::BitCast:
343 case Instruction::Add:
344 case Instruction::FAdd:
345 case Instruction::Sub:
346 case Instruction::FSub:
347 case Instruction::Mul:
348 case Instruction::FMul:
349 case Instruction::UDiv:
350 case Instruction::SDiv:
351 case Instruction::FDiv:
352 case Instruction::URem:
353 case Instruction::SRem:
354 case Instruction::FRem:
355 case Instruction::Shl:
356 case Instruction::LShr:
357 case Instruction::AShr:
358 case Instruction::And:
359 case Instruction::Or:
360 case Instruction::Xor: {
361 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
363 // Prepare the operand vector.
364 for (unsigned j = 0; j < VL.size(); ++j)
365 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
367 getTreeUses_rec(Operands, Depth+1);
371 case Instruction::Store: {
373 for (unsigned j = 0; j < VL.size(); ++j)
374 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
375 getTreeUses_rec(Operands, Depth+1);
383 int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
384 Type *ScalarTy = VL[0]->getType();
386 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
387 ScalarTy = SI->getValueOperand()->getType();
389 /// Don't mess with vectors.
390 if (ScalarTy->isVectorTy()) return max_cost;
391 VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
393 if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
395 // Check if all of the operands are constants.
396 bool AllConst = true;
397 bool AllSameScalar = true;
398 bool MustScalarizeFlag = false;
399 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
400 AllConst &= isa<Constant>(VL[i]);
401 AllSameScalar &= (VL[0] == VL[i]);
402 // Must have a single use.
403 Instruction *I = dyn_cast<Instruction>(VL[i]);
404 MustScalarizeFlag |= MustScalarize.count(VL[i]);
405 // This instruction is outside the basic block.
406 if (I && I->getParent() != BB)
407 return getScalarizationCost(VecTy);
410 // Is this a simple vector constant.
411 if (AllConst) return 0;
413 // If all of the operands are identical we can broadcast them.
414 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
416 // If we are in a loop, and this is not an instruction (e.g. constant or
417 // argument) or the instruction is defined outside the loop then assume
418 // that the cost is zero.
419 if (L && (!VL0 || !L->contains(VL0)))
422 // We need to broadcast the scalar.
423 return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
426 // If this is not a constant, or a scalar from outside the loop then we
427 // need to scalarize it.
428 if (MustScalarizeFlag)
429 return getScalarizationCost(VecTy);
431 if (!VL0) return getScalarizationCost(VecTy);
432 assert(VL0->getParent() == BB && "Wrong BB");
434 unsigned Opcode = VL0->getOpcode();
435 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
436 Instruction *I = dyn_cast<Instruction>(VL[i]);
437 // If not all of the instructions are identical then we have to scalarize.
438 if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
441 // Check if it is safe to sink the loads or the stores.
442 if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
443 int MaxIdx = InstrIdx[VL0];
444 for (unsigned i = 1, e = VL.size(); i < e; ++i )
445 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
447 Instruction *Last = InstrVec[MaxIdx];
448 for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
449 if (VL[i] == Last) continue;
450 Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
452 DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
453 *Last << "\n because of " << *Barrier << "\n");
460 case Instruction::ZExt:
461 case Instruction::SExt:
462 case Instruction::FPToUI:
463 case Instruction::FPToSI:
464 case Instruction::FPExt:
465 case Instruction::PtrToInt:
466 case Instruction::IntToPtr:
467 case Instruction::SIToFP:
468 case Instruction::UIToFP:
469 case Instruction::Trunc:
470 case Instruction::FPTrunc:
471 case Instruction::BitCast: {
474 Type *SrcTy = VL0->getOperand(0)->getType();
475 // Prepare the operand vector.
476 for (unsigned j = 0; j < VL.size(); ++j) {
477 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
478 // Check that the casted type is the same for all users.
479 if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
480 return getScalarizationCost(VecTy);
483 Cost += getTreeCost_rec(Operands, Depth+1);
484 if (Cost >= max_cost) return max_cost;
486 // Calculate the cost of this instruction.
487 int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
488 VL0->getType(), SrcTy);
490 VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
491 int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
492 Cost += (VecCost - ScalarCost);
495 case Instruction::Add:
496 case Instruction::FAdd:
497 case Instruction::Sub:
498 case Instruction::FSub:
499 case Instruction::Mul:
500 case Instruction::FMul:
501 case Instruction::UDiv:
502 case Instruction::SDiv:
503 case Instruction::FDiv:
504 case Instruction::URem:
505 case Instruction::SRem:
506 case Instruction::FRem:
507 case Instruction::Shl:
508 case Instruction::LShr:
509 case Instruction::AShr:
510 case Instruction::And:
511 case Instruction::Or:
512 case Instruction::Xor: {
514 // Calculate the cost of all of the operands.
515 for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
517 // Prepare the operand vector.
518 for (unsigned j = 0; j < VL.size(); ++j)
519 Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
521 Cost += getTreeCost_rec(Operands, Depth+1);
522 if (Cost >= max_cost) return max_cost;
525 // Calculate the cost of this instruction.
526 int ScalarCost = VecTy->getNumElements() *
527 TTI->getArithmeticInstrCost(Opcode, ScalarTy);
529 int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
530 Cost += (VecCost - ScalarCost);
533 case Instruction::Load: {
534 // If we are scalarize the loads, add the cost of forming the vector.
535 for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
536 if (!isConsecutiveAccess(VL[i], VL[i+1]))
537 return getScalarizationCost(VecTy);
539 // Cost of wide load - cost of scalar loads.
540 int ScalarLdCost = VecTy->getNumElements() *
541 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
542 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
543 return VecLdCost - ScalarLdCost;
545 case Instruction::Store: {
546 // We know that we can merge the stores. Calculate the cost.
547 int ScalarStCost = VecTy->getNumElements() *
548 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
549 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
550 int StoreCost = VecStCost - ScalarStCost;
553 for (unsigned j = 0; j < VL.size(); ++j) {
554 Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
555 MemBarrierIgnoreList.insert(VL[j]);
558 int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
562 // Unable to vectorize unknown instructions.
563 return getScalarizationCost(VecTy);
567 Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
568 int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
569 for (unsigned i = 0; i < VF; ++i )
570 MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
571 return InstrVec[MaxIdx + 1];
574 Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
575 IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
576 Value *Vec = UndefValue::get(Ty);
577 for (unsigned i=0; i < Ty->getNumElements(); ++i) {
578 // Generate the 'InsertElement' instruction.
579 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
580 // Remember that this instruction is used as part of a 'gather' sequence.
581 // The caller of the bottom-up slp vectorizer can try to hoist the sequence
582 // if the users are outside of the basic block.
583 GatherInstructions.push_back(Vec);
589 Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
590 Value *V = vectorizeTree_rec(VL, VF);
591 // We moved some instructions around. We have to number them again
592 // before we can do any analysis.
593 numberInstructions();
594 MustScalarize.clear();
598 Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
599 Type *ScalarTy = VL[0]->getType();
600 if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
601 ScalarTy = SI->getValueOperand()->getType();
602 VectorType *VecTy = VectorType::get(ScalarTy, VF);
604 // Check if all of the operands are constants or identical.
605 bool AllConst = true;
606 bool AllSameScalar = true;
607 for (unsigned i = 0, e = VF; i < e; ++i) {
608 AllConst &= isa<Constant>(VL[i]);
609 AllSameScalar &= (VL[0] == VL[i]);
610 // The instruction must be in the same BB, and it must be vectorizable.
611 Instruction *I = dyn_cast<Instruction>(VL[i]);
612 if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
613 return Scalarize(VL, VecTy);
616 // Check that this is a simple vector constant.
617 if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
619 // Scalarize unknown structures.
620 Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
621 if (!VL0) return Scalarize(VL, VecTy);
623 if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
625 unsigned Opcode = VL0->getOpcode();
626 for (unsigned i = 0, e = VF; i < e; ++i) {
627 Instruction *I = dyn_cast<Instruction>(VL[i]);
628 // If not all of the instructions are identical then we have to scalarize.
629 if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
633 case Instruction::ZExt:
634 case Instruction::SExt:
635 case Instruction::FPToUI:
636 case Instruction::FPToSI:
637 case Instruction::FPExt:
638 case Instruction::PtrToInt:
639 case Instruction::IntToPtr:
640 case Instruction::SIToFP:
641 case Instruction::UIToFP:
642 case Instruction::Trunc:
643 case Instruction::FPTrunc:
644 case Instruction::BitCast: {
646 for (int i = 0; i < VF; ++i)
647 INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
648 Value *InVec = vectorizeTree_rec(INVL, VF);
649 IRBuilder<> Builder(GetLastInstr(VL, VF));
650 CastInst *CI = dyn_cast<CastInst>(VL0);
651 Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
652 VectorizedValues[VL0] = V;
655 case Instruction::Add:
656 case Instruction::FAdd:
657 case Instruction::Sub:
658 case Instruction::FSub:
659 case Instruction::Mul:
660 case Instruction::FMul:
661 case Instruction::UDiv:
662 case Instruction::SDiv:
663 case Instruction::FDiv:
664 case Instruction::URem:
665 case Instruction::SRem:
666 case Instruction::FRem:
667 case Instruction::Shl:
668 case Instruction::LShr:
669 case Instruction::AShr:
670 case Instruction::And:
671 case Instruction::Or:
672 case Instruction::Xor: {
673 ValueList LHSVL, RHSVL;
674 for (int i = 0; i < VF; ++i) {
675 RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
676 LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
679 Value *RHS = vectorizeTree_rec(RHSVL, VF);
680 Value *LHS = vectorizeTree_rec(LHSVL, VF);
681 IRBuilder<> Builder(GetLastInstr(VL, VF));
682 BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
683 Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
684 VectorizedValues[VL0] = V;
687 case Instruction::Load: {
688 LoadInst *LI = cast<LoadInst>(VL0);
689 unsigned Alignment = LI->getAlignment();
691 // Check if all of the loads are consecutive.
692 for (unsigned i = 1, e = VF; i < e; ++i)
693 if (!isConsecutiveAccess(VL[i-1], VL[i]))
694 return Scalarize(VL, VecTy);
696 IRBuilder<> Builder(GetLastInstr(VL, VF));
697 Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
698 VecTy->getPointerTo());
699 LI = Builder.CreateLoad(VecPtr);
700 LI->setAlignment(Alignment);
701 VectorizedValues[VL0] = LI;
704 case Instruction::Store: {
705 StoreInst *SI = cast<StoreInst>(VL0);
706 unsigned Alignment = SI->getAlignment();
709 for (int i = 0; i < VF; ++i)
710 ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
712 Value *VecValue = vectorizeTree_rec(ValueOp, VF);
714 IRBuilder<> Builder(GetLastInstr(VL, VF));
715 Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
716 VecTy->getPointerTo());
717 Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
719 for (int i = 0; i < VF; ++i)
720 cast<Instruction>(VL[i])->eraseFromParent();
724 Value *S = Scalarize(VL, VecTy);
725 VectorizedValues[VL0] = S;
730 } // end of namespace