contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

   1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements instcombine for ExtractElement, InsertElement and
  11 // ShuffleVector.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "InstCombine.h"
  16 #include "llvm/Support/PatternMatch.h"
  17 using namespace llvm;
  18 using namespace PatternMatch;
  19
  20 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
  21 /// is to leave as a vector operation.  isConstant indicates whether we're
  22 /// extracting one known element.  If false we're extracting a variable index.
  23 static bool CheapToScalarize(Value *V, bool isConstant) {
  24   if (Constant *C = dyn_cast<Constant>(V)) {
  25     if (isConstant) return true;
  26
  27     // If all elts are the same, we can extract it and use any of the values.
  28     Constant *Op0 = C->getAggregateElement(0U);
  29     for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
  30       if (C->getAggregateElement(i) != Op0)
  31         return false;
  32     return true;
  33   }
  34   Instruction *I = dyn_cast<Instruction>(V);
  35   if (!I) return false;
  36
  37   // Insert element gets simplified to the inserted element or is deleted if
  38   // this is constant idx extract element and its a constant idx insertelt.
  39   if (I->getOpcode() == Instruction::InsertElement && isConstant &&
  40       isa<ConstantInt>(I->getOperand(2)))
  41     return true;
  42   if (I->getOpcode() == Instruction::Load && I->hasOneUse())
  43     return true;
  44   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
  45     if (BO->hasOneUse() &&
  46         (CheapToScalarize(BO->getOperand(0), isConstant) ||
  47          CheapToScalarize(BO->getOperand(1), isConstant)))
  48       return true;
  49   if (CmpInst *CI = dyn_cast<CmpInst>(I))
  50     if (CI->hasOneUse() &&
  51         (CheapToScalarize(CI->getOperand(0), isConstant) ||
  52          CheapToScalarize(CI->getOperand(1), isConstant)))
  53       return true;
  54
  55   return false;
  56 }
  57
  58 /// FindScalarElement - Given a vector and an element number, see if the scalar
  59 /// value is already around as a register, for example if it were inserted then
  60 /// extracted from the vector.
  61 static Value *FindScalarElement(Value *V, unsigned EltNo) {
  62   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  63   VectorType *VTy = cast<VectorType>(V->getType());
  64   unsigned Width = VTy->getNumElements();
  65   if (EltNo >= Width)  // Out of range access.
  66     return UndefValue::get(VTy->getElementType());
  67
  68   if (Constant *C = dyn_cast<Constant>(V))
  69     return C->getAggregateElement(EltNo);
  70
  71   if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
  72     // If this is an insert to a variable element, we don't know what it is.
  73     if (!isa<ConstantInt>(III->getOperand(2)))
  74       return 0;
  75     unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
  76
  77     // If this is an insert to the element we are looking for, return the
  78     // inserted value.
  79     if (EltNo == IIElt)
  80       return III->getOperand(1);
  81
  82     // Otherwise, the insertelement doesn't modify the value, recurse on its
  83     // vector input.
  84     return FindScalarElement(III->getOperand(0), EltNo);
  85   }
  86
  87   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
  88     unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
  89     int InEl = SVI->getMaskValue(EltNo);
  90     if (InEl < 0)
  91       return UndefValue::get(VTy->getElementType());
  92     if (InEl < (int)LHSWidth)
  93       return FindScalarElement(SVI->getOperand(0), InEl);
  94     return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  95   }
  96
  97   // Extract a value from a vector add operation with a constant zero.
  98   Value *Val = 0; Constant *Con = 0;
  99   if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
 100     if (Con->getAggregateElement(EltNo)->isNullValue())
 101       return FindScalarElement(Val, EltNo);
 102   }
 103
 104   // Otherwise, we don't know.
 105   return 0;
 106 }
 107
 108 // If we have a PHI node with a vector type that has only 2 uses: feed
 109 // itself and be an operand of extractelement at a constant location,
 110 // try to replace the PHI of the vector type with a PHI of a scalar type.
 111 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
 112   // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
 113   if (!PN->hasNUses(2))
 114     return NULL;
 115
 116   // If so, it's known at this point that one operand is PHI and the other is
 117   // an extractelement node. Find the PHI user that is not the extractelement
 118   // node.
 119   Value::use_iterator iu = PN->use_begin();
 120   Instruction *PHIUser = dyn_cast<Instruction>(*iu);
 121   if (PHIUser == cast<Instruction>(&EI))
 122     PHIUser = cast<Instruction>(*(++iu));
 123
 124   // Verify that this PHI user has one use, which is the PHI itself,
 125   // and that it is a binary operation which is cheap to scalarize.
 126   // otherwise return NULL.
 127   if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
 128       !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
 129     return NULL;
 130
 131   // Create a scalar PHI node that will replace the vector PHI node
 132   // just before the current PHI node.
 133   PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
 134       PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
 135   // Scalarize each PHI operand.
 136   for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
 137     Value *PHIInVal = PN->getIncomingValue(i);
 138     BasicBlock *inBB = PN->getIncomingBlock(i);
 139     Value *Elt = EI.getIndexOperand();
 140     // If the operand is the PHI induction variable:
 141     if (PHIInVal == PHIUser) {
 142       // Scalarize the binary operation. Its first operand is the
 143       // scalar PHI and the second operand is extracted from the other
 144       // vector operand.
 145       BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
 146       unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
 147       Value *Op = InsertNewInstWith(
 148           ExtractElementInst::Create(B0->getOperand(opId), Elt,
 149                                      B0->getOperand(opId)->getName() + ".Elt"),
 150           *B0);
 151       Value *newPHIUser = InsertNewInstWith(
 152           BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
 153       scalarPHI->addIncoming(newPHIUser, inBB);
 154     } else {
 155       // Scalarize PHI input:
 156       Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
 157       // Insert the new instruction into the predecessor basic block.
 158       Instruction *pos = dyn_cast<Instruction>(PHIInVal);
 159       BasicBlock::iterator InsertPos;
 160       if (pos && !isa<PHINode>(pos)) {
 161         InsertPos = pos;
 162         ++InsertPos;
 163       } else {
 164         InsertPos = inBB->getFirstInsertionPt();
 165       }
 166
 167       InsertNewInstWith(newEI, *InsertPos);
 168
 169       scalarPHI->addIncoming(newEI, inBB);
 170     }
 171   }
 172   return ReplaceInstUsesWith(EI, scalarPHI);
 173 }
 174
 175 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
 176   // If vector val is constant with all elements the same, replace EI with
 177   // that element.  We handle a known element # below.
 178   if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
 179     if (CheapToScalarize(C, false))
 180       return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
 181
 182   // If extracting a specified index from the vector, see if we can recursively
 183   // find a previously computed scalar that was inserted into the vector.
 184   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
 185     unsigned IndexVal = IdxC->getZExtValue();
 186     unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
 187
 188     // If this is extracting an invalid index, turn this into undef, to avoid
 189     // crashing the code below.
 190     if (IndexVal >= VectorWidth)
 191       return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
 192
 193     // This instruction only demands the single element from the input vector.
 194     // If the input vector has a single use, simplify it based on this use
 195     // property.
 196     if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
 197       APInt UndefElts(VectorWidth, 0);
 198       APInt DemandedMask(VectorWidth, 0);
 199       DemandedMask.setBit(IndexVal);
 200       if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
 201                                                 DemandedMask, UndefElts)) {
 202         EI.setOperand(0, V);
 203         return &EI;
 204       }
 205     }
 206
 207     if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
 208       return ReplaceInstUsesWith(EI, Elt);
 209
 210     // If the this extractelement is directly using a bitcast from a vector of
 211     // the same number of elements, see if we can find the source element from
 212     // it.  In this case, we will end up needing to bitcast the scalars.
 213     if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
 214       if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
 215         if (VT->getNumElements() == VectorWidth)
 216           if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
 217             return new BitCastInst(Elt, EI.getType());
 218     }
 219
 220     // If there's a vector PHI feeding a scalar use through this extractelement
 221     // instruction, try to scalarize the PHI.
 222     if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
 223       Instruction *scalarPHI = scalarizePHI(EI, PN);
 224       if (scalarPHI)
 225         return scalarPHI;
 226     }
 227   }
 228
 229   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
 230     // Push extractelement into predecessor operation if legal and
 231     // profitable to do so
 232     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
 233       if (I->hasOneUse() &&
 234           CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
 235         Value *newEI0 =
 236           Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
 237                                         EI.getName()+".lhs");
 238         Value *newEI1 =
 239           Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
 240                                         EI.getName()+".rhs");
 241         return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
 242       }
 243     } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
 244       // Extracting the inserted element?
 245       if (IE->getOperand(2) == EI.getOperand(1))
 246         return ReplaceInstUsesWith(EI, IE->getOperand(1));
 247       // If the inserted and extracted elements are constants, they must not
 248       // be the same value, extract from the pre-inserted value instead.
 249       if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
 250         Worklist.AddValue(EI.getOperand(0));
 251         EI.setOperand(0, IE->getOperand(0));
 252         return &EI;
 253       }
 254     } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
 255       // If this is extracting an element from a shufflevector, figure out where
 256       // it came from and extract from the appropriate input element instead.
 257       if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
 258         int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
 259         Value *Src;
 260         unsigned LHSWidth =
 261           SVI->getOperand(0)->getType()->getVectorNumElements();
 262
 263         if (SrcIdx < 0)
 264           return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
 265         if (SrcIdx < (int)LHSWidth)
 266           Src = SVI->getOperand(0);
 267         else {
 268           SrcIdx -= LHSWidth;
 269           Src = SVI->getOperand(1);
 270         }
 271         Type *Int32Ty = Type::getInt32Ty(EI.getContext());
 272         return ExtractElementInst::Create(Src,
 273                                           ConstantInt::get(Int32Ty,
 274                                                            SrcIdx, false));
 275       }
 276     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
 277       // Canonicalize extractelement(cast) -> cast(extractelement)
 278       // bitcasts can change the number of vector elements and they cost nothing
 279       if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
 280         Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
 281                                                   EI.getIndexOperand());
 282         Worklist.AddValue(EE);
 283         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
 284       }
 285     } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
 286       if (SI->hasOneUse()) {
 287         // TODO: For a select on vectors, it might be useful to do this if it
 288         // has multiple extractelement uses. For vector select, that seems to
 289         // fight the vectorizer.
 290
 291         // If we are extracting an element from a vector select or a select on
 292         // vectors, a select on the scalars extracted from the vector arguments.
 293         Value *TrueVal = SI->getTrueValue();
 294         Value *FalseVal = SI->getFalseValue();
 295
 296         Value *Cond = SI->getCondition();
 297         if (Cond->getType()->isVectorTy()) {
 298           Cond = Builder->CreateExtractElement(Cond,
 299                                                EI.getIndexOperand(),
 300                                                Cond->getName() + ".elt");
 301         }
 302
 303         Value *V1Elem
 304           = Builder->CreateExtractElement(TrueVal,
 305                                           EI.getIndexOperand(),
 306                                           TrueVal->getName() + ".elt");
 307
 308         Value *V2Elem
 309           = Builder->CreateExtractElement(FalseVal,
 310                                           EI.getIndexOperand(),
 311                                           FalseVal->getName() + ".elt");
 312         return SelectInst::Create(Cond,
 313                                   V1Elem,
 314                                   V2Elem,
 315                                   SI->getName() + ".elt");
 316       }
 317     }
 318   }
 319   return 0;
 320 }
 321
 322 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
 323 /// elements from either LHS or RHS, return the shuffle mask and true.
 324 /// Otherwise, return false.
 325 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
 326                                          SmallVectorImpl<Constant*> &Mask) {
 327   assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
 328          "Invalid CollectSingleShuffleElements");
 329   unsigned NumElts = V->getType()->getVectorNumElements();
 330
 331   if (isa<UndefValue>(V)) {
 332     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
 333     return true;
 334   }
 335
 336   if (V == LHS) {
 337     for (unsigned i = 0; i != NumElts; ++i)
 338       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
 339     return true;
 340   }
 341
 342   if (V == RHS) {
 343     for (unsigned i = 0; i != NumElts; ++i)
 344       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
 345                                       i+NumElts));
 346     return true;
 347   }
 348
 349   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
 350     // If this is an insert of an extract from some other vector, include it.
 351     Value *VecOp    = IEI->getOperand(0);
 352     Value *ScalarOp = IEI->getOperand(1);
 353     Value *IdxOp    = IEI->getOperand(2);
 354
 355     if (!isa<ConstantInt>(IdxOp))
 356       return false;
 357     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
 358
 359     if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
 360       // Okay, we can handle this if the vector we are insertinting into is
 361       // transitively ok.
 362       if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
 363         // If so, update the mask to reflect the inserted undef.
 364         Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
 365         return true;
 366       }
 367     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
 368       if (isa<ConstantInt>(EI->getOperand(1)) &&
 369           EI->getOperand(0)->getType() == V->getType()) {
 370         unsigned ExtractedIdx =
 371         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
 372
 373         // This must be extracting from either LHS or RHS.
 374         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
 375           // Okay, we can handle this if the vector we are insertinting into is
 376           // transitively ok.
 377           if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
 378             // If so, update the mask to reflect the inserted value.
 379             if (EI->getOperand(0) == LHS) {
 380               Mask[InsertedIdx % NumElts] =
 381               ConstantInt::get(Type::getInt32Ty(V->getContext()),
 382                                ExtractedIdx);
 383             } else {
 384               assert(EI->getOperand(0) == RHS);
 385               Mask[InsertedIdx % NumElts] =
 386               ConstantInt::get(Type::getInt32Ty(V->getContext()),
 387                                ExtractedIdx+NumElts);
 388             }
 389             return true;
 390           }
 391         }
 392       }
 393     }
 394   }
 395   // TODO: Handle shufflevector here!
 396
 397   return false;
 398 }
 399
 400 /// CollectShuffleElements - We are building a shuffle of V, using RHS as the
 401 /// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
 402 /// that computes V and the LHS value of the shuffle.
 403 static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
 404                                      Value *&RHS) {
 405   assert(V->getType()->isVectorTy() &&
 406          (RHS == 0 || V->getType() == RHS->getType()) &&
 407          "Invalid shuffle!");
 408   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
 409
 410   if (isa<UndefValue>(V)) {
 411     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
 412     return V;
 413   }
 414
 415   if (isa<ConstantAggregateZero>(V)) {
 416     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
 417     return V;
 418   }
 419
 420   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
 421     // If this is an insert of an extract from some other vector, include it.
 422     Value *VecOp    = IEI->getOperand(0);
 423     Value *ScalarOp = IEI->getOperand(1);
 424     Value *IdxOp    = IEI->getOperand(2);
 425
 426     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
 427       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
 428           EI->getOperand(0)->getType() == V->getType()) {
 429         unsigned ExtractedIdx =
 430           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
 431         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
 432
 433         // Either the extracted from or inserted into vector must be RHSVec,
 434         // otherwise we'd end up with a shuffle of three inputs.
 435         if (EI->getOperand(0) == RHS || RHS == 0) {
 436           RHS = EI->getOperand(0);
 437           Value *V = CollectShuffleElements(VecOp, Mask, RHS);
 438           Mask[InsertedIdx % NumElts] =
 439             ConstantInt::get(Type::getInt32Ty(V->getContext()),
 440                              NumElts+ExtractedIdx);
 441           return V;
 442         }
 443
 444         if (VecOp == RHS) {
 445           Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
 446           // Update Mask to reflect that `ScalarOp' has been inserted at
 447           // position `InsertedIdx' within the vector returned by IEI.
 448           Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
 449
 450           // Everything but the extracted element is replaced with the RHS.
 451           for (unsigned i = 0; i != NumElts; ++i) {
 452             if (i != InsertedIdx)
 453               Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
 454                                          NumElts+i);
 455           }
 456           return V;
 457         }
 458
 459         // If this insertelement is a chain that comes from exactly these two
 460         // vectors, return the vector and the effective shuffle.
 461         if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
 462           return EI->getOperand(0);
 463       }
 464     }
 465   }
 466   // TODO: Handle shufflevector here!
 467
 468   // Otherwise, can't do anything fancy.  Return an identity vector.
 469   for (unsigned i = 0; i != NumElts; ++i)
 470     Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
 471   return V;
 472 }
 473
 474 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
 475   Value *VecOp    = IE.getOperand(0);
 476   Value *ScalarOp = IE.getOperand(1);
 477   Value *IdxOp    = IE.getOperand(2);
 478
 479   // Inserting an undef or into an undefined place, remove this.
 480   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
 481     ReplaceInstUsesWith(IE, VecOp);
 482
 483   // If the inserted element was extracted from some other vector, and if the
 484   // indexes are constant, try to turn this into a shufflevector operation.
 485   if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
 486     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
 487         EI->getOperand(0)->getType() == IE.getType()) {
 488       unsigned NumVectorElts = IE.getType()->getNumElements();
 489       unsigned ExtractedIdx =
 490         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
 491       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
 492
 493       if (ExtractedIdx >= NumVectorElts) // Out of range extract.
 494         return ReplaceInstUsesWith(IE, VecOp);
 495
 496       if (InsertedIdx >= NumVectorElts)  // Out of range insert.
 497         return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
 498
 499       // If we are extracting a value from a vector, then inserting it right
 500       // back into the same place, just use the input vector.
 501       if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
 502         return ReplaceInstUsesWith(IE, VecOp);
 503
 504       // If this insertelement isn't used by some other insertelement, turn it
 505       // (and any insertelements it points to), into one big shuffle.
 506       if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
 507         SmallVector<Constant*, 16> Mask;
 508         Value *RHS = 0;
 509         Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
 510         if (RHS == 0) RHS = UndefValue::get(LHS->getType());
 511         // We now have a shuffle of LHS, RHS, Mask.
 512         return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
 513       }
 514     }
 515   }
 516
 517   unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
 518   APInt UndefElts(VWidth, 0);
 519   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
 520   if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
 521     if (V != &IE)
 522       return ReplaceInstUsesWith(IE, V);
 523     return &IE;
 524   }
 525
 526   return 0;
 527 }
 528
 529 /// Return true if we can evaluate the specified expression tree if the vector
 530 /// elements were shuffled in a different order.
 531 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
 532                                 unsigned Depth = 5) {
 533   // We can always reorder the elements of a constant.
 534   if (isa<Constant>(V))
 535     return true;
 536
 537   // We won't reorder vector arguments. No IPO here.
 538   Instruction *I = dyn_cast<Instruction>(V);
 539   if (!I) return false;
 540
 541   // Two users may expect different orders of the elements. Don't try it.
 542   if (!I->hasOneUse())
 543     return false;
 544
 545   if (Depth == 0) return false;
 546
 547   switch (I->getOpcode()) {
 548     case Instruction::Add:
 549     case Instruction::FAdd:
 550     case Instruction::Sub:
 551     case Instruction::FSub:
 552     case Instruction::Mul:
 553     case Instruction::FMul:
 554     case Instruction::UDiv:
 555     case Instruction::SDiv:
 556     case Instruction::FDiv:
 557     case Instruction::URem:
 558     case Instruction::SRem:
 559     case Instruction::FRem:
 560     case Instruction::Shl:
 561     case Instruction::LShr:
 562     case Instruction::AShr:
 563     case Instruction::And:
 564     case Instruction::Or:
 565     case Instruction::Xor:
 566     case Instruction::ICmp:
 567     case Instruction::FCmp:
 568     case Instruction::Trunc:
 569     case Instruction::ZExt:
 570     case Instruction::SExt:
 571     case Instruction::FPToUI:
 572     case Instruction::FPToSI:
 573     case Instruction::UIToFP:
 574     case Instruction::SIToFP:
 575     case Instruction::FPTrunc:
 576     case Instruction::FPExt:
 577     case Instruction::GetElementPtr: {
 578       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
 579         if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
 580           return false;
 581       }
 582       return true;
 583     }
 584     case Instruction::InsertElement: {
 585       ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
 586       if (!CI) return false;
 587       int ElementNumber = CI->getLimitedValue();
 588
 589       // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
 590       // can't put an element into multiple indices.
 591       bool SeenOnce = false;
 592       for (int i = 0, e = Mask.size(); i != e; ++i) {
 593         if (Mask[i] == ElementNumber) {
 594           if (SeenOnce)
 595             return false;
 596           SeenOnce = true;
 597         }
 598       }
 599       return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
 600     }
 601   }
 602   return false;
 603 }
 604
 605 /// Rebuild a new instruction just like 'I' but with the new operands given.
 606 /// In the event of type mismatch, the type of the operands is correct.
 607 static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
 608   // We don't want to use the IRBuilder here because we want the replacement
 609   // instructions to appear next to 'I', not the builder's insertion point.
 610   switch (I->getOpcode()) {
 611     case Instruction::Add:
 612     case Instruction::FAdd:
 613     case Instruction::Sub:
 614     case Instruction::FSub:
 615     case Instruction::Mul:
 616     case Instruction::FMul:
 617     case Instruction::UDiv:
 618     case Instruction::SDiv:
 619     case Instruction::FDiv:
 620     case Instruction::URem:
 621     case Instruction::SRem:
 622     case Instruction::FRem:
 623     case Instruction::Shl:
 624     case Instruction::LShr:
 625     case Instruction::AShr:
 626     case Instruction::And:
 627     case Instruction::Or:
 628     case Instruction::Xor: {
 629       BinaryOperator *BO = cast<BinaryOperator>(I);
 630       assert(NewOps.size() == 2 && "binary operator with #ops != 2");
 631       BinaryOperator *New =
 632           BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
 633                                  NewOps[0], NewOps[1], "", BO);
 634       if (isa<OverflowingBinaryOperator>(BO)) {
 635         New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
 636         New->setHasNoSignedWrap(BO->hasNoSignedWrap());
 637       }
 638       if (isa<PossiblyExactOperator>(BO)) {
 639         New->setIsExact(BO->isExact());
 640       }
 641       return New;
 642     }
 643     case Instruction::ICmp:
 644       assert(NewOps.size() == 2 && "icmp with #ops != 2");
 645       return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
 646                           NewOps[0], NewOps[1]);
 647     case Instruction::FCmp:
 648       assert(NewOps.size() == 2 && "fcmp with #ops != 2");
 649       return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
 650                           NewOps[0], NewOps[1]);
 651     case Instruction::Trunc:
 652     case Instruction::ZExt:
 653     case Instruction::SExt:
 654     case Instruction::FPToUI:
 655     case Instruction::FPToSI:
 656     case Instruction::UIToFP:
 657     case Instruction::SIToFP:
 658     case Instruction::FPTrunc:
 659     case Instruction::FPExt: {
 660       // It's possible that the mask has a different number of elements from
 661       // the original cast. We recompute the destination type to match the mask.
 662       Type *DestTy =
 663           VectorType::get(I->getType()->getScalarType(),
 664                           NewOps[0]->getType()->getVectorNumElements());
 665       assert(NewOps.size() == 1 && "cast with #ops != 1");
 666       return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
 667                               "", I);
 668     }
 669     case Instruction::GetElementPtr: {
 670       Value *Ptr = NewOps[0];
 671       ArrayRef<Value*> Idx = NewOps.slice(1);
 672       GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
 673       GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
 674       return GEP;
 675     }
 676   }
 677   llvm_unreachable("failed to rebuild vector instructions");
 678 }
 679
 680 Value *
 681 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
 682   // Mask.size() does not need to be equal to the number of vector elements.
 683
 684   assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
 685   if (isa<UndefValue>(V)) {
 686     return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
 687                                            Mask.size()));
 688   }
 689   if (isa<ConstantAggregateZero>(V)) {
 690     return ConstantAggregateZero::get(
 691                VectorType::get(V->getType()->getScalarType(),
 692                                Mask.size()));
 693   }
 694   if (Constant *C = dyn_cast<Constant>(V)) {
 695     SmallVector<Constant *, 16> MaskValues;
 696     for (int i = 0, e = Mask.size(); i != e; ++i) {
 697       if (Mask[i] == -1)
 698         MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
 699       else
 700         MaskValues.push_back(Builder->getInt32(Mask[i]));
 701     }
 702     return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
 703                                           ConstantVector::get(MaskValues));
 704   }
 705
 706   Instruction *I = cast<Instruction>(V);
 707   switch (I->getOpcode()) {
 708     case Instruction::Add:
 709     case Instruction::FAdd:
 710     case Instruction::Sub:
 711     case Instruction::FSub:
 712     case Instruction::Mul:
 713     case Instruction::FMul:
 714     case Instruction::UDiv:
 715     case Instruction::SDiv:
 716     case Instruction::FDiv:
 717     case Instruction::URem:
 718     case Instruction::SRem:
 719     case Instruction::FRem:
 720     case Instruction::Shl:
 721     case Instruction::LShr:
 722     case Instruction::AShr:
 723     case Instruction::And:
 724     case Instruction::Or:
 725     case Instruction::Xor:
 726     case Instruction::ICmp:
 727     case Instruction::FCmp:
 728     case Instruction::Trunc:
 729     case Instruction::ZExt:
 730     case Instruction::SExt:
 731     case Instruction::FPToUI:
 732     case Instruction::FPToSI:
 733     case Instruction::UIToFP:
 734     case Instruction::SIToFP:
 735     case Instruction::FPTrunc:
 736     case Instruction::FPExt:
 737     case Instruction::Select:
 738     case Instruction::GetElementPtr: {
 739       SmallVector<Value*, 8> NewOps;
 740       bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
 741       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
 742         Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
 743         NewOps.push_back(V);
 744         NeedsRebuild |= (V != I->getOperand(i));
 745       }
 746       if (NeedsRebuild) {
 747         return BuildNew(I, NewOps);
 748       }
 749       return I;
 750     }
 751     case Instruction::InsertElement: {
 752       int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
 753
 754       // The insertelement was inserting at Element. Figure out which element
 755       // that becomes after shuffling. The answer is guaranteed to be unique
 756       // by CanEvaluateShuffled.
 757       bool Found = false;
 758       int Index = 0;
 759       for (int e = Mask.size(); Index != e; ++Index) {
 760         if (Mask[Index] == Element) {
 761           Found = true;
 762           break;
 763         }
 764       }
 765
 766       if (!Found)
 767         return UndefValue::get(
 768             VectorType::get(V->getType()->getScalarType(), Mask.size()));
 769
 770       Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
 771       return InsertElementInst::Create(V, I->getOperand(1),
 772                                        Builder->getInt32(Index), "", I);
 773     }
 774   }
 775   llvm_unreachable("failed to reorder elements of vector instruction!");
 776 }
 777
 778 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 779   Value *LHS = SVI.getOperand(0);
 780   Value *RHS = SVI.getOperand(1);
 781   SmallVector<int, 16> Mask = SVI.getShuffleMask();
 782
 783   bool MadeChange = false;
 784
 785   // Undefined shuffle mask -> undefined value.
 786   if (isa<UndefValue>(SVI.getOperand(2)))
 787     return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
 788
 789   unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
 790
 791   APInt UndefElts(VWidth, 0);
 792   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
 793   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
 794     if (V != &SVI)
 795       return ReplaceInstUsesWith(SVI, V);
 796     LHS = SVI.getOperand(0);
 797     RHS = SVI.getOperand(1);
 798     MadeChange = true;
 799   }
 800
 801   unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
 802
 803   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
 804   // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
 805   if (LHS == RHS || isa<UndefValue>(LHS)) {
 806     if (isa<UndefValue>(LHS) && LHS == RHS) {
 807       // shuffle(undef,undef,mask) -> undef.
 808       Value *Result = (VWidth == LHSWidth)
 809                       ? LHS : UndefValue::get(SVI.getType());
 810       return ReplaceInstUsesWith(SVI, Result);
 811     }
 812
 813     // Remap any references to RHS to use LHS.
 814     SmallVector<Constant*, 16> Elts;
 815     for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
 816       if (Mask[i] < 0) {
 817         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
 818         continue;
 819       }
 820
 821       if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
 822           (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
 823         Mask[i] = -1;     // Turn into undef.
 824         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
 825       } else {
 826         Mask[i] = Mask[i] % e;  // Force to LHS.
 827         Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
 828                                         Mask[i]));
 829       }
 830     }
 831     SVI.setOperand(0, SVI.getOperand(1));
 832     SVI.setOperand(1, UndefValue::get(RHS->getType()));
 833     SVI.setOperand(2, ConstantVector::get(Elts));
 834     LHS = SVI.getOperand(0);
 835     RHS = SVI.getOperand(1);
 836     MadeChange = true;
 837   }
 838
 839   if (VWidth == LHSWidth) {
 840     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
 841     bool isLHSID = true, isRHSID = true;
 842
 843     for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
 844       if (Mask[i] < 0) continue;  // Ignore undef values.
 845       // Is this an identity shuffle of the LHS value?
 846       isLHSID &= (Mask[i] == (int)i);
 847
 848       // Is this an identity shuffle of the RHS value?
 849       isRHSID &= (Mask[i]-e == i);
 850     }
 851
 852     // Eliminate identity shuffles.
 853     if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
 854     if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
 855   }
 856
 857   if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
 858     Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
 859     return ReplaceInstUsesWith(SVI, V);
 860   }
 861
 862   // If the LHS is a shufflevector itself, see if we can combine it with this
 863   // one without producing an unusual shuffle.
 864   // Cases that might be simplified:
 865   // 1.
 866   // x1=shuffle(v1,v2,mask1)
 867   //  x=shuffle(x1,undef,mask)
 868   //        ==>
 869   //  x=shuffle(v1,undef,newMask)
 870   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
 871   // 2.
 872   // x1=shuffle(v1,undef,mask1)
 873   //  x=shuffle(x1,x2,mask)
 874   // where v1.size() == mask1.size()
 875   //        ==>
 876   //  x=shuffle(v1,x2,newMask)
 877   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
 878   // 3.
 879   // x2=shuffle(v2,undef,mask2)
 880   //  x=shuffle(x1,x2,mask)
 881   // where v2.size() == mask2.size()
 882   //        ==>
 883   //  x=shuffle(x1,v2,newMask)
 884   // newMask[i] = (mask[i] < x1.size())
 885   //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
 886   // 4.
 887   // x1=shuffle(v1,undef,mask1)
 888   // x2=shuffle(v2,undef,mask2)
 889   //  x=shuffle(x1,x2,mask)
 890   // where v1.size() == v2.size()
 891   //        ==>
 892   //  x=shuffle(v1,v2,newMask)
 893   // newMask[i] = (mask[i] < x1.size())
 894   //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
 895   //
 896   // Here we are really conservative:
 897   // we are absolutely afraid of producing a shuffle mask not in the input
 898   // program, because the code gen may not be smart enough to turn a merged
 899   // shuffle into two specific shuffles: it may produce worse code.  As such,
 900   // we only merge two shuffles if the result is either a splat or one of the
 901   // input shuffle masks.  In this case, merging the shuffles just removes
 902   // one instruction, which we know is safe.  This is good for things like
 903   // turning: (splat(splat)) -> splat, or
 904   // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
 905   ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
 906   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
 907   if (LHSShuffle)
 908     if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
 909       LHSShuffle = NULL;
 910   if (RHSShuffle)
 911     if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
 912       RHSShuffle = NULL;
 913   if (!LHSShuffle && !RHSShuffle)
 914     return MadeChange ? &SVI : 0;
 915
 916   Value* LHSOp0 = NULL;
 917   Value* LHSOp1 = NULL;
 918   Value* RHSOp0 = NULL;
 919   unsigned LHSOp0Width = 0;
 920   unsigned RHSOp0Width = 0;
 921   if (LHSShuffle) {
 922     LHSOp0 = LHSShuffle->getOperand(0);
 923     LHSOp1 = LHSShuffle->getOperand(1);
 924     LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
 925   }
 926   if (RHSShuffle) {
 927     RHSOp0 = RHSShuffle->getOperand(0);
 928     RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
 929   }
 930   Value* newLHS = LHS;
 931   Value* newRHS = RHS;
 932   if (LHSShuffle) {
 933     // case 1
 934     if (isa<UndefValue>(RHS)) {
 935       newLHS = LHSOp0;
 936       newRHS = LHSOp1;
 937     }
 938     // case 2 or 4
 939     else if (LHSOp0Width == LHSWidth) {
 940       newLHS = LHSOp0;
 941     }
 942   }
 943   // case 3 or 4
 944   if (RHSShuffle && RHSOp0Width == LHSWidth) {
 945     newRHS = RHSOp0;
 946   }
 947   // case 4
 948   if (LHSOp0 == RHSOp0) {
 949     newLHS = LHSOp0;
 950     newRHS = NULL;
 951   }
 952
 953   if (newLHS == LHS && newRHS == RHS)
 954     return MadeChange ? &SVI : 0;
 955
 956   SmallVector<int, 16> LHSMask;
 957   SmallVector<int, 16> RHSMask;
 958   if (newLHS != LHS)
 959     LHSMask = LHSShuffle->getShuffleMask();
 960   if (RHSShuffle && newRHS != RHS)
 961     RHSMask = RHSShuffle->getShuffleMask();
 962
 963   unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
 964   SmallVector<int, 16> newMask;
 965   bool isSplat = true;
 966   int SplatElt = -1;
 967   // Create a new mask for the new ShuffleVectorInst so that the new
 968   // ShuffleVectorInst is equivalent to the original one.
 969   for (unsigned i = 0; i < VWidth; ++i) {
 970     int eltMask;
 971     if (Mask[i] < 0) {
 972       // This element is an undef value.
 973       eltMask = -1;
 974     } else if (Mask[i] < (int)LHSWidth) {
 975       // This element is from left hand side vector operand.
 976       //
 977       // If LHS is going to be replaced (case 1, 2, or 4), calculate the
 978       // new mask value for the element.
 979       if (newLHS != LHS) {
 980         eltMask = LHSMask[Mask[i]];
 981         // If the value selected is an undef value, explicitly specify it
 982         // with a -1 mask value.
 983         if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
 984           eltMask = -1;
 985       } else
 986         eltMask = Mask[i];
 987     } else {
 988       // This element is from right hand side vector operand
 989       //
 990       // If the value selected is an undef value, explicitly specify it
 991       // with a -1 mask value. (case 1)
 992       if (isa<UndefValue>(RHS))
 993         eltMask = -1;
 994       // If RHS is going to be replaced (case 3 or 4), calculate the
 995       // new mask value for the element.
 996       else if (newRHS != RHS) {
 997         eltMask = RHSMask[Mask[i]-LHSWidth];
 998         // If the value selected is an undef value, explicitly specify it
 999         // with a -1 mask value.
1000         if (eltMask >= (int)RHSOp0Width) {
1001           assert(isa<UndefValue>(RHSShuffle->getOperand(1))
1002                  && "should have been check above");
1003           eltMask = -1;
1004         }
1005       } else
1006         eltMask = Mask[i]-LHSWidth;
1007
1008       // If LHS's width is changed, shift the mask value accordingly.
1009       // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
1010       // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
1011       // If newRHS == newLHS, we want to remap any references from newRHS to
1012       // newLHS so that we can properly identify splats that may occur due to
1013       // obfuscation accross the two vectors.
1014       if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
1015         eltMask += newLHSWidth;
1016     }
1017
1018     // Check if this could still be a splat.
1019     if (eltMask >= 0) {
1020       if (SplatElt >= 0 && SplatElt != eltMask)
1021         isSplat = false;
1022       SplatElt = eltMask;
1023     }
1024
1025     newMask.push_back(eltMask);
1026   }
1027
1028   // If the result mask is equal to one of the original shuffle masks,
1029   // or is a splat, do the replacement.
1030   if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
1031     SmallVector<Constant*, 16> Elts;
1032     Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
1033     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
1034       if (newMask[i] < 0) {
1035         Elts.push_back(UndefValue::get(Int32Ty));
1036       } else {
1037         Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
1038       }
1039     }
1040     if (newRHS == NULL)
1041       newRHS = UndefValue::get(newLHS->getType());
1042     return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
1043   }
1044
1045   return MadeChange ? &SVI : 0;
1046 }