//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/Support/MathExtras.h"
23 #include "llvm/Target/TargetMachine.h"
25 #define DEBUG_TYPE "gi-combiner"
using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));
38 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39 MachineIRBuilder &B, GISelKnownBits *KB,
40 MachineDominatorTree *MDT,
41 const LegalizerInfo *LI)
42 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
      KB(KB), MDT(MDT), LI(LI) {
  (void)this->KB;
}
47 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
48 Register ToReg) const {
49 Observer.changingAllUsesOfReg(MRI, FromReg);
  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}
59 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
60 MachineOperand &FromRegOp,
61 Register ToReg) const {
62 assert(FromRegOp.getParent() && "Expected an operand in an MI");
63 Observer.changingInstr(*FromRegOp.getParent());
65 FromRegOp.setReg(ToReg);
  Observer.changedInstr(*FromRegOp.getParent());
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
77 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
84 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
85 Register DstReg = MI.getOperand(0).getReg();
86 Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}
101 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
102 SmallVectorImpl<Register> &Ops) {
103 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;
108 // Walk over all the operands of concat vectors and check if they are
109 // build_vector themselves or undef.
110 // Then collect their operands in Ops.
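  // For example (illustrative MIR, register names are made up):
  //   %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
  //   %b:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %c:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
  // gathers Ops = {%x, %y, %undef, %undef}, where a single scalar undef is
  // reused for every undef element, so the concat can later be rewritten as
  // one flattened G_BUILD_VECTOR.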
111 for (const MachineOperand &MO : MI.uses()) {
112 Register Reg = MO.getReg();
113 MachineInstr *Def = MRI.getVRegDef(Reg);
114 assert(Def && "Operand not defined");
115 switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
123 case TargetOpcode::G_IMPLICIT_DEF: {
124 LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
130 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
131 OpType.getScalarType() &&
132 "All undefs should have the same type");
133 // Break the undef vector in as many scalar elements as needed
134 // for the flattening.
135 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
136 EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
146 void CombinerHelper::applyCombineConcatVectors(
147 MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
149 // Generate the flattened build_vector.
150 Register DstReg = MI.getOperand(0).getReg();
151 Builder.setInsertPt(*MI.getParent(), MI);
152 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
164 MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}
177 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
178 SmallVectorImpl<Register> &Ops) {
179 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
180 "Invalid instruction kind");
181 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
182 Register Src1 = MI.getOperand(1).getReg();
183 LLT SrcType = MRI.getType(Src1);
184 // As bizarre as it may look, shuffle vector can actually produce
185 // scalar! This is because at the IR level a <1 x ty> shuffle
186 // vector is perfectly valid.
187 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
188 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
190 // If the resulting vector is smaller than the size of the source
191 // vectors being concatenated, we won't be able to replace the
192 // shuffle vector into a concat_vectors.
194 // Note: We may still be able to produce a concat_vectors fed by
195 // extract_vector_elt and so on. It is less clear that would
196 // be better though, so don't bother for now.
198 // If the destination is a scalar, the size of the sources doesn't
199 // matter. we will lower the shuffle to a plain copy. This will
200 // work only if the source and destination have the same size. But
201 // that's covered by the next condition.
  // TODO: If the sizes of the source and destination don't match,
204 // we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;
208 // Check that the shuffle mask can be broken evenly between the
209 // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;
213 // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
216 unsigned NumConcat = DstNumElts / SrcNumElts;
217 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
218 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }
234 // The shuffle is concatenating multiple vectors together.
235 // Collect the different operands for that.
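  // For example (illustrative MIR):
  //   %d:_(<4 x s32>) = G_SHUFFLE_VECTOR %s1:_(<2 x s32>), %s2, shufflemask(0, 1, 2, 3)
  // collects Ops = {%s1, %s2}, i.e. the destination is simply the two source
  // vectors laid out back to back.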
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}
253 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
254 const ArrayRef<Register> Ops) {
255 Register DstReg = MI.getOperand(0).getReg();
256 Builder.setInsertPt(*MI.getParent(), MI);
257 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate are the attributes of the candidate under consideration.
272 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
273 const LLT TyForCandidate,
274 unsigned OpcodeForCandidate,
275 MachineInstr *MIForCandidate) {
276 if (!CurrentUse.Ty.isValid()) {
277 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
278 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }
283 // We permit the extend to hoist through basic blocks but this is only
284 // sensible if the target has extending loads. If you end up lowering back
285 // into a load and extend during the legalizer then the end result is
286 // hoisting the extend up to the load.
288 // Prefer defined extensions to undefined extensions as these are more
289 // likely to reduce the number of instructions.
290 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
293 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
294 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
295 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive.
299 if (CurrentUse.Ty == TyForCandidate) {
300 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
308 // This is potentially target specific. We've chosen the largest type
309 // because G_TRUNC is usually free. One potential catch with this is that
310 // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}
319 /// Find a suitable place to insert some instructions and insert them. This
320 /// function accounts for special cases like inserting before a PHI node.
321 /// The current strategy for inserting before PHI's is to duplicate the
322 /// instructions for each predecessor. However, while that's ok for G_TRUNC
323 /// on most targets since it generally requires no code, other targets/cases may
324 /// want to try harder to find a dominating block.
325 static void InsertInsnsWithoutSideEffectsBeforeUse(
326 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
327 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();
332 MachineBasicBlock *InsertBB = UseMI.getParent();
  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }
340 // If the block is the same block as the def then we want to insert just after
341 // the def instead of at the start of the block.
342 if (InsertBB == DefMI.getParent()) {
343 MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }
348 // Otherwise we want the start of the BB
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
351 } // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}
362 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
363 PreferredTuple &Preferred) {
364 // We match the loads and follow the uses to the extend instead of matching
365 // the extends and following the def to the load. This is because the load
366 // must remain in the same position for correctness (unless we also add code
367 // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
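  //
  // For example (illustrative MIR):
  //   %v:_(s8) = G_LOAD %ptr
  //   %e:_(s32) = G_SEXT %v(s8)
  // becomes
  //   %e:_(s32) = G_SEXTLOAD %ptr
  // with any remaining uses of %v rewritten through a G_TRUNC of %e.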
371 if (MI.getOpcode() != TargetOpcode::G_LOAD &&
372 MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;
376 auto &LoadValue = MI.getOperand(0);
377 assert(LoadValue.isReg() && "Result wasn't a register?");
379 LLT LoadValueTy = MRI.getType(LoadValue.getReg());
  if (!LoadValueTy.isScalar())
    return false;
383 // Most architectures are going to legalize <s8 loads into at least a 1 byte
384 // load, and the MMOs can only describe memory accesses in multiples of bytes.
385 // If we try to perform extload combining on those, we can end up with
386 // %a(s8) = extload %ptr (load 1 byte from %ptr)
387 // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;
391 // For non power-of-2 types, they will very likely be legalized into multiple
392 // loads. Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;
396 // Find the preferred type aside from the any-extends (unless it's the only
397 // one) and non-extending ops. We'll emit an extending load to that type and
398 // and emit a variant of (extend (trunc X)) for the others according to the
399 // relative type sizes. At the same time, pick an extend to use based on the
400 // extend involved in the chosen type.
401 unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
402 ? TargetOpcode::G_ANYEXT
403 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
404 ? TargetOpcode::G_SEXT
405 : TargetOpcode::G_ZEXT;
406 Preferred = {LLT(), PreferredOpcode, nullptr};
407 for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
408 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
409 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
410 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      // Check for legality.
      if (LI) {
        LegalityQuery::MemDesc MMDesc;
414 const auto &MMO = **MI.memoperands_begin();
415 MMDesc.SizeInBits = MMO.getSizeInBits();
416 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
417 MMDesc.Ordering = MMO.getOrdering();
418 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
419 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
420 if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
            LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
425 MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
434 // type since by definition the result of an extend is larger.
435 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}
441 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
442 PreferredTuple &Preferred) {
443 // Rewrite the load to the chosen extending load.
444 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
446 // Inserter to insert a truncate back to the original type at a given point
447 // with some basic CSE to limit truncate duplication to one per BB.
448 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
449 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
450 MachineBasicBlock::iterator InsertBefore,
451 MachineOperand &UseMO) {
452 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
453 if (PreviouslyEmitted) {
454 Observer.changingInstr(*UseMO.getParent());
455 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }
460 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
461 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
462 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
463 EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
470 ? TargetOpcode::G_SEXTLOAD
471 : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
472 ? TargetOpcode::G_ZEXTLOAD
473 : TargetOpcode::G_LOAD));
475 // Rewrite all the uses to fix up the types.
476 auto &LoadValue = MI.getOperand(0);
477 SmallVector<MachineOperand *, 4> Uses;
478 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
479 Uses.push_back(&UseMO);
481 for (auto *UseMO : Uses) {
482 MachineInstr *UseMI = UseMO->getParent();
484 // If the extend is compatible with the preferred extend then we should fix
485 // up the type and extend so that it uses the preferred use.
486 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
487 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
488 Register UseDstReg = UseMI->getOperand(0).getReg();
489 MachineOperand &UseSrcMO = UseMI->getOperand(1);
490 const LLT UseDstTy = MRI.getType(UseDstReg);
491 if (UseDstReg != ChosenDstReg) {
492 if (Preferred.Ty == UseDstTy) {
493 // If the use has the same type as the preferred use, then merge
494 // the vregs and erase the extend. For example:
495 // %1:_(s8) = G_LOAD ...
496 // %2:_(s32) = G_SEXT %1(s8)
497 // %3:_(s32) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
502 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
503 Observer.erasingInstr(*UseMO->getParent());
504 UseMO->getParent()->eraseFromParent();
505 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
506 // If the preferred size is smaller, then keep the extend but extend
507 // from the result of the extending load. For example:
508 // %1:_(s8) = G_LOAD ...
509 // %2:_(s32) = G_SEXT %1(s8)
510 // %3:_(s64) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
514 // %3:_(s64) = G_ANYEXT %2:_(s32)
516 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the use now.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }
542 // The use isn't an extend. Truncate back to the type we originally loaded.
543 // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}
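/// Returns true if \p DefMI appears before \p UseMI; both instructions must
/// belong to the same basic block.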
551 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
552 const MachineInstr &UseMI) {
553 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
554 "shouldn't consider debug uses");
555 assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return false;

  // Loop through the basic block until we find one of the instructions.
  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(),
                                    E = DefMI.getParent()->end();
  for (; I != E; ++I)
    if (&*I == &DefMI || &*I == &UseMI)
      return &*I == &DefMI;

  llvm_unreachable("Block must contain instructions");
}
567 bool CombinerHelper::dominates(const MachineInstr &DefMI,
568 const MachineInstr &UseMI) {
569 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
570 "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}
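// Match a G_SEXT_INREG whose source is already known (via known-bits
// analysis) to carry at least as many sign bits as the in-register
// sign-extension would produce; such an instruction is a no-op and can be
// replaced with a plain copy.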
579 bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
580 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
581 Register SrcReg = MI.getOperand(1).getReg();
582 unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
583 unsigned NumSextBits =
584 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
585 MI.getOperand(2).getImm();
  return SrcSignBits >= NumSextBits;
}
589 bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
590 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
591 MachineIRBuilder MIB(MI);
592 MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
  return true;
}
597 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
598 Register &Base, Register &Offset) {
599 auto &MF = *MI.getParent()->getParent();
600 const auto &TLI = *MF.getSubtarget().getTargetLowering();
603 unsigned Opcode = MI.getOpcode();
604 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
605 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
608 Base = MI.getOperand(1).getReg();
609 MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;
613 LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
615 for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;
619 Offset = Use.getOperand(2).getReg();
620 if (!ForceLegalIndexing &&
621 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }
627 // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation might
    // be movable.
630 MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
631 if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }
637 // FIXME: check whether all uses of Base are load/store with foldable
638 // addressing modes. If so, using the normal addr-modes is better than
639 // forming an indexed one.
641 bool MemOpDominatesAddrUses = true;
642 for (auto &PtrAddUse :
643 MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
644 if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }
657 LLVM_DEBUG(dbgs() << " Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}
665 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
666 Register &Base, Register &Offset) {
667 auto &MF = *MI.getParent()->getParent();
668 const auto &TLI = *MF.getSubtarget().getTargetLowering();
671 unsigned Opcode = MI.getOpcode();
672 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
673 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
676 Addr = MI.getOperand(1).getReg();
677 MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;
681 Base = AddrDef->getOperand(1).getReg();
682 Offset = AddrDef->getOperand(2).getReg();
684 LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
686 if (!ForceLegalIndexing &&
687 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }
692 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
693 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }
698 if (MI.getOpcode() == TargetOpcode::G_STORE) {
699 // Would require a copy.
700 if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }
705 // We're expecting one use of Addr in MI, but it could also be the
706 // value stored, which isn't actually dominated by the instruction.
707 if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }
713 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
714 // That might allow us to end base's liveness here by adjusting the constant.
716 for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
717 if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}
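// As an illustration, a post-indexed candidate looks like
//   %v:_(s32) = G_LOAD %base
//   %next:_(p0) = G_PTR_ADD %base, %off
// and is rewritten by the apply function below into
//   %v:_(s32), %next:_(p0) = G_INDEXED_LOAD %base, %off, 0
// where the trailing immediate distinguishes pre-indexing (1) from
// post-indexing (0).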
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
736 unsigned Opcode = MI.getOpcode();
737 if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;
  return true;
}
751 void CombinerHelper::applyCombineIndexedLoadStore(
752 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
753 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
754 MachineIRBuilder MIRBuilder(MI);
755 unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }
775 auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
785 MIB.addUse(MatchInfo.Offset);
786 MIB.addImm(MatchInfo.IsPre);
787 MI.eraseFromParent();
788 AddrDef.eraseFromParent();
  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}
793 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_BR)
    return false;
  // Try to match the following:
  // bb1:
  //   %c(s32) = G_ICMP pred, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  //   ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with a conditional branch to bb3 and otherwise
  // fall through to bb2.
812 MachineBasicBlock *MBB = MI.getParent();
813 MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
816 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
818 MachineInstr *BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;
822 // Check that the next block is the conditional branch target.
  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
    return false;
826 MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
827 if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
    return false;
  return true;
}
833 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
  if (!matchElideBrByInvertingCond(MI))
    return false;
  applyElideBrByInvertingCond(MI);
  return true;
}
840 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
841 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
842 MachineBasicBlock::iterator BrIt(MI);
843 MachineInstr *BrCond = &*std::prev(BrIt);
844 MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
846 CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
847 (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
849 // Invert the G_ICMP condition.
850 Observer.changingInstr(*CmpMI);
851 CmpMI->getOperand(1).setPredicate(InversePred);
852 Observer.changedInstr(*CmpMI);
854 // Change the conditional branch target.
855 Observer.changingInstr(*BrCond);
856 BrCond->getOperand(1).setMBB(BrTarget);
857 Observer.changedInstr(*BrCond);
  MI.eraseFromParent();
}
861 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
862 // On Darwin, -Os means optimize for size without hurting performance, so
863 // only really optimize for size when -Oz (MinSize) is used.
864 if (MF.getTarget().getTargetTriple().isOSDarwin())
865 return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}
869 // Returns a list of types to use for memory op lowering in MemOps. A partial
870 // port of findOptimalMemOpLowering in TargetLowering.
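// For example, assuming the target reports s64 as its optimal memop type, an
// 11-byte memset may be broken into MemOps = {s64, s16, s8} (or into fewer,
// overlapping accesses when misaligned accesses are fast and overlap is
// allowed).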
871 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
872 unsigned Limit, const MemOp &Op,
873 unsigned DstAS, unsigned SrcAS,
874 const AttributeList &FuncAttributes,
875 const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
883 // We only need to check DstAlign here as SrcAlign is always greater or
884 // equal to DstAlign (or zero).
885 Ty = LLT::scalar(64);
886 if (Op.isFixedDstAlign())
887 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
888 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
889 Ty = LLT::scalar(Ty.getSizeInBytes());
890 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }
894 unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
901 // FIXME: check for mem op safety and legality of the types. Not all of
902 // SDAGisms map cleanly to GISel concepts.
903 if (NewTy.isVector())
904 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
905 NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
906 unsigned NewTySize = NewTy.getSizeInBytes();
907 assert(NewTySize > 0 && "Could not find appropriate type");
909 // If the new LLT cannot cover all of the remaining bits, then consider
910 // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
913 MVT VT = getMVTForLLT(Ty);
914 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
915 TLI.allowsMisalignedMemoryAccesses(
916 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}
943 // Get a vectorized representation of the memset value operand, GISel edition.
944 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
945 MachineRegisterInfo &MRI = *MIB.getMRI();
946 unsigned NumBits = Ty.getScalarSizeInBits();
947 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
948 if (!Ty.isVector() && ValVRegAndVal) {
949 unsigned KnownVal = ValVRegAndVal->Value;
950 APInt Scalar = APInt(8, KnownVal);
951 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }
955 // Extend the byte value to the larger type, and then multiply by a magic
956 // value 0x010101... in order to replicate it across every byte.
957 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
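  // For example, a non-zero, non-constant s8 value stored through an s32 type
  // is zero-extended and multiplied by 0x01010101 so every byte of the wider
  // value holds the original byte.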
958 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }
962 LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}
977 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
978 Register Val, unsigned KnownLen,
979 Align Alignment, bool IsVolatile) {
980 auto &MF = *MI.getParent()->getParent();
981 const auto &TLI = *MF.getSubtarget().getTargetLowering();
982 auto &DL = MF.getDataLayout();
983 LLVMContext &C = MF.getFunction().getContext();
985 assert(KnownLen != 0 && "Have a zero length memset length!");
987 bool DstAlignCanChange = false;
988 MachineFrameInfo &MFI = MF.getFrameInfo();
989 bool OptSize = shouldLowerMemFuncForSize(MF);
991 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
992 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
993 DstAlignCanChange = true;
995 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
996 std::vector<LLT> MemOps;
998 const auto &DstMMO = **MI.memoperands_begin();
999 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1001 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1002 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1004 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return false;
1013 if (DstAlignCanChange) {
1014 // Get an estimate of the type from the LLT.
1015 Type *IRTy = getTypeForLLT(MemOps[0], C);
1016 Align NewAlign = DL.getABITypeAlign(IRTy);
1017 if (NewAlign > Alignment) {
1018 Alignment = NewAlign;
1019 unsigned FI = FIDef->getOperand(1).getIndex();
1020 // Give the stack frame object a larger alignment if needed.
1021 if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }
1026 MachineIRBuilder MIB(MI);
1027 // Find the largest store and generate the bit pattern for it.
1028 LLT LargestTy = MemOps[0];
1029 for (unsigned i = 1; i < MemOps.size(); i++)
1030 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1031 LargestTy = MemOps[i];
1033 // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return false;
1041 // Generate the stores. For each store type in the list, we generate the
1042 // matching store of that type to the destination address.
1043 LLT PtrTy = MRI.getType(Dst);
1044 unsigned DstOff = 0;
1045 unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
1050 // Issuing an unaligned load / store pair that overlaps with the previous
1051 // pair. Adjust the offset accordingly.
1052 assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }
1056 // If this store is smaller than the largest store see whether we can get
1057 // the smaller value for free with a truncate.
1058 Register Value = MemSetValue;
1059 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1060 MVT VT = getMVTForLLT(Ty);
1061 MVT LargestVT = getMVTForLLT(LargestTy);
1062 if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return false;
    }

    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
1081 MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return true;
}
1090 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1091 Register Src, unsigned KnownLen,
                                    Align DstAlign, Align SrcAlign,
                                    bool IsVolatile) {
1094 auto &MF = *MI.getParent()->getParent();
1095 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1096 auto &DL = MF.getDataLayout();
1097 LLVMContext &C = MF.getFunction().getContext();
1099 assert(KnownLen != 0 && "Have a zero length memcpy length!");
1101 bool DstAlignCanChange = false;
1102 MachineFrameInfo &MFI = MF.getFrameInfo();
1103 bool OptSize = shouldLowerMemFuncForSize(MF);
1104 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1106 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1107 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1108 DstAlignCanChange = true;
1110 // FIXME: infer better src pointer alignment like SelectionDAG does here.
1111 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1112 // if the memcpy is in a tail call position.
1114 unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1115 std::vector<LLT> MemOps;
1117 const auto &DstMMO = **MI.memoperands_begin();
1118 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1119 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1120 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;
1130 if (DstAlignCanChange) {
1131 // Get an estimate of the type from the LLT.
1132 Type *IRTy = getTypeForLLT(MemOps[0], C);
1133 Align NewAlign = DL.getABITypeAlign(IRTy);
    // Don't promote to an alignment that would require dynamic stack
    // realignment.
1137 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1138 if (!TRI->needsStackRealignment(MF))
1139 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1140 NewAlign = NewAlign / 2;
1142 if (NewAlign > Alignment) {
1143 Alignment = NewAlign;
1144 unsigned FI = FIDef->getOperand(1).getIndex();
1145 // Give the stack frame object a larger alignment if needed.
1146 if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }
1151 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1153 MachineIRBuilder MIB(MI);
1154 // Now we need to emit a pair of load and stores for each of the types we've
1155 // collected. I.e. for each type, generate a load from the source pointer of
1156 // that type width, and then generate a corresponding store to the dest buffer
1157 // of that value loaded. This can result in a sequence of loads and stores
  // of mixed types, depending on what the target specifies as good types to use.
1159 unsigned CurrOffset = 0;
1160 LLT PtrTy = MRI.getType(Src);
1161 unsigned Size = KnownLen;
1162 for (auto CopyTy : MemOps) {
1163 // Issuing an unaligned load / store pair that overlaps with the previous
1164 // pair. Adjust the offset accordingly.
1165 if (CopyTy.getSizeInBytes() > Size)
1166 CurrOffset -= CopyTy.getSizeInBytes() - Size;
    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
1177 if (CurrOffset != 0) {
      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr =
        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1187 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1188 CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return true;
}
1196 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1197 Register Src, unsigned KnownLen,
                                     Align DstAlign, Align SrcAlign,
                                     bool IsVolatile) {
1200 auto &MF = *MI.getParent()->getParent();
1201 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1202 auto &DL = MF.getDataLayout();
1203 LLVMContext &C = MF.getFunction().getContext();
1205 assert(KnownLen != 0 && "Have a zero length memmove length!");
1207 bool DstAlignCanChange = false;
1208 MachineFrameInfo &MFI = MF.getFrameInfo();
1209 bool OptSize = shouldLowerMemFuncForSize(MF);
1210 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1212 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1213 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1214 DstAlignCanChange = true;
1216 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1217 std::vector<LLT> MemOps;
1219 const auto &DstMMO = **MI.memoperands_begin();
1220 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1221 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1222 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1224 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;
1235 if (DstAlignCanChange) {
1236 // Get an estimate of the type from the LLT.
1237 Type *IRTy = getTypeForLLT(MemOps[0], C);
1238 Align NewAlign = DL.getABITypeAlign(IRTy);
    // Don't promote to an alignment that would require dynamic stack
    // realignment.
1242 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1243 if (!TRI->needsStackRealignment(MF))
1244 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1245 NewAlign = NewAlign / 2;
1247 if (NewAlign > Alignment) {
1248 Alignment = NewAlign;
1249 unsigned FI = FIDef->getOperand(1).getIndex();
1250 // Give the stack frame object a larger alignment if needed.
1251 if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }
1256 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1258 MachineIRBuilder MIB(MI);
1259 // Memmove requires that we perform the loads first before issuing the stores.
1260 // Apart from that, this loop is pretty much doing the same thing as the
1261 // memcpy codegen function.
1262 unsigned CurrOffset = 0;
1263 LLT PtrTy = MRI.getType(Src);
1264 SmallVector<Register, 16> LoadVals;
1265 for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
1272 if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
1283 LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
1289 if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return true;
}
1301 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1302 // This combine is fairly complex so it's not written with a separate
1303 // matcher function.
1304 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1305 Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1306 assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1307 ID == Intrinsic::memset) &&
1308 "Expected a memcpy like intrinsic");
1310 auto MMOIt = MI.memoperands_begin();
1311 const MachineMemOperand *MemOp = *MMOIt;
1312 bool IsVolatile = MemOp->isVolatile();
  // Don't try to optimize volatile.
  if (IsVolatile)
    return false;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(1).getReg();
1320 Register Src = MI.getOperand(2).getReg();
1321 Register Len = MI.getOperand(3).getReg();
1323 if (ID != Intrinsic::memset) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }
1329 // See if this is a constant length copy
  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return false; // Leave it to the legalizer to lower it to a libcall.
1333 unsigned KnownLen = LenVRegAndVal->Value;
1335 if (KnownLen == 0) {
    MI.eraseFromParent();
    return true;
  }

  if (MaxLen && KnownLen > MaxLen)
    return false;
  if (ID == Intrinsic::memcpy)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (ID == Intrinsic::memmove)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
1347 if (ID == Intrinsic::memset)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return false;
}
1352 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1353 PtrAddChain &MatchInfo) {
1354 // We're trying to match the following pattern:
1355 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
1363 Register Add2 = MI.getOperand(1).getReg();
1364 Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
1373 Register Base = Add2Def->getOperand(1).getReg();
1374 Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
  MatchInfo.Base = Base;
  return true;
}
1385 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1386 PtrAddChain &MatchInfo) {
1387 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1388 MachineIRBuilder MIB(MI);
1389 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1390 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1391 Observer.changingInstr(MI);
1392 MI.getOperand(1).setReg(MatchInfo.Base);
1393 MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
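// Fold a multiplication by a constant power of two into a left shift, e.g.
// (illustrative MIR)
//   %r:_(s32) = G_MUL %x, 8   -->   %r:_(s32) = G_SHL %x, %three
// where %three is a G_CONSTANT holding the log2 of the multiplier.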
1398 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1399 unsigned &ShiftVal) {
1400 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
    return false;
  ShiftVal = Log2_64(MaybeImmVal->Value);
  return true;
}
1409 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1410 unsigned &ShiftVal) {
1411 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1412 MachineIRBuilder MIB(MI);
1413 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1414 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1415 Observer.changingInstr(MI);
1416 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1417 MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
1422 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1423 unsigned TargetShiftSize,
1424 unsigned &ShiftVal) {
1425 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1426 MI.getOpcode() == TargetOpcode::G_LSHR ||
1427 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1429 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;
1433 // Don't narrow further than the requested size.
1434 unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value;
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}
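// Narrow a wide shift by a constant of at least half the bit width, e.g.
// (illustrative MIR)
//   %d:_(s64) = G_LSHR %x, 40
// -->
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %n:_(s32) = G_LSHR %hi, 8
//   %zero:_(s32) = G_CONSTANT i32 0
//   %d:_(s64) = G_MERGE_VALUES %n, %zero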
1447 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1448 const unsigned &ShiftVal) {
1449 Register DstReg = MI.getOperand(0).getReg();
1450 Register SrcReg = MI.getOperand(1).getReg();
1451 LLT Ty = MRI.getType(SrcReg);
1452 unsigned Size = Ty.getSizeInBits();
1453 unsigned HalfSize = Size / 2;
1454 assert(ShiftVal >= HalfSize);
1456 LLT HalfTy = LLT::scalar(HalfSize);
1458 Builder.setInstr(MI);
1459 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1460 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1462 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1463 Register Narrowed = Unmerge.getReg(1);
    //  dst = G_LSHR s64:x, C for C >= 32
    // =>
1467 // lo, hi = G_UNMERGE_VALUES x
1468 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1470 if (NarrowShiftAmt != 0) {
1471 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }
1475 auto Zero = Builder.buildConstant(HalfTy, 0);
1476 Builder.buildMerge(DstReg, { Narrowed, Zero });
1477 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1478 Register Narrowed = Unmerge.getReg(0);
    //  dst = G_SHL s64:x, C for C >= 32
    // =>
1481 // lo, hi = G_UNMERGE_VALUES x
1482 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1483 if (NarrowShiftAmt != 0) {
1484 Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }
1488 auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1492 auto Hi = Builder.buildAShr(
1493 HalfTy, Unmerge.getReg(1),
1494 Builder.buildConstant(HalfTy, HalfSize - 1));
1496 if (ShiftVal == HalfSize) {
1497 // (G_ASHR i64:x, 32) ->
1498 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1499 Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1500 } else if (ShiftVal == Size - 1) {
1501 // Don't need a second shift.
1502 // (G_ASHR i64:x, 63) ->
1503 // %narrowed = (G_ASHR hi_32(x), 31)
1504 // G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
1508 HalfTy, Unmerge.getReg(1),
1509 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1511 // (G_ASHR i64:x, C) ->, for C >= 32
1512 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
  return true;
}
1521 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }
  return false;
}
1532 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1533 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1534 return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}
1539 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1540 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1541 return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}
1546 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1547 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1548 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}
1552 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
1553 assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}
1558 bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}
1563 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1564 const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
  if (!I1)
    return false;
  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
  if (!I2)
    return false;
1574 // Handle a case like this:
1576 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  // Even though %0 and %1 are produced by the same instruction they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();
  // If we have an instruction which loads or stores, we can't guarantee that
  // it always produces the same value.
  //
1586 // For example, we may have
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
1592 // %x2 = G_LOAD %addr (load N from @somewhere)
1594 // %or = G_OR %x1, %x2
1596 // It's possible that @foo will modify whatever lives at the address we're
1597 // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;
  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
1606 // %a = COPY $physreg
1608 // SOMETHING implicit-def $physreg
1610 // %b = COPY $physreg
1612 // These copies are not equivalent.
1613 if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
1621 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1622 // From that, we know that they must have the same value, since they must
1623 // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }
  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
1630 // On the off-chance that there's some target instruction feeding into the
1631 // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
1638 // MIPatternMatch doesn't let us look through G_ZEXT etc.
1639 auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
1645 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1646 Register OldReg = MI.getOperand(0).getReg();
1647 Register Replacement = MI.getOperand(OpIdx).getReg();
1648 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1649 MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}
1654 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1655 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1656 // Match (cond ? x : x)
1657 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}
1662 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1663 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}
1668 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
1669 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}
1674 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1675 assert(MI.getNumDefs() == 1 && "Expected only one def?");
1676 Builder.setInstr(MI);
1677 Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}
1682 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
1683 assert(MI.getNumDefs() == 1 && "Expected only one def?");
1684 Builder.setInstr(MI);
1685 Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}
1690 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
1691 assert(MI.getNumDefs() == 1 && "Expected only one def?");
1692 Builder.setInstr(MI);
1693 Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}
1698 bool CombinerHelper::matchSimplifyAddToSub(
1699 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1700 Register LHS = MI.getOperand(1).getReg();
1701 Register RHS = MI.getOperand(2).getReg();
1702 Register &NewLHS = std::get<0>(MatchInfo);
1703 Register &NewRHS = std::get<1>(MatchInfo);
1705 // Helper lambda to check for opportunities for
1706 // ((0-A) + B) -> B - A
1707 // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    int64_t Cst;
    if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
        Cst != 0)
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };
  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
1720 bool CombinerHelper::applySimplifyAddToSub(
1721 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1722 Builder.setInstr(MI);
1723 Register SubLHS, SubRHS;
1724 std::tie(SubLHS, SubRHS) = MatchInfo;
1725 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
  return true;
}
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}