contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

   1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 /// This file implements the targeting of the InstructionSelector class for
  10 /// AArch64.
  11 /// \todo This should be generated by TableGen.
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AArch64InstrInfo.h"
  15 #include "AArch64MachineFunctionInfo.h"
  16 #include "AArch64RegisterBankInfo.h"
  17 #include "AArch64RegisterInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "AArch64TargetMachine.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "llvm/ADT/Optional.h"
  22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
  24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  26 #include "llvm/CodeGen/GlobalISel/Utils.h"
  27 #include "llvm/CodeGen/MachineBasicBlock.h"
  28 #include "llvm/CodeGen/MachineConstantPool.h"
  29 #include "llvm/CodeGen/MachineFunction.h"
  30 #include "llvm/CodeGen/MachineInstr.h"
  31 #include "llvm/CodeGen/MachineInstrBuilder.h"
  32 #include "llvm/CodeGen/MachineOperand.h"
  33 #include "llvm/CodeGen/MachineRegisterInfo.h"
  34 #include "llvm/IR/Type.h"
  35 #include "llvm/Support/Debug.h"
  36 #include "llvm/Support/raw_ostream.h"
  37
  38 #define DEBUG_TYPE "aarch64-isel"
  39
  40 using namespace llvm;
  41
  42 namespace {
  43
  44 #define GET_GLOBALISEL_PREDICATE_BITSET
  45 #include "AArch64GenGlobalISel.inc"
  46 #undef GET_GLOBALISEL_PREDICATE_BITSET
  47
  /// GlobalISel instruction selector for AArch64.
  ///
  /// select() is the public entry point. The private section declares the
  /// hand-written selection, emission and operand-matching helpers, the
  /// references to target objects captured by the constructor, and the pieces
  /// that are pulled in from the TableGen-generated AArch64GenGlobalISel.inc.
  class AArch64InstructionSelector : public InstructionSelector {
  public:
    AArch64InstructionSelector(const AArch64TargetMachine &TM,
                               const AArch64Subtarget &STI,
                               const AArch64RegisterBankInfo &RBI);

    /// Select the instruction \p I. Overrides InstructionSelector::select().
    bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
    /// Name of this selector; it is the same string as this file's DEBUG_TYPE.
    static const char *getName() { return DEBUG_TYPE; }

  private:
    /// TableGen-generated 'select' implementation, used as the initial selector
    /// for the patterns that don't require complex C++.
    bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

    // A lowering phase that runs before any selection attempts.
    void preISelLower(MachineInstr &I) const;

    // An early selection function that runs before the selectImpl() call.
    bool earlySelect(MachineInstr &I) const;

    // Hand-written selection routines. They are only declared here; each one
    // returns a bool (presumably "selection succeeded" -- confirm against the
    // definitions, which are not part of this declaration).
    bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

    bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
    bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                             MachineRegisterInfo &MRI) const;

    bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                             MachineRegisterInfo &MRI) const;

    bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

    // Helper to generate an equivalent of scalar_to_vector into a new register.
    // NOTE(review): the previous wording said the register is "returned via
    // 'Dst'", but there is no such parameter; presumably the new register is
    // the def of the returned instruction -- confirm against the definition.
    MachineInstr *emitScalarToVector(unsigned EltSize,
                                     const TargetRegisterClass *DstRC,
                                     Register Scalar,
                                     MachineIRBuilder &MIRBuilder) const;

    /// Emit a lane insert into \p DstReg, or a new vector register if None is
    /// provided.
    ///
    /// The lane inserted into is defined by \p LaneIdx. The vector source
    /// register is given by \p SrcReg. The register containing the element is
    /// given by \p EltReg.
    MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                                 Register EltReg, unsigned LaneIdx,
                                 const RegisterBank &RB,
                                 MachineIRBuilder &MIRBuilder) const;
    bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

    // Fills \p Idxs with one optional index per shuffle-mask entry (the exact
    // meaning of an empty Optional is defined by the implementation -- TODO
    // confirm).
    void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                   SmallVectorImpl<Optional<int>> &Idxs) const;
    bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectSplitVectorUnmerge(MachineInstr &I,
                                  MachineRegisterInfo &MRI) const;
    bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const;
    bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
    bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;

    // Constant-pool helpers: the first returns an unsigned (presumably the
    // constant-pool index -- confirm), the second the emitted load instruction.
    unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
    MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                           MachineIRBuilder &MIRBuilder) const;

    // Emit a vector concat operation.
    MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                   Register Op2,
                                   MachineIRBuilder &MIRBuilder) const;
    // Emission helpers for comparisons and vector element extraction. Each
    // returns the MachineInstr it produced (a null result presumably signals
    // failure -- confirm against the definitions).
    MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                     MachineOperand &Predicate,
                                     MachineIRBuilder &MIRBuilder) const;
    MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                          MachineIRBuilder &MIRBuilder) const;
    MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                          MachineIRBuilder &MIRBuilder) const;
    MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                       const RegisterBank &DstRB, LLT ScalarTy,
                                       Register VecReg, unsigned LaneIdx,
                                       MachineIRBuilder &MIRBuilder) const;

    /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
    /// materialized using a FMOV instruction, then update MI and return it.
    /// Otherwise, do nothing and return a nullptr.
    MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                       MachineRegisterInfo &MRI) const;

    /// Emit a CSet for a compare.
    MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                  MachineIRBuilder &MIRBuilder) const;

    // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
    // We use these manually instead of using the importer since it doesn't
    // support SDNodeXForm.
    ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
    ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
    ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
    ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

    ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

    // Unscaled addressing-mode matcher. The fixed-width wrappers below forward
    // the width suffix divided by 8 as \p Size (1 for the 8-bit variant up to
    // 16 for the 128-bit variant).
    ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                              unsigned Size) const;

    ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
      return selectAddrModeUnscaled(Root, 1);
    }
    ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
      return selectAddrModeUnscaled(Root, 2);
    }
    ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
      return selectAddrModeUnscaled(Root, 4);
    }
    ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
      return selectAddrModeUnscaled(Root, 8);
    }
    ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
      return selectAddrModeUnscaled(Root, 16);
    }

    // Indexed addressing-mode matcher. The template overload takes the width
    // as a compile-time value and forwards Width / 8 as \p Size.
    ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                             unsigned Size) const;
    template <int Width>
    ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
      return selectAddrModeIndexed(Root, Width / 8);
    }

    // Custom operand renderer; adds to \p MIB based on \p MI (details live in
    // the definition).
    void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

    // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
    void materializeLargeCMVal(MachineInstr &I, const Value *V,
                               unsigned char OpFlags) const;

    // Optimization methods.
    bool tryOptVectorShuffle(MachineInstr &I) const;
    bool tryOptVectorDup(MachineInstr &MI) const;
    bool tryOptSelect(MachineInstr &MI) const;
    MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                        MachineOperand &Predicate,
                                        MachineIRBuilder &MIRBuilder) const;

    // Target objects bound by the constructor. These are references, so the
    // selector does not own them. Keep the declaration order in sync with the
    // constructor's mem-initializer list.
    const AArch64TargetMachine &TM;
    const AArch64Subtarget &STI;
    const AArch64InstrInfo &TII;
    const AArch64RegisterInfo &TRI;
    const AArch64RegisterBankInfo &RBI;

  // Predicate declarations emitted by TableGen.
  #define GET_GLOBALISEL_PREDICATES_DECL
  #include "AArch64GenGlobalISel.inc"
  #undef GET_GLOBALISEL_PREDICATES_DECL

  // We declare the temporaries used by selectImpl() in the class to minimize the
  // cost of constructing placeholder values.
  #define GET_GLOBALISEL_TEMPORARIES_DECL
  #include "AArch64GenGlobalISel.inc"
  #undef GET_GLOBALISEL_TEMPORARIES_DECL
  };
 216
 217 } // end anonymous namespace
 218
 219 #define GET_GLOBALISEL_IMPL
 220 #include "AArch64GenGlobalISel.inc"
 221 #undef GET_GLOBALISEL_IMPL
 222
 /// Construct the selector.
 ///
 /// Binds the target machine, subtarget and register bank info passed in, and
 /// caches the instruction info and register info obtained from \p STI. Inside
 /// the mem-initializer list the names TM/STI/RBI refer to the constructor
 /// parameters, which shadow the members of the same name. The remaining
 /// initializers (predicates and selectImpl() temporaries) are supplied by the
 /// TableGen-generated AArch64GenGlobalISel.inc.
 AArch64InstructionSelector::AArch64InstructionSelector(
     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
     const AArch64RegisterBankInfo &RBI)
     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
       TRI(*STI.getRegisterInfo()), RBI(RBI),
 #define GET_GLOBALISEL_PREDICATES_INIT
 #include "AArch64GenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATES_INIT
 #define GET_GLOBALISEL_TEMPORARIES_INIT
 #include "AArch64GenGlobalISel.inc"
 #undef GET_GLOBALISEL_TEMPORARIES_INIT
 {
 }
 236
 237 // FIXME: This should be target-independent, inferred from the types declared
 238 // for each class in the bank.
 239 static const TargetRegisterClass *
 240 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
 241                          const RegisterBankInfo &RBI,
 242                          bool GetAllRegSet = false) {
 243   if (RB.getID() == AArch64::GPRRegBankID) {
 244     if (Ty.getSizeInBits() <= 32)
 245       return GetAllRegSet ? &AArch64::GPR32allRegClass
 246                           : &AArch64::GPR32RegClass;
 247     if (Ty.getSizeInBits() == 64)
 248       return GetAllRegSet ? &AArch64::GPR64allRegClass
 249                           : &AArch64::GPR64RegClass;
 250     return nullptr;
 251   }
 252
 253   if (RB.getID() == AArch64::FPRRegBankID) {
 254     if (Ty.getSizeInBits() <= 16)
 255       return &AArch64::FPR16RegClass;
 256     if (Ty.getSizeInBits() == 32)
 257       return &AArch64::FPR32RegClass;
 258     if (Ty.getSizeInBits() == 64)
 259       return &AArch64::FPR64RegClass;
 260     if (Ty.getSizeInBits() == 128)
 261       return &AArch64::FPR128RegClass;
 262     return nullptr;
 263   }
 264
 265   return nullptr;
 266 }
 267
 268 /// Given a register bank, and size in bits, return the smallest register class
 269 /// that can represent that combination.
 270 static const TargetRegisterClass *
 271 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
 272                       bool GetAllRegSet = false) {
 273   unsigned RegBankID = RB.getID();
 274
 275   if (RegBankID == AArch64::GPRRegBankID) {
 276     if (SizeInBits <= 32)
 277       return GetAllRegSet ? &AArch64::GPR32allRegClass
 278                           : &AArch64::GPR32RegClass;
 279     if (SizeInBits == 64)
 280       return GetAllRegSet ? &AArch64::GPR64allRegClass
 281                           : &AArch64::GPR64RegClass;
 282   }
 283
 284   if (RegBankID == AArch64::FPRRegBankID) {
 285     switch (SizeInBits) {
 286     default:
 287       return nullptr;
 288     case 8:
 289       return &AArch64::FPR8RegClass;
 290     case 16:
 291       return &AArch64::FPR16RegClass;
 292     case 32:
 293       return &AArch64::FPR32RegClass;
 294     case 64:
 295       return &AArch64::FPR64RegClass;
 296     case 128:
 297       return &AArch64::FPR128RegClass;
 298     }
 299   }
 300
 301   return nullptr;
 302 }
 303
 304 /// Returns the correct subregister to use for a given register class.
 305 static bool getSubRegForClass(const TargetRegisterClass *RC,
 306                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
 307   switch (TRI.getRegSizeInBits(*RC)) {
 308   case 8:
 309     SubReg = AArch64::bsub;
 310     break;
 311   case 16:
 312     SubReg = AArch64::hsub;
 313     break;
 314   case 32:
 315     if (RC == &AArch64::GPR32RegClass)
 316       SubReg = AArch64::sub_32;
 317     else
 318       SubReg = AArch64::ssub;
 319     break;
 320   case 64:
 321     SubReg = AArch64::dsub;
 322     break;
 323   default:
 324     LLVM_DEBUG(
 325         dbgs() << "Couldn't find appropriate subregister for register class.");
 326     return false;
 327   }
 328
 329   return true;
 330 }
 331
 332 /// Check whether \p I is a currently unsupported binary operation:
 333 /// - it has an unsized type
 334 /// - an operand is not a vreg
 335 /// - all operands are not in the same bank
 336 /// These are checks that should someday live in the verifier, but right now,
 337 /// these are mostly limitations of the aarch64 selector.
 338 static bool unsupportedBinOp(const MachineInstr &I,
 339                              const AArch64RegisterBankInfo &RBI,
 340                              const MachineRegisterInfo &MRI,
 341                              const AArch64RegisterInfo &TRI) {
 342   LLT Ty = MRI.getType(I.getOperand(0).getReg());
 343   if (!Ty.isValid()) {
 344     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
 345     return true;
 346   }
 347
 348   const RegisterBank *PrevOpBank = nullptr;
 349   for (auto &MO : I.operands()) {
 350     // FIXME: Support non-register operands.
 351     if (!MO.isReg()) {
 352       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
 353       return true;
 354     }
 355
 356     // FIXME: Can generic operations have physical registers operands? If
 357     // so, this will need to be taught about that, and we'll need to get the
 358     // bank out of the minimal class for the register.
 359     // Either way, this needs to be documented (and possibly verified).
 360     if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
 361       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
 362       return true;
 363     }
 364
 365     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
 366     if (!OpBank) {
 367       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
 368       return true;
 369     }
 370
 371     if (PrevOpBank && OpBank != PrevOpBank) {
 372       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
 373       return true;
 374     }
 375     PrevOpBank = OpBank;
 376   }
 377   return false;
 378 }
 379
 380 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
 381 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
 382 /// and of size \p OpSize.
 383 /// \returns \p GenericOpc if the combination is unsupported.
 384 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
 385                                unsigned OpSize) {
 386   switch (RegBankID) {
 387   case AArch64::GPRRegBankID:
 388     if (OpSize == 32) {
 389       switch (GenericOpc) {
 390       case TargetOpcode::G_SHL:
 391         return AArch64::LSLVWr;
 392       case TargetOpcode::G_LSHR:
 393         return AArch64::LSRVWr;
 394       case TargetOpcode::G_ASHR:
 395         return AArch64::ASRVWr;
 396       default:
 397         return GenericOpc;
 398       }
 399     } else if (OpSize == 64) {
 400       switch (GenericOpc) {
 401       case TargetOpcode::G_GEP:
 402         return AArch64::ADDXrr;
 403       case TargetOpcode::G_SHL:
 404         return AArch64::LSLVXr;
 405       case TargetOpcode::G_LSHR:
 406         return AArch64::LSRVXr;
 407       case TargetOpcode::G_ASHR:
 408         return AArch64::ASRVXr;
 409       default:
 410         return GenericOpc;
 411       }
 412     }
 413     break;
 414   case AArch64::FPRRegBankID:
 415     switch (OpSize) {
 416     case 32:
 417       switch (GenericOpc) {
 418       case TargetOpcode::G_FADD:
 419         return AArch64::FADDSrr;
 420       case TargetOpcode::G_FSUB:
 421         return AArch64::FSUBSrr;
 422       case TargetOpcode::G_FMUL:
 423         return AArch64::FMULSrr;
 424       case TargetOpcode::G_FDIV:
 425         return AArch64::FDIVSrr;
 426       default:
 427         return GenericOpc;
 428       }
 429     case 64:
 430       switch (GenericOpc) {
 431       case TargetOpcode::G_FADD:
 432         return AArch64::FADDDrr;
 433       case TargetOpcode::G_FSUB:
 434         return AArch64::FSUBDrr;
 435       case TargetOpcode::G_FMUL:
 436         return AArch64::FMULDrr;
 437       case TargetOpcode::G_FDIV:
 438         return AArch64::FDIVDrr;
 439       case TargetOpcode::G_OR:
 440         return AArch64::ORRv8i8;
 441       default:
 442         return GenericOpc;
 443       }
 444     }
 445     break;
 446   }
 447   return GenericOpc;
 448 }
 449
 450 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
 451 /// appropriate for the (value) register bank \p RegBankID and of memory access
 452 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
 453 /// addressing mode (e.g., LDRXui).
 454 /// \returns \p GenericOpc if the combination is unsupported.
 455 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
 456                                     unsigned OpSize) {
 457   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
 458   switch (RegBankID) {
 459   case AArch64::GPRRegBankID:
 460     switch (OpSize) {
 461     case 8:
 462       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
 463     case 16:
 464       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
 465     case 32:
 466       return isStore ? AArch64::STRWui : AArch64::LDRWui;
 467     case 64:
 468       return isStore ? AArch64::STRXui : AArch64::LDRXui;
 469     }
 470     break;
 471   case AArch64::FPRRegBankID:
 472     switch (OpSize) {
 473     case 8:
 474       return isStore ? AArch64::STRBui : AArch64::LDRBui;
 475     case 16:
 476       return isStore ? AArch64::STRHui : AArch64::LDRHui;
 477     case 32:
 478       return isStore ? AArch64::STRSui : AArch64::LDRSui;
 479     case 64:
 480       return isStore ? AArch64::STRDui : AArch64::LDRDui;
 481     }
 482     break;
 483   }
 484   return GenericOpc;
 485 }
 486
 487 #ifndef NDEBUG
 488 /// Helper function that verifies that we have a valid copy at the end of
 489 /// selectCopy. Verifies that the source and dest have the expected sizes and
 490 /// then returns true.
 491 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
 492                         const MachineRegisterInfo &MRI,
 493                         const TargetRegisterInfo &TRI,
 494                         const RegisterBankInfo &RBI) {
 495   const unsigned DstReg = I.getOperand(0).getReg();
 496   const unsigned SrcReg = I.getOperand(1).getReg();
 497   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
 498   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 499
 500   // Make sure the size of the source and dest line up.
 501   assert(
 502       (DstSize == SrcSize ||
 503        // Copies are a mean to setup initial types, the number of
 504        // bits may not exactly match.
 505        (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
 506        // Copies are a mean to copy bits around, as long as we are
 507        // on the same register class, that's fine. Otherwise, that
 508        // means we need some SUBREG_TO_REG or AND & co.
 509        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
 510       "Copy with different width?!");
 511
 512   // Check the size of the destination.
 513   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
 514          "GPRs cannot get more than 64-bit width values");
 515
 516   return true;
 517 }
 518 #endif
 519
 520 /// Helper function for selectCopy. Inserts a subregister copy from
 521 /// \p *From to \p *To, linking it up to \p I.
 522 ///
 523 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
 524 ///
 525 /// CopyReg (From class) = COPY SrcReg
 526 /// SubRegCopy (To class) = COPY CopyReg:SubReg
 527 /// Dst = COPY SubRegCopy
 528 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
 529                                   const RegisterBankInfo &RBI, unsigned SrcReg,
 530                                   const TargetRegisterClass *From,
 531                                   const TargetRegisterClass *To,
 532                                   unsigned SubReg) {
 533   MachineIRBuilder MIB(I);
 534   auto Copy = MIB.buildCopy({From}, {SrcReg});
 535   auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
 536                         .addReg(Copy.getReg(0), 0, SubReg);
 537   MachineOperand &RegOp = I.getOperand(1);
 538   RegOp.setReg(SubRegCopy.getReg(0));
 539
 540   // It's possible that the destination register won't be constrained. Make
 541   // sure that happens.
 542   if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
 543     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
 544
 545   return true;
 546 }
 547
 548 /// Helper function to get the source and destination register classes for a
 549 /// copy. Returns a std::pair containing the source register class for the
 550 /// copy, and the destination register class for the copy. If a register class
 551 /// cannot be determined, then it will be nullptr.
 552 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 553 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
 554                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
 555                      const RegisterBankInfo &RBI) {
 556   unsigned DstReg = I.getOperand(0).getReg();
 557   unsigned SrcReg = I.getOperand(1).getReg();
 558   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
 559   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
 560   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
 561   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 562
 563   // Special casing for cross-bank copies of s1s. We can technically represent
 564   // a 1-bit value with any size of register. The minimum size for a GPR is 32
 565   // bits. So, we need to put the FPR on 32 bits as well.
 566   //
 567   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
 568   // then we can pull it into the helpers that get the appropriate class for a
 569   // register bank. Or make a new helper that carries along some constraint
 570   // information.
 571   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
 572     SrcSize = DstSize = 32;
 573
 574   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
 575           getMinClassForRegBank(DstRegBank, DstSize, true)};
 576 }
 577
 /// Select the copy-like instruction \p I.
 ///
 /// The register classes for both sides come from getRegClassesForCopy(). For a
 /// real COPY that crosses register banks two fix-ups are applied:
 /// - source class wider than the destination class: the source is routed
 ///   through a sub-register copy (selectSubregisterCopy) and we are done;
 /// - a 16-bit source copied to a 32-bit GPR destination: the source is first
 ///   widened into an FPR32 register with SUBREG_TO_REG.
 /// A COPY whose destination is a physical register needs nothing further.
 /// In every remaining case the destination is constrained to its class and
 /// the instruction descriptor is set to COPY.
 ///
 /// \returns false if no destination class could be determined, if \p I is a
 /// COPY and no source class could be determined, if no sub-register index
 /// exists for the destination class, or if constraining the destination
 /// fails; true otherwise.
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {

   unsigned DstReg = I.getOperand(0).getReg();
   unsigned SrcReg = I.getOperand(1).getReg();
   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

   // Find the correct register classes for the source and destination registers.
   const TargetRegisterClass *SrcRC;
   const TargetRegisterClass *DstRC;
   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

   // Without a destination class there is nothing to constrain to.
   if (!DstRC) {
     LLVM_DEBUG(dbgs() << "Unexpected dest size "
                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
     return false;
   }

   // A couple helpers below, for making sure that the copy we produce is valid.

   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
   // to verify that the src and dst are the same size, since that's handled by
   // the SUBREG_TO_REG.
   bool KnownValid = false;

   // Returns true, or asserts if something we don't expect happens. The
   // isValidCopy() verification happens inside an assert, so it only runs in
   // builds with assertions enabled. KnownValid is captured by reference, so
   // the lambda sees updates made after it was created.
   auto CheckCopy = [&]() {
     // If we have a bitcast or something, we can't have physical registers.
     assert(
         (I.isCopy() ||
          (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
           !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
         "No phys reg on generic operator!");
     assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
     // Keeps KnownValid "used" when the asserts are compiled out.
     (void)KnownValid;
     return true;
   };

   // Is this a copy? If so, then we may need to insert a subregister copy, or
   // a SUBREG_TO_REG.
   if (I.isCopy()) {
     // Yes. Check if there's anything to fix up.
     if (!SrcRC) {
       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
       return false;
     }

     // Is this a cross-bank copy?
     if (DstRegBank.getID() != SrcRegBank.getID()) {
       // If we're doing a cross-bank copy on different-sized registers, we need
       // to do a bit more work. Note these are the sizes of the chosen register
       // classes, not of the values being copied.
       unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
       unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

       if (SrcSize > DstSize) {
         // We're doing a cross-bank copy into a smaller register. We need a
         // subregister copy. First, get a register class that's on the same bank
         // as the destination, but the same size as the source.
         const TargetRegisterClass *SubregRC =
             getMinClassForRegBank(DstRegBank, SrcSize, true);
         assert(SubregRC && "Didn't get a register class for subreg?");

         // Get the appropriate subregister for the destination.
         unsigned SubReg = 0;
         if (!getSubRegForClass(DstRC, TRI, SubReg)) {
           LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
           return false;
         }

         // Now, insert a subregister copy using the new register class. The
         // helper always returns true, so its result is not checked.
         selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
         return CheckCopy();
       }

       else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
                SrcSize == 16) {
         // Special case for FPR16 to GPR32.
         // FIXME: This can probably be generalized like the above case.
         unsigned PromoteReg =
             MRI.createVirtualRegister(&AArch64::FPR32RegClass);
         BuildMI(*I.getParent(), I, I.getDebugLoc(),
                 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
             .addImm(0)
             .addUse(SrcReg)
             .addImm(AArch64::hsub);
         // The copy now reads the promoted 32-bit register.
         MachineOperand &RegOp = I.getOperand(1);
         RegOp.setReg(PromoteReg);

         // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
         KnownValid = true;
       }
     }

     // If the destination is a physical register, then there's nothing to
     // change, so we're done.
     if (TargetRegisterInfo::isPhysicalRegister(DstReg))
       return CheckCopy();
   }

   // No need to constrain SrcReg. It will get constrained when we hit another
   // of its use or its defs. Copies do not have constraints.
   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                       << " operand\n");
     return false;
   }
   // Whatever copy-like opcode this was, it is a plain COPY from here on.
   I.setDesc(TII.get(AArch64::COPY));
   return CheckCopy();
 }
 691
 692 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
 693   if (!DstTy.isScalar() || !SrcTy.isScalar())
 694     return GenericOpc;
 695
 696   const unsigned DstSize = DstTy.getSizeInBits();
 697   const unsigned SrcSize = SrcTy.getSizeInBits();
 698
 699   switch (DstSize) {
 700   case 32:
 701     switch (SrcSize) {
 702     case 32:
 703       switch (GenericOpc) {
 704       case TargetOpcode::G_SITOFP:
 705         return AArch64::SCVTFUWSri;
 706       case TargetOpcode::G_UITOFP:
 707         return AArch64::UCVTFUWSri;
 708       case TargetOpcode::G_FPTOSI:
 709         return AArch64::FCVTZSUWSr;
 710       case TargetOpcode::G_FPTOUI:
 711         return AArch64::FCVTZUUWSr;
 712       default:
 713         return GenericOpc;
 714       }
 715     case 64:
 716       switch (GenericOpc) {
 717       case TargetOpcode::G_SITOFP:
 718         return AArch64::SCVTFUXSri;
 719       case TargetOpcode::G_UITOFP:
 720         return AArch64::UCVTFUXSri;
 721       case TargetOpcode::G_FPTOSI:
 722         return AArch64::FCVTZSUWDr;
 723       case TargetOpcode::G_FPTOUI:
 724         return AArch64::FCVTZUUWDr;
 725       default:
 726         return GenericOpc;
 727       }
 728     default:
 729       return GenericOpc;
 730     }
 731   case 64:
 732     switch (SrcSize) {
 733     case 32:
 734       switch (GenericOpc) {
 735       case TargetOpcode::G_SITOFP:
 736         return AArch64::SCVTFUWDri;
 737       case TargetOpcode::G_UITOFP:
 738         return AArch64::UCVTFUWDri;
 739       case TargetOpcode::G_FPTOSI:
 740         return AArch64::FCVTZSUXSr;
 741       case TargetOpcode::G_FPTOUI:
 742         return AArch64::FCVTZUUXSr;
 743       default:
 744         return GenericOpc;
 745       }
 746     case 64:
 747       switch (GenericOpc) {
 748       case TargetOpcode::G_SITOFP:
 749         return AArch64::SCVTFUXDri;
 750       case TargetOpcode::G_UITOFP:
 751         return AArch64::UCVTFUXDri;
 752       case TargetOpcode::G_FPTOSI:
 753         return AArch64::FCVTZSUXDr;
 754       case TargetOpcode::G_FPTOUI:
 755         return AArch64::FCVTZUUXDr;
 756       default:
 757         return GenericOpc;
 758       }
 759     default:
 760       return GenericOpc;
 761     }
 762   default:
 763     return GenericOpc;
 764   };
 765   return GenericOpc;
 766 }
 767
 768 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
 769                                 const RegisterBankInfo &RBI) {
 770   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
 771   bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
 772                AArch64::GPRRegBankID);
 773   LLT Ty = MRI.getType(I.getOperand(0).getReg());
 774   if (Ty == LLT::scalar(32))
 775     return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
 776   else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
 777     return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
 778   return 0;
 779 }
 780
 781 /// Helper function to select the opcode for a G_FCMP.
 782 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
 783   // If this is a compare against +0.0, then we don't have to explicitly
 784   // materialize a constant.
 785   const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
 786   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
 787   unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
 788   if (OpSize != 32 && OpSize != 64)
 789     return 0;
 790   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
 791                               {AArch64::FCMPSri, AArch64::FCMPDri}};
 792   return CmpOpcTbl[ShouldUseImm][OpSize == 64];
 793 }
 794
 795 /// Returns true if \p P is an unsigned integer comparison predicate.
 796 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
 797   switch (P) {
 798   default:
 799     return false;
 800   case CmpInst::ICMP_UGT:
 801   case CmpInst::ICMP_UGE:
 802   case CmpInst::ICMP_ULT:
 803   case CmpInst::ICMP_ULE:
 804     return true;
 805   }
 806 }
 807
 808 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
 809   switch (P) {
 810   default:
 811     llvm_unreachable("Unknown condition code!");
 812   case CmpInst::ICMP_NE:
 813     return AArch64CC::NE;
 814   case CmpInst::ICMP_EQ:
 815     return AArch64CC::EQ;
 816   case CmpInst::ICMP_SGT:
 817     return AArch64CC::GT;
 818   case CmpInst::ICMP_SGE:
 819     return AArch64CC::GE;
 820   case CmpInst::ICMP_SLT:
 821     return AArch64CC::LT;
 822   case CmpInst::ICMP_SLE:
 823     return AArch64CC::LE;
 824   case CmpInst::ICMP_UGT:
 825     return AArch64CC::HI;
 826   case CmpInst::ICMP_UGE:
 827     return AArch64CC::HS;
 828   case CmpInst::ICMP_ULT:
 829     return AArch64CC::LO;
 830   case CmpInst::ICMP_ULE:
 831     return AArch64CC::LS;
 832   }
 833 }
 834
 835 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
 836                                       AArch64CC::CondCode &CondCode,
 837                                       AArch64CC::CondCode &CondCode2) {
 838   CondCode2 = AArch64CC::AL;
 839   switch (P) {
 840   default:
 841     llvm_unreachable("Unknown FP condition!");
 842   case CmpInst::FCMP_OEQ:
 843     CondCode = AArch64CC::EQ;
 844     break;
 845   case CmpInst::FCMP_OGT:
 846     CondCode = AArch64CC::GT;
 847     break;
 848   case CmpInst::FCMP_OGE:
 849     CondCode = AArch64CC::GE;
 850     break;
 851   case CmpInst::FCMP_OLT:
 852     CondCode = AArch64CC::MI;
 853     break;
 854   case CmpInst::FCMP_OLE:
 855     CondCode = AArch64CC::LS;
 856     break;
 857   case CmpInst::FCMP_ONE:
 858     CondCode = AArch64CC::MI;
 859     CondCode2 = AArch64CC::GT;
 860     break;
 861   case CmpInst::FCMP_ORD:
 862     CondCode = AArch64CC::VC;
 863     break;
 864   case CmpInst::FCMP_UNO:
 865     CondCode = AArch64CC::VS;
 866     break;
 867   case CmpInst::FCMP_UEQ:
 868     CondCode = AArch64CC::EQ;
 869     CondCode2 = AArch64CC::VS;
 870     break;
 871   case CmpInst::FCMP_UGT:
 872     CondCode = AArch64CC::HI;
 873     break;
 874   case CmpInst::FCMP_UGE:
 875     CondCode = AArch64CC::PL;
 876     break;
 877   case CmpInst::FCMP_ULT:
 878     CondCode = AArch64CC::LT;
 879     break;
 880   case CmpInst::FCMP_ULE:
 881     CondCode = AArch64CC::LE;
 882     break;
 883   case CmpInst::FCMP_UNE:
 884     CondCode = AArch64CC::NE;
 885     break;
 886   }
 887 }
 888
 889 bool AArch64InstructionSelector::selectCompareBranch(
 890     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 891
 892   const Register CondReg = I.getOperand(0).getReg();
 893   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
 894   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
 895   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
 896     CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
 897   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
 898     return false;
 899
 900   Register LHS = CCMI->getOperand(2).getReg();
 901   Register RHS = CCMI->getOperand(3).getReg();
 902   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
 903   if (!VRegAndVal)
 904     std::swap(RHS, LHS);
 905
 906   VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
 907   if (!VRegAndVal || VRegAndVal->Value != 0) {
 908     MachineIRBuilder MIB(I);
 909     // If we can't select a CBZ then emit a cmp + Bcc.
 910     if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
 911                             CCMI->getOperand(1), MIB))
 912       return false;
 913     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
 914         (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
 915     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
 916     I.eraseFromParent();
 917     return true;
 918   }
 919
 920   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
 921   if (RB.getID() != AArch64::GPRRegBankID)
 922     return false;
 923
 924   const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
 925   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
 926     return false;
 927
 928   const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
 929   unsigned CBOpc = 0;
 930   if (CmpWidth <= 32)
 931     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
 932   else if (CmpWidth == 64)
 933     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
 934   else
 935     return false;
 936
 937   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
 938       .addUse(LHS)
 939       .addMBB(DestMBB)
 940       .constrainAllUses(TII, TRI, RBI);
 941
 942   I.eraseFromParent();
 943   return true;
 944 }
 945
 946 bool AArch64InstructionSelector::selectVectorSHL(
 947     MachineInstr &I, MachineRegisterInfo &MRI) const {
 948   assert(I.getOpcode() == TargetOpcode::G_SHL);
 949   Register DstReg = I.getOperand(0).getReg();
 950   const LLT Ty = MRI.getType(DstReg);
 951   Register Src1Reg = I.getOperand(1).getReg();
 952   Register Src2Reg = I.getOperand(2).getReg();
 953
 954   if (!Ty.isVector())
 955     return false;
 956
 957   unsigned Opc = 0;
 958   if (Ty == LLT::vector(4, 32)) {
 959     Opc = AArch64::USHLv4i32;
 960   } else if (Ty == LLT::vector(2, 32)) {
 961     Opc = AArch64::USHLv2i32;
 962   } else {
 963     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
 964     return false;
 965   }
 966
 967   MachineIRBuilder MIB(I);
 968   auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
 969   constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
 970   I.eraseFromParent();
 971   return true;
 972 }
 973
 974 bool AArch64InstructionSelector::selectVectorASHR(
 975     MachineInstr &I, MachineRegisterInfo &MRI) const {
 976   assert(I.getOpcode() == TargetOpcode::G_ASHR);
 977   Register DstReg = I.getOperand(0).getReg();
 978   const LLT Ty = MRI.getType(DstReg);
 979   Register Src1Reg = I.getOperand(1).getReg();
 980   Register Src2Reg = I.getOperand(2).getReg();
 981
 982   if (!Ty.isVector())
 983     return false;
 984
 985   // There is not a shift right register instruction, but the shift left
 986   // register instruction takes a signed value, where negative numbers specify a
 987   // right shift.
 988
 989   unsigned Opc = 0;
 990   unsigned NegOpc = 0;
 991   const TargetRegisterClass *RC = nullptr;
 992   if (Ty == LLT::vector(4, 32)) {
 993     Opc = AArch64::SSHLv4i32;
 994     NegOpc = AArch64::NEGv4i32;
 995     RC = &AArch64::FPR128RegClass;
 996   } else if (Ty == LLT::vector(2, 32)) {
 997     Opc = AArch64::SSHLv2i32;
 998     NegOpc = AArch64::NEGv2i32;
 999     RC = &AArch64::FPR64RegClass;
1000   } else {
1001     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1002     return false;
1003   }
1004
1005   MachineIRBuilder MIB(I);
1006   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1007   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1008   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1009   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1010   I.eraseFromParent();
1011   return true;
1012 }
1013
/// Stub: always returns false and never touches \p I, \p MF or \p MRI.
/// NOTE(review): going by the name, this is the AAPCS counterpart of
/// selectVaStartDarwin and is simply not implemented yet -- confirm.
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}
1018
1019 bool AArch64InstructionSelector::selectVaStartDarwin(
1020     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1021   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1022   Register ListReg = I.getOperand(0).getReg();
1023
1024   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1025
1026   auto MIB =
1027       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1028           .addDef(ArgsAddrReg)
1029           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1030           .addImm(0)
1031           .addImm(0);
1032
1033   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1034
1035   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1036             .addUse(ArgsAddrReg)
1037             .addUse(ListReg)
1038             .addImm(0)
1039             .addMemOperand(*I.memoperands_begin());
1040
1041   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1042   I.eraseFromParent();
1043   return true;
1044 }
1045
1046 void AArch64InstructionSelector::materializeLargeCMVal(
1047     MachineInstr &I, const Value *V, unsigned char OpFlags) const {
1048   MachineBasicBlock &MBB = *I.getParent();
1049   MachineFunction &MF = *MBB.getParent();
1050   MachineRegisterInfo &MRI = MF.getRegInfo();
1051   MachineIRBuilder MIB(I);
1052
1053   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1054   MovZ->addOperand(MF, I.getOperand(1));
1055   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1056                                      AArch64II::MO_NC);
1057   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1058   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1059
1060   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1061                        Register ForceDstReg) {
1062     Register DstReg = ForceDstReg
1063                           ? ForceDstReg
1064                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1065     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1066     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1067       MovI->addOperand(MF, MachineOperand::CreateGA(
1068                                GV, MovZ->getOperand(1).getOffset(), Flags));
1069     } else {
1070       MovI->addOperand(
1071           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1072                                        MovZ->getOperand(1).getOffset(), Flags));
1073     }
1074     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1075     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1076     return DstReg;
1077   };
1078   Register DstReg = BuildMovK(MovZ.getReg(0),
1079                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1080   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1081   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1082   return;
1083 }
1084
1085 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1086   MachineBasicBlock &MBB = *I.getParent();
1087   MachineFunction &MF = *MBB.getParent();
1088   MachineRegisterInfo &MRI = MF.getRegInfo();
1089
1090   switch (I.getOpcode()) {
1091   case TargetOpcode::G_SHL:
1092   case TargetOpcode::G_ASHR:
1093   case TargetOpcode::G_LSHR: {
1094     // These shifts are legalized to have 64 bit shift amounts because we want
1095     // to take advantage of the existing imported selection patterns that assume
1096     // the immediates are s64s. However, if the shifted type is 32 bits and for
1097     // some reason we receive input GMIR that has an s64 shift amount that's not
1098     // a G_CONSTANT, insert a truncate so that we can still select the s32
1099     // register-register variant.
1100     unsigned SrcReg = I.getOperand(1).getReg();
1101     unsigned ShiftReg = I.getOperand(2).getReg();
1102     const LLT ShiftTy = MRI.getType(ShiftReg);
1103     const LLT SrcTy = MRI.getType(SrcReg);
1104     if (SrcTy.isVector())
1105       return;
1106     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1107     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1108       return;
1109     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1110     assert(AmtMI && "could not find a vreg definition for shift amount");
1111     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1112       // Insert a subregister copy to implement a 64->32 trunc
1113       MachineIRBuilder MIB(I);
1114       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1115                        .addReg(ShiftReg, 0, AArch64::sub_32);
1116       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1117       I.getOperand(2).setReg(Trunc.getReg(0));
1118     }
1119     return;
1120   }
1121   default:
1122     return;
1123   }
1124 }
1125
1126 bool AArch64InstructionSelector::earlySelectSHL(
1127     MachineInstr &I, MachineRegisterInfo &MRI) const {
1128   // We try to match the immediate variant of LSL, which is actually an alias
1129   // for a special case of UBFM. Otherwise, we fall back to the imported
1130   // selector which will match the register variant.
1131   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1132   const auto &MO = I.getOperand(2);
1133   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1134   if (!VRegAndVal)
1135     return false;
1136
1137   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1138   if (DstTy.isVector())
1139     return false;
1140   bool Is64Bit = DstTy.getSizeInBits() == 64;
1141   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1142   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1143   MachineIRBuilder MIB(I);
1144
1145   if (!Imm1Fn || !Imm2Fn)
1146     return false;
1147
1148   auto NewI =
1149       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1150                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1151
1152   for (auto &RenderFn : *Imm1Fn)
1153     RenderFn(NewI);
1154   for (auto &RenderFn : *Imm2Fn)
1155     RenderFn(NewI);
1156
1157   I.eraseFromParent();
1158   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1159 }
1160
1161 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1162   assert(I.getParent() && "Instruction should be in a basic block!");
1163   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1164
1165   MachineBasicBlock &MBB = *I.getParent();
1166   MachineFunction &MF = *MBB.getParent();
1167   MachineRegisterInfo &MRI = MF.getRegInfo();
1168
1169   switch (I.getOpcode()) {
1170   case TargetOpcode::G_SHL:
1171     return earlySelectSHL(I, MRI);
1172   default:
1173     return false;
1174   }
1175 }
1176
1177 bool AArch64InstructionSelector::select(MachineInstr &I,
1178                                         CodeGenCoverage &CoverageInfo) const {
1179   assert(I.getParent() && "Instruction should be in a basic block!");
1180   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1181
1182   MachineBasicBlock &MBB = *I.getParent();
1183   MachineFunction &MF = *MBB.getParent();
1184   MachineRegisterInfo &MRI = MF.getRegInfo();
1185
1186   unsigned Opcode = I.getOpcode();
1187   // G_PHI requires same handling as PHI
1188   if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1189     // Certain non-generic instructions also need some special handling.
1190
1191     if (Opcode ==  TargetOpcode::LOAD_STACK_GUARD)
1192       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1193
1194     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1195       const Register DefReg = I.getOperand(0).getReg();
1196       const LLT DefTy = MRI.getType(DefReg);
1197
1198       const RegClassOrRegBank &RegClassOrBank =
1199         MRI.getRegClassOrRegBank(DefReg);
1200
1201       const TargetRegisterClass *DefRC
1202         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1203       if (!DefRC) {
1204         if (!DefTy.isValid()) {
1205           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1206           return false;
1207         }
1208         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1209         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1210         if (!DefRC) {
1211           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1212           return false;
1213         }
1214       }
1215
1216       I.setDesc(TII.get(TargetOpcode::PHI));
1217
1218       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1219     }
1220
1221     if (I.isCopy())
1222       return selectCopy(I, TII, MRI, TRI, RBI);
1223
1224     return true;
1225   }
1226
1227
1228   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1229     LLVM_DEBUG(
1230         dbgs() << "Generic instruction has unexpected implicit operands\n");
1231     return false;
1232   }
1233
1234   // Try to do some lowering before we start instruction selecting. These
1235   // lowerings are purely transformations on the input G_MIR and so selection
1236   // must continue after any modification of the instruction.
1237   preISelLower(I);
1238
1239   // There may be patterns where the importer can't deal with them optimally,
1240   // but does select it to a suboptimal sequence so our custom C++ selection
1241   // code later never has a chance to work on it. Therefore, we have an early
1242   // selection attempt here to give priority to certain selection routines
1243   // over the imported ones.
1244   if (earlySelect(I))
1245     return true;
1246
1247   if (selectImpl(I, CoverageInfo))
1248     return true;
1249
1250   LLT Ty =
1251       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1252
1253   MachineIRBuilder MIB(I);
1254
1255   switch (Opcode) {
1256   case TargetOpcode::G_BRCOND: {
1257     if (Ty.getSizeInBits() > 32) {
1258       // We shouldn't need this on AArch64, but it would be implemented as an
1259       // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1260       // bit being tested is < 32.
1261       LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1262                         << ", expected at most 32-bits");
1263       return false;
1264     }
1265
1266     const Register CondReg = I.getOperand(0).getReg();
1267     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1268
1269     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1270     // instructions will not be produced, as they are conditional branch
1271     // instructions that do not set flags.
1272     bool ProduceNonFlagSettingCondBr =
1273         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1274     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1275       return true;
1276
1277     if (ProduceNonFlagSettingCondBr) {
1278       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1279                      .addUse(CondReg)
1280                      .addImm(/*bit offset=*/0)
1281                      .addMBB(DestMBB);
1282
1283       I.eraseFromParent();
1284       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1285     } else {
1286       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1287                      .addDef(AArch64::WZR)
1288                      .addUse(CondReg)
1289                      .addImm(1);
1290       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1291       auto Bcc =
1292           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1293               .addImm(AArch64CC::EQ)
1294               .addMBB(DestMBB);
1295
1296       I.eraseFromParent();
1297       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1298     }
1299   }
1300
1301   case TargetOpcode::G_BRINDIRECT: {
1302     I.setDesc(TII.get(AArch64::BR));
1303     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1304   }
1305
1306   case TargetOpcode::G_BRJT:
1307     return selectBrJT(I, MRI);
1308
1309   case TargetOpcode::G_BSWAP: {
1310     // Handle vector types for G_BSWAP directly.
1311     Register DstReg = I.getOperand(0).getReg();
1312     LLT DstTy = MRI.getType(DstReg);
1313
1314     // We should only get vector types here; everything else is handled by the
1315     // importer right now.
1316     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1317       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1318       return false;
1319     }
1320
1321     // Only handle 4 and 2 element vectors for now.
1322     // TODO: 16-bit elements.
1323     unsigned NumElts = DstTy.getNumElements();
1324     if (NumElts != 4 && NumElts != 2) {
1325       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1326       return false;
1327     }
1328
1329     // Choose the correct opcode for the supported types. Right now, that's
1330     // v2s32, v4s32, and v2s64.
1331     unsigned Opc = 0;
1332     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1333     if (EltSize == 32)
1334       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1335                                           : AArch64::REV32v16i8;
1336     else if (EltSize == 64)
1337       Opc = AArch64::REV64v16i8;
1338
1339     // We should always get something by the time we get here...
1340     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1341
1342     I.setDesc(TII.get(Opc));
1343     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1344   }
1345
1346   case TargetOpcode::G_FCONSTANT:
1347   case TargetOpcode::G_CONSTANT: {
1348     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1349
1350     const LLT s8 = LLT::scalar(8);
1351     const LLT s16 = LLT::scalar(16);
1352     const LLT s32 = LLT::scalar(32);
1353     const LLT s64 = LLT::scalar(64);
1354     const LLT p0 = LLT::pointer(0, 64);
1355
1356     const Register DefReg = I.getOperand(0).getReg();
1357     const LLT DefTy = MRI.getType(DefReg);
1358     const unsigned DefSize = DefTy.getSizeInBits();
1359     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1360
1361     // FIXME: Redundant check, but even less readable when factored out.
1362     if (isFP) {
1363       if (Ty != s32 && Ty != s64) {
1364         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1365                           << " constant, expected: " << s32 << " or " << s64
1366                           << '\n');
1367         return false;
1368       }
1369
1370       if (RB.getID() != AArch64::FPRRegBankID) {
1371         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1372                           << " constant on bank: " << RB
1373                           << ", expected: FPR\n");
1374         return false;
1375       }
1376
1377       // The case when we have 0.0 is covered by tablegen. Reject it here so we
1378       // can be sure tablegen works correctly and isn't rescued by this code.
1379       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1380         return false;
1381     } else {
1382       // s32 and s64 are covered by tablegen.
1383       if (Ty != p0 && Ty != s8 && Ty != s16) {
1384         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1385                           << " constant, expected: " << s32 << ", " << s64
1386                           << ", or " << p0 << '\n');
1387         return false;
1388       }
1389
1390       if (RB.getID() != AArch64::GPRRegBankID) {
1391         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1392                           << " constant on bank: " << RB
1393                           << ", expected: GPR\n");
1394         return false;
1395       }
1396     }
1397
1398     // We allow G_CONSTANT of types < 32b.
1399     const unsigned MovOpc =
1400         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1401
1402     if (isFP) {
1403       // Either emit a FMOV, or emit a copy to emit a normal mov.
1404       const TargetRegisterClass &GPRRC =
1405           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1406       const TargetRegisterClass &FPRRC =
1407           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1408
1409       // Can we use a FMOV instruction to represent the immediate?
1410       if (emitFMovForFConstant(I, MRI))
1411         return true;
1412
1413       // Nope. Emit a copy and use a normal mov instead.
1414       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1415       MachineOperand &RegOp = I.getOperand(0);
1416       RegOp.setReg(DefGPRReg);
1417       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1418       MIB.buildCopy({DefReg}, {DefGPRReg});
1419
1420       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1421         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1422         return false;
1423       }
1424
1425       MachineOperand &ImmOp = I.getOperand(1);
1426       // FIXME: Is going through int64_t always correct?
1427       ImmOp.ChangeToImmediate(
1428           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1429     } else if (I.getOperand(1).isCImm()) {
1430       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1431       I.getOperand(1).ChangeToImmediate(Val);
1432     } else if (I.getOperand(1).isImm()) {
1433       uint64_t Val = I.getOperand(1).getImm();
1434       I.getOperand(1).ChangeToImmediate(Val);
1435     }
1436
1437     I.setDesc(TII.get(MovOpc));
1438     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1439     return true;
1440   }
1441   case TargetOpcode::G_EXTRACT: {
1442     LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1443     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1444     (void)DstTy;
1445     unsigned SrcSize = SrcTy.getSizeInBits();
1446     // Larger extracts are vectors, same-size extracts should be something else
1447     // by now (either split up or simplified to a COPY).
1448     if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1449       return false;
1450
1451     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1452     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1453                                       Ty.getSizeInBits() - 1);
1454
1455     if (SrcSize < 64) {
1456       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1457              "unexpected G_EXTRACT types");
1458       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1459     }
1460
1461     Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1462     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1463     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1464         .addReg(DstReg, 0, AArch64::sub_32);
1465     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1466                                  AArch64::GPR32RegClass, MRI);
1467     I.getOperand(0).setReg(DstReg);
1468
1469     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1470   }
1471
1472   case TargetOpcode::G_INSERT: {
1473     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1474     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1475     unsigned DstSize = DstTy.getSizeInBits();
1476     // Larger inserts are vectors, same-size ones should be something else by
1477     // now (split up or turned into COPYs).
1478     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1479       return false;
1480
1481     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1482     unsigned LSB = I.getOperand(3).getImm();
1483     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1484     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1485     MachineInstrBuilder(MF, I).addImm(Width - 1);
1486
1487     if (DstSize < 64) {
1488       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1489              "unexpected G_INSERT types");
1490       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1491     }
1492
1493     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1494     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1495             TII.get(AArch64::SUBREG_TO_REG))
1496         .addDef(SrcReg)
1497         .addImm(0)
1498         .addUse(I.getOperand(2).getReg())
1499         .addImm(AArch64::sub_32);
1500     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1501                                  AArch64::GPR32RegClass, MRI);
1502     I.getOperand(2).setReg(SrcReg);
1503
1504     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1505   }
1506   case TargetOpcode::G_FRAME_INDEX: {
1507     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1508     if (Ty != LLT::pointer(0, 64)) {
1509       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1510                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1511       return false;
1512     }
1513     I.setDesc(TII.get(AArch64::ADDXri));
1514
1515     // MOs for a #0 shifted immediate.
1516     I.addOperand(MachineOperand::CreateImm(0));
1517     I.addOperand(MachineOperand::CreateImm(0));
1518
1519     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1520   }
1521
1522   case TargetOpcode::G_GLOBAL_VALUE: {
1523     auto GV = I.getOperand(1).getGlobal();
1524     if (GV->isThreadLocal()) {
1525       // FIXME: we don't support TLS yet.
1526       return false;
1527     }
1528     unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
1529     if (OpFlags & AArch64II::MO_GOT) {
1530       I.setDesc(TII.get(AArch64::LOADgot));
1531       I.getOperand(1).setTargetFlags(OpFlags);
1532     } else if (TM.getCodeModel() == CodeModel::Large) {
1533       // Materialize the global using movz/movk instructions.
1534       materializeLargeCMVal(I, GV, OpFlags);
1535       I.eraseFromParent();
1536       return true;
1537     } else if (TM.getCodeModel() == CodeModel::Tiny) {
1538       I.setDesc(TII.get(AArch64::ADR));
1539       I.getOperand(1).setTargetFlags(OpFlags);
1540     } else {
1541       I.setDesc(TII.get(AArch64::MOVaddr));
1542       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1543       MachineInstrBuilder MIB(MF, I);
1544       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1545                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1546     }
1547     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1548   }
1549
1550   case TargetOpcode::G_ZEXTLOAD:
1551   case TargetOpcode::G_LOAD:
1552   case TargetOpcode::G_STORE: {
1553     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1554     MachineIRBuilder MIB(I);
1555
1556     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1557
1558     if (PtrTy != LLT::pointer(0, 64)) {
1559       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1560                         << ", expected: " << LLT::pointer(0, 64) << '\n');
1561       return false;
1562     }
1563
1564     auto &MemOp = **I.memoperands_begin();
1565     if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
1566       LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1567       return false;
1568     }
1569     unsigned MemSizeInBits = MemOp.getSize() * 8;
1570
1571     const Register PtrReg = I.getOperand(1).getReg();
1572 #ifndef NDEBUG
1573     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1574     // Sanity-check the pointer register.
1575     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1576            "Load/Store pointer operand isn't a GPR");
1577     assert(MRI.getType(PtrReg).isPointer() &&
1578            "Load/Store pointer operand isn't a pointer");
1579 #endif
1580
1581     const Register ValReg = I.getOperand(0).getReg();
1582     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1583
1584     const unsigned NewOpc =
1585         selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1586     if (NewOpc == I.getOpcode())
1587       return false;
1588
1589     I.setDesc(TII.get(NewOpc));
1590
1591     uint64_t Offset = 0;
1592     auto *PtrMI = MRI.getVRegDef(PtrReg);
1593
1594     // Try to fold a GEP into our unsigned immediate addressing mode.
1595     if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1596       if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1597         int64_t Imm = *COff;
1598         const unsigned Size = MemSizeInBits / 8;
1599         const unsigned Scale = Log2_32(Size);
1600         if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1601           unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1602           I.getOperand(1).setReg(Ptr2Reg);
1603           PtrMI = MRI.getVRegDef(Ptr2Reg);
1604           Offset = Imm / Size;
1605         }
1606       }
1607     }
1608
1609     // If we haven't folded anything into our addressing mode yet, try to fold
1610     // a frame index into the base+offset.
1611     if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1612       I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1613
1614     I.addOperand(MachineOperand::CreateImm(Offset));
1615
1616     // If we're storing a 0, use WZR/XZR.
1617     if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1618       if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1619         if (I.getOpcode() == AArch64::STRWui)
1620           I.getOperand(0).setReg(AArch64::WZR);
1621         else if (I.getOpcode() == AArch64::STRXui)
1622           I.getOperand(0).setReg(AArch64::XZR);
1623       }
1624     }
1625
1626     if (IsZExtLoad) {
1627       // The zextload from a smaller type to i32 should be handled by the importer.
1628       if (MRI.getType(ValReg).getSizeInBits() != 64)
1629         return false;
1630       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1631       //and zero_extend with SUBREG_TO_REG.
1632       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1633       Register DstReg = I.getOperand(0).getReg();
1634       I.getOperand(0).setReg(LdReg);
1635
1636       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1637       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1638           .addImm(0)
1639           .addUse(LdReg)
1640           .addImm(AArch64::sub_32);
1641       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1642       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1643                                           MRI);
1644     }
1645     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1646   }
1647
1648   case TargetOpcode::G_SMULH:
1649   case TargetOpcode::G_UMULH: {
1650     // Reject the various things we don't support yet.
1651     if (unsupportedBinOp(I, RBI, MRI, TRI))
1652       return false;
1653
1654     const Register DefReg = I.getOperand(0).getReg();
1655     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1656
1657     if (RB.getID() != AArch64::GPRRegBankID) {
1658       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1659       return false;
1660     }
1661
1662     if (Ty != LLT::scalar(64)) {
1663       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1664                         << ", expected: " << LLT::scalar(64) << '\n');
1665       return false;
1666     }
1667
1668     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1669                                                              : AArch64::UMULHrr;
1670     I.setDesc(TII.get(NewOpc));
1671
1672     // Now that we selected an opcode, we need to constrain the register
1673     // operands to use appropriate classes.
1674     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1675   }
1676   case TargetOpcode::G_FADD:
1677   case TargetOpcode::G_FSUB:
1678   case TargetOpcode::G_FMUL:
1679   case TargetOpcode::G_FDIV:
1680
1681   case TargetOpcode::G_ASHR:
1682     if (MRI.getType(I.getOperand(0).getReg()).isVector())
1683       return selectVectorASHR(I, MRI);
1684     LLVM_FALLTHROUGH;
1685   case TargetOpcode::G_SHL:
1686     if (Opcode == TargetOpcode::G_SHL &&
1687         MRI.getType(I.getOperand(0).getReg()).isVector())
1688       return selectVectorSHL(I, MRI);
1689     LLVM_FALLTHROUGH;
1690   case TargetOpcode::G_OR:
1691   case TargetOpcode::G_LSHR:
1692   case TargetOpcode::G_GEP: {
1693     // Reject the various things we don't support yet.
1694     if (unsupportedBinOp(I, RBI, MRI, TRI))
1695       return false;
1696
1697     const unsigned OpSize = Ty.getSizeInBits();
1698
1699     const Register DefReg = I.getOperand(0).getReg();
1700     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1701
1702     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1703     if (NewOpc == I.getOpcode())
1704       return false;
1705
1706     I.setDesc(TII.get(NewOpc));
1707     // FIXME: Should the type be always reset in setDesc?
1708
1709     // Now that we selected an opcode, we need to constrain the register
1710     // operands to use appropriate classes.
1711     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1712   }
1713
1714   case TargetOpcode::G_UADDO: {
1715     // TODO: Support other types.
1716     unsigned OpSize = Ty.getSizeInBits();
1717     if (OpSize != 32 && OpSize != 64) {
1718       LLVM_DEBUG(
1719           dbgs()
1720           << "G_UADDO currently only supported for 32 and 64 b types.\n");
1721       return false;
1722     }
1723
1724     // TODO: Support vectors.
1725     if (Ty.isVector()) {
1726       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1727       return false;
1728     }
1729
1730     // Add and set the set condition flag.
1731     unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1732     MachineIRBuilder MIRBuilder(I);
1733     auto AddsMI = MIRBuilder.buildInstr(
1734         AddsOpc, {I.getOperand(0).getReg()},
1735         {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1736     constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1737
1738     // Now, put the overflow result in the register given by the first operand
1739     // to the G_UADDO. CSINC increments the result when the predicate is false,
1740     // so to get the increment when it's true, we need to use the inverse. In
1741     // this case, we want to increment when carry is set.
1742     auto CsetMI = MIRBuilder
1743                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1744                                   {Register(AArch64::WZR), Register(AArch64::WZR)})
1745                       .addImm(getInvertedCondCode(AArch64CC::HS));
1746     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1747     I.eraseFromParent();
1748     return true;
1749   }
1750
1751   case TargetOpcode::G_PTR_MASK: {
1752     uint64_t Align = I.getOperand(2).getImm();
1753     if (Align >= 64 || Align == 0)
1754       return false;
1755
1756     uint64_t Mask = ~((1ULL << Align) - 1);
1757     I.setDesc(TII.get(AArch64::ANDXri));
1758     I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1759
1760     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1761   }
1762   case TargetOpcode::G_PTRTOINT:
1763   case TargetOpcode::G_TRUNC: {
1764     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1765     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1766
1767     const Register DstReg = I.getOperand(0).getReg();
1768     const Register SrcReg = I.getOperand(1).getReg();
1769
1770     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1771     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1772
1773     if (DstRB.getID() != SrcRB.getID()) {
1774       LLVM_DEBUG(
1775           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1776       return false;
1777     }
1778
1779     if (DstRB.getID() == AArch64::GPRRegBankID) {
1780       const TargetRegisterClass *DstRC =
1781           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1782       if (!DstRC)
1783         return false;
1784
1785       const TargetRegisterClass *SrcRC =
1786           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1787       if (!SrcRC)
1788         return false;
1789
1790       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1791           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1792         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1793         return false;
1794       }
1795
1796       if (DstRC == SrcRC) {
1797         // Nothing to be done
1798       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1799                  SrcTy == LLT::scalar(64)) {
1800         llvm_unreachable("TableGen can import this case");
1801         return false;
1802       } else if (DstRC == &AArch64::GPR32RegClass &&
1803                  SrcRC == &AArch64::GPR64RegClass) {
1804         I.getOperand(1).setSubReg(AArch64::sub_32);
1805       } else {
1806         LLVM_DEBUG(
1807             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1808         return false;
1809       }
1810
1811       I.setDesc(TII.get(TargetOpcode::COPY));
1812       return true;
1813     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1814       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1815         I.setDesc(TII.get(AArch64::XTNv4i16));
1816         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1817         return true;
1818       }
1819     }
1820
1821     return false;
1822   }
1823
1824   case TargetOpcode::G_ANYEXT: {
1825     const Register DstReg = I.getOperand(0).getReg();
1826     const Register SrcReg = I.getOperand(1).getReg();
1827
1828     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1829     if (RBDst.getID() != AArch64::GPRRegBankID) {
1830       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1831                         << ", expected: GPR\n");
1832       return false;
1833     }
1834
1835     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1836     if (RBSrc.getID() != AArch64::GPRRegBankID) {
1837       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1838                         << ", expected: GPR\n");
1839       return false;
1840     }
1841
1842     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1843
1844     if (DstSize == 0) {
1845       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
1846       return false;
1847     }
1848
1849     if (DstSize != 64 && DstSize > 32) {
1850       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1851                         << ", expected: 32 or 64\n");
1852       return false;
1853     }
1854     // At this point G_ANYEXT is just like a plain COPY, but we need
1855     // to explicitly form the 64-bit value if any.
1856     if (DstSize > 32) {
1857       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1858       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1859           .addDef(ExtSrc)
1860           .addImm(0)
1861           .addUse(SrcReg)
1862           .addImm(AArch64::sub_32);
1863       I.getOperand(1).setReg(ExtSrc);
1864     }
1865     return selectCopy(I, TII, MRI, TRI, RBI);
1866   }
1867
1868   case TargetOpcode::G_ZEXT:
1869   case TargetOpcode::G_SEXT: {
1870     unsigned Opcode = I.getOpcode();
1871     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1872               SrcTy = MRI.getType(I.getOperand(1).getReg());
1873     const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1874     const Register DefReg = I.getOperand(0).getReg();
1875     const Register SrcReg = I.getOperand(1).getReg();
1876     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1877
1878     if (RB.getID() != AArch64::GPRRegBankID) {
1879       LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1880                         << ", expected: GPR\n");
1881       return false;
1882     }
1883
1884     MachineInstr *ExtI;
1885     if (DstTy == LLT::scalar(64)) {
1886       // FIXME: Can we avoid manually doing this?
1887       if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
1888         LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1889                           << " operand\n");
1890         return false;
1891       }
1892
1893       const Register SrcXReg =
1894           MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1895       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1896           .addDef(SrcXReg)
1897           .addImm(0)
1898           .addUse(SrcReg)
1899           .addImm(AArch64::sub_32);
1900
1901       const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1902       ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1903                  .addDef(DefReg)
1904                  .addUse(SrcXReg)
1905                  .addImm(0)
1906                  .addImm(SrcTy.getSizeInBits() - 1);
1907     } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
1908       const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1909       ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1910                  .addDef(DefReg)
1911                  .addUse(SrcReg)
1912                  .addImm(0)
1913                  .addImm(SrcTy.getSizeInBits() - 1);
1914     } else {
1915       return false;
1916     }
1917
1918     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1919
1920     I.eraseFromParent();
1921     return true;
1922   }
1923
1924   case TargetOpcode::G_SITOFP:
1925   case TargetOpcode::G_UITOFP:
1926   case TargetOpcode::G_FPTOSI:
1927   case TargetOpcode::G_FPTOUI: {
1928     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1929               SrcTy = MRI.getType(I.getOperand(1).getReg());
1930     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1931     if (NewOpc == Opcode)
1932       return false;
1933
1934     I.setDesc(TII.get(NewOpc));
1935     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1936
1937     return true;
1938   }
1939
1940
1941   case TargetOpcode::G_INTTOPTR:
1942     // The importer is currently unable to import pointer types since they
1943     // didn't exist in SelectionDAG.
1944     return selectCopy(I, TII, MRI, TRI, RBI);
1945
1946   case TargetOpcode::G_BITCAST:
1947     // Imported SelectionDAG rules can handle every bitcast except those that
1948     // bitcast from a type to the same type. Ideally, these shouldn't occur
1949     // but we might not run an optimizer that deletes them. The other exception
1950     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1951     // of them.
1952     return selectCopy(I, TII, MRI, TRI, RBI);
1953
1954   case TargetOpcode::G_SELECT: {
1955     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
1956       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1957                         << ", expected: " << LLT::scalar(1) << '\n');
1958       return false;
1959     }
1960
1961     const Register CondReg = I.getOperand(1).getReg();
1962     const Register TReg = I.getOperand(2).getReg();
1963     const Register FReg = I.getOperand(3).getReg();
1964
1965     if (tryOptSelect(I))
1966       return true;
1967
1968     Register CSelOpc = selectSelectOpc(I, MRI, RBI);
1969     MachineInstr &TstMI =
1970         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1971              .addDef(AArch64::WZR)
1972              .addUse(CondReg)
1973              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1974
1975     MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1976                                 .addDef(I.getOperand(0).getReg())
1977                                 .addUse(TReg)
1978                                 .addUse(FReg)
1979                                 .addImm(AArch64CC::NE);
1980
1981     constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1982     constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1983
1984     I.eraseFromParent();
1985     return true;
1986   }
1987   case TargetOpcode::G_ICMP: {
1988     if (Ty.isVector())
1989       return selectVectorICmp(I, MRI);
1990
1991     if (Ty != LLT::scalar(32)) {
1992       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1993                         << ", expected: " << LLT::scalar(32) << '\n');
1994       return false;
1995     }
1996
1997     MachineIRBuilder MIRBuilder(I);
1998     if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
1999                             MIRBuilder))
2000       return false;
2001     emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2002                     MIRBuilder);
2003     I.eraseFromParent();
2004     return true;
2005   }
2006
2007   case TargetOpcode::G_FCMP: {
2008     if (Ty != LLT::scalar(32)) {
2009       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2010                         << ", expected: " << LLT::scalar(32) << '\n');
2011       return false;
2012     }
2013
2014     unsigned CmpOpc = selectFCMPOpc(I, MRI);
2015     if (!CmpOpc)
2016       return false;
2017
2018     // FIXME: regbank
2019
2020     AArch64CC::CondCode CC1, CC2;
2021     changeFCMPPredToAArch64CC(
2022         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2023
2024     // Partially build the compare. Decide if we need to add a use for the
2025     // third operand based off whether or not we're comparing against 0.0.
2026     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2027                      .addUse(I.getOperand(2).getReg());
2028
2029     // If we don't have an immediate compare, then we need to add a use of the
2030     // register which wasn't used for the immediate.
2031     // Note that the immediate will always be the last operand.
2032     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2033       CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2034
2035     const Register DefReg = I.getOperand(0).getReg();
2036     Register Def1Reg = DefReg;
2037     if (CC2 != AArch64CC::AL)
2038       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2039
2040     MachineInstr &CSetMI =
2041         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2042              .addDef(Def1Reg)
2043              .addUse(AArch64::WZR)
2044              .addUse(AArch64::WZR)
2045              .addImm(getInvertedCondCode(CC1));
2046
2047     if (CC2 != AArch64CC::AL) {
2048       Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2049       MachineInstr &CSet2MI =
2050           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2051                .addDef(Def2Reg)
2052                .addUse(AArch64::WZR)
2053                .addUse(AArch64::WZR)
2054                .addImm(getInvertedCondCode(CC2));
2055       MachineInstr &OrMI =
2056           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2057                .addDef(DefReg)
2058                .addUse(Def1Reg)
2059                .addUse(Def2Reg);
2060       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2061       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2062     }
2063     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2064     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2065
2066     I.eraseFromParent();
2067     return true;
2068   }
2069   case TargetOpcode::G_VASTART:
2070     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2071                                 : selectVaStartAAPCS(I, MF, MRI);
2072   case TargetOpcode::G_INTRINSIC:
2073     return selectIntrinsic(I, MRI);
2074   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2075     return selectIntrinsicWithSideEffects(I, MRI);
2076   case TargetOpcode::G_IMPLICIT_DEF: {
2077     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2078     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2079     const Register DstReg = I.getOperand(0).getReg();
2080     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2081     const TargetRegisterClass *DstRC =
2082         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2083     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2084     return true;
2085   }
2086   case TargetOpcode::G_BLOCK_ADDR: {
2087     if (TM.getCodeModel() == CodeModel::Large) {
2088       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2089       I.eraseFromParent();
2090       return true;
2091     } else {
2092       I.setDesc(TII.get(AArch64::MOVaddrBA));
2093       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2094                            I.getOperand(0).getReg())
2095                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
2096                                         /* Offset */ 0, AArch64II::MO_PAGE)
2097                        .addBlockAddress(
2098                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2099                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2100       I.eraseFromParent();
2101       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2102     }
2103   }
2104   case TargetOpcode::G_INTRINSIC_TRUNC:
2105     return selectIntrinsicTrunc(I, MRI);
2106   case TargetOpcode::G_INTRINSIC_ROUND:
2107     return selectIntrinsicRound(I, MRI);
2108   case TargetOpcode::G_BUILD_VECTOR:
2109     return selectBuildVector(I, MRI);
2110   case TargetOpcode::G_MERGE_VALUES:
2111     return selectMergeValues(I, MRI);
2112   case TargetOpcode::G_UNMERGE_VALUES:
2113     return selectUnmergeValues(I, MRI);
2114   case TargetOpcode::G_SHUFFLE_VECTOR:
2115     return selectShuffleVector(I, MRI);
2116   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2117     return selectExtractElt(I, MRI);
2118   case TargetOpcode::G_INSERT_VECTOR_ELT:
2119     return selectInsertElt(I, MRI);
2120   case TargetOpcode::G_CONCAT_VECTORS:
2121     return selectConcatVectors(I, MRI);
2122   case TargetOpcode::G_JUMP_TABLE:
2123     return selectJumpTable(I, MRI);
2124   }
2125
2126   return false;
2127 }
2128
2129 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2130                                             MachineRegisterInfo &MRI) const {
2131   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2132   Register JTAddr = I.getOperand(0).getReg();
2133   unsigned JTI = I.getOperand(1).getIndex();
2134   Register Index = I.getOperand(2).getReg();
2135   MachineIRBuilder MIB(I);
2136
2137   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2138   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2139   MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2140                  {JTAddr, Index})
2141       .addJumpTableIndex(JTI);
2142
2143   // Build the indirect branch.
2144   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2145   I.eraseFromParent();
2146   return true;
2147 }
2148
2149 bool AArch64InstructionSelector::selectJumpTable(
2150     MachineInstr &I, MachineRegisterInfo &MRI) const {
2151   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2152   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2153
2154   Register DstReg = I.getOperand(0).getReg();
2155   unsigned JTI = I.getOperand(1).getIndex();
2156   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2157   MachineIRBuilder MIB(I);
2158   auto MovMI =
2159     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2160           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2161           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2162   I.eraseFromParent();
2163   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2164 }
2165
2166 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2167     MachineInstr &I, MachineRegisterInfo &MRI) const {
2168   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2169
2170   // Select the correct opcode.
2171   unsigned Opc = 0;
2172   if (!SrcTy.isVector()) {
2173     switch (SrcTy.getSizeInBits()) {
2174     default:
2175     case 16:
2176       Opc = AArch64::FRINTZHr;
2177       break;
2178     case 32:
2179       Opc = AArch64::FRINTZSr;
2180       break;
2181     case 64:
2182       Opc = AArch64::FRINTZDr;
2183       break;
2184     }
2185   } else {
2186     unsigned NumElts = SrcTy.getNumElements();
2187     switch (SrcTy.getElementType().getSizeInBits()) {
2188     default:
2189       break;
2190     case 16:
2191       if (NumElts == 4)
2192         Opc = AArch64::FRINTZv4f16;
2193       else if (NumElts == 8)
2194         Opc = AArch64::FRINTZv8f16;
2195       break;
2196     case 32:
2197       if (NumElts == 2)
2198         Opc = AArch64::FRINTZv2f32;
2199       else if (NumElts == 4)
2200         Opc = AArch64::FRINTZv4f32;
2201       break;
2202     case 64:
2203       if (NumElts == 2)
2204         Opc = AArch64::FRINTZv2f64;
2205       break;
2206     }
2207   }
2208
2209   if (!Opc) {
2210     // Didn't get an opcode above, bail.
2211     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2212     return false;
2213   }
2214
2215   // Legalization would have set us up perfectly for this; we just need to
2216   // set the opcode and move on.
2217   I.setDesc(TII.get(Opc));
2218   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2219 }
2220
2221 bool AArch64InstructionSelector::selectIntrinsicRound(
2222     MachineInstr &I, MachineRegisterInfo &MRI) const {
2223   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2224
2225   // Select the correct opcode.
2226   unsigned Opc = 0;
2227   if (!SrcTy.isVector()) {
2228     switch (SrcTy.getSizeInBits()) {
2229     default:
2230     case 16:
2231       Opc = AArch64::FRINTAHr;
2232       break;
2233     case 32:
2234       Opc = AArch64::FRINTASr;
2235       break;
2236     case 64:
2237       Opc = AArch64::FRINTADr;
2238       break;
2239     }
2240   } else {
2241     unsigned NumElts = SrcTy.getNumElements();
2242     switch (SrcTy.getElementType().getSizeInBits()) {
2243     default:
2244       break;
2245     case 16:
2246       if (NumElts == 4)
2247         Opc = AArch64::FRINTAv4f16;
2248       else if (NumElts == 8)
2249         Opc = AArch64::FRINTAv8f16;
2250       break;
2251     case 32:
2252       if (NumElts == 2)
2253         Opc = AArch64::FRINTAv2f32;
2254       else if (NumElts == 4)
2255         Opc = AArch64::FRINTAv4f32;
2256       break;
2257     case 64:
2258       if (NumElts == 2)
2259         Opc = AArch64::FRINTAv2f64;
2260       break;
2261     }
2262   }
2263
2264   if (!Opc) {
2265     // Didn't get an opcode above, bail.
2266     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2267     return false;
2268   }
2269
2270   // Legalization would have set us up perfectly for this; we just need to
2271   // set the opcode and move on.
2272   I.setDesc(TII.get(Opc));
2273   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2274 }
2275
2276 bool AArch64InstructionSelector::selectVectorICmp(
2277     MachineInstr &I, MachineRegisterInfo &MRI) const {
2278   Register DstReg = I.getOperand(0).getReg();
2279   LLT DstTy = MRI.getType(DstReg);
2280   Register SrcReg = I.getOperand(2).getReg();
2281   Register Src2Reg = I.getOperand(3).getReg();
2282   LLT SrcTy = MRI.getType(SrcReg);
2283
2284   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2285   unsigned NumElts = DstTy.getNumElements();
2286
2287   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2288   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2289   // Third index is cc opcode:
2290   // 0 == eq
2291   // 1 == ugt
2292   // 2 == uge
2293   // 3 == ult
2294   // 4 == ule
2295   // 5 == sgt
2296   // 6 == sge
2297   // 7 == slt
2298   // 8 == sle
2299   // ne is done by negating 'eq' result.
2300
2301   // This table below assumes that for some comparisons the operands will be
2302   // commuted.
2303   // ult op == commute + ugt op
2304   // ule op == commute + uge op
2305   // slt op == commute + sgt op
2306   // sle op == commute + sge op
2307   unsigned PredIdx = 0;
2308   bool SwapOperands = false;
2309   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2310   switch (Pred) {
2311   case CmpInst::ICMP_NE:
2312   case CmpInst::ICMP_EQ:
2313     PredIdx = 0;
2314     break;
2315   case CmpInst::ICMP_UGT:
2316     PredIdx = 1;
2317     break;
2318   case CmpInst::ICMP_UGE:
2319     PredIdx = 2;
2320     break;
2321   case CmpInst::ICMP_ULT:
2322     PredIdx = 3;
2323     SwapOperands = true;
2324     break;
2325   case CmpInst::ICMP_ULE:
2326     PredIdx = 4;
2327     SwapOperands = true;
2328     break;
2329   case CmpInst::ICMP_SGT:
2330     PredIdx = 5;
2331     break;
2332   case CmpInst::ICMP_SGE:
2333     PredIdx = 6;
2334     break;
2335   case CmpInst::ICMP_SLT:
2336     PredIdx = 7;
2337     SwapOperands = true;
2338     break;
2339   case CmpInst::ICMP_SLE:
2340     PredIdx = 8;
2341     SwapOperands = true;
2342     break;
2343   default:
2344     llvm_unreachable("Unhandled icmp predicate");
2345     return false;
2346   }
2347
2348   // This table obviously should be tablegen'd when we have our GISel native
2349   // tablegen selector.
2350
2351   static const unsigned OpcTable[4][4][9] = {
2352       {
2353           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2354            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2355            0 /* invalid */},
2356           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2357            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2358            0 /* invalid */},
2359           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2360            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2361            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2362           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2363            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2364            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2365       },
2366       {
2367           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2368            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2369            0 /* invalid */},
2370           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2371            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2372            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2373           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2374            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2375            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2376           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2377            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2378            0 /* invalid */}
2379       },
2380       {
2381           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2382            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2383            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2384           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2385            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2386            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2387           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2388            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2389            0 /* invalid */},
2390           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2391            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2392            0 /* invalid */}
2393       },
2394       {
2395           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2396            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2397            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2398           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2399            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2400            0 /* invalid */},
2401           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2402            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2403            0 /* invalid */},
2404           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2405            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2406            0 /* invalid */}
2407       },
2408   };
2409   unsigned EltIdx = Log2_32(SrcEltSize / 8);
2410   unsigned NumEltsIdx = Log2_32(NumElts / 2);
2411   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2412   if (!Opc) {
2413     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2414     return false;
2415   }
2416
2417   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2418   const TargetRegisterClass *SrcRC =
2419       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2420   if (!SrcRC) {
2421     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2422     return false;
2423   }
2424
2425   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2426   if (SrcTy.getSizeInBits() == 128)
2427     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2428
2429   if (SwapOperands)
2430     std::swap(SrcReg, Src2Reg);
2431
2432   MachineIRBuilder MIB(I);
2433   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2434   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2435
2436   // Invert if we had a 'ne' cc.
2437   if (NotOpc) {
2438     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2439     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2440   } else {
2441     MIB.buildCopy(DstReg, Cmp.getReg(0));
2442   }
2443   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2444   I.eraseFromParent();
2445   return true;
2446 }
2447
2448 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2449     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2450     MachineIRBuilder &MIRBuilder) const {
2451   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2452
2453   auto BuildFn = [&](unsigned SubregIndex) {
2454     auto Ins =
2455         MIRBuilder
2456             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2457             .addImm(SubregIndex);
2458     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2459     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2460     return &*Ins;
2461   };
2462
2463   switch (EltSize) {
2464   case 16:
2465     return BuildFn(AArch64::hsub);
2466   case 32:
2467     return BuildFn(AArch64::ssub);
2468   case 64:
2469     return BuildFn(AArch64::dsub);
2470   default:
2471     return nullptr;
2472   }
2473 }
2474
2475 bool AArch64InstructionSelector::selectMergeValues(
2476     MachineInstr &I, MachineRegisterInfo &MRI) const {
2477   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2478   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2479   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2480   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2481
2482   // At the moment we only support merging two s32s into an s64.
2483   if (I.getNumOperands() != 3)
2484     return false;
2485   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2486     return false;
2487   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2488   if (RB.getID() != AArch64::GPRRegBankID)
2489     return false;
2490
2491   auto *DstRC = &AArch64::GPR64RegClass;
2492   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2493   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2494                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2495                                 .addDef(SubToRegDef)
2496                                 .addImm(0)
2497                                 .addUse(I.getOperand(1).getReg())
2498                                 .addImm(AArch64::sub_32);
2499   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2500   // Need to anyext the second scalar before we can use bfm
2501   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2502                                     TII.get(TargetOpcode::SUBREG_TO_REG))
2503                                 .addDef(SubToRegDef2)
2504                                 .addImm(0)
2505                                 .addUse(I.getOperand(2).getReg())
2506                                 .addImm(AArch64::sub_32);
2507   MachineInstr &BFM =
2508       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2509            .addDef(I.getOperand(0).getReg())
2510            .addUse(SubToRegDef)
2511            .addUse(SubToRegDef2)
2512            .addImm(32)
2513            .addImm(31);
2514   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2515   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2516   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2517   I.eraseFromParent();
2518   return true;
2519 }
2520
2521 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2522                               const unsigned EltSize) {
2523   // Choose a lane copy opcode and subregister based off of the size of the
2524   // vector's elements.
2525   switch (EltSize) {
2526   case 16:
2527     CopyOpc = AArch64::CPYi16;
2528     ExtractSubReg = AArch64::hsub;
2529     break;
2530   case 32:
2531     CopyOpc = AArch64::CPYi32;
2532     ExtractSubReg = AArch64::ssub;
2533     break;
2534   case 64:
2535     CopyOpc = AArch64::CPYi64;
2536     ExtractSubReg = AArch64::dsub;
2537     break;
2538   default:
2539     // Unknown size, bail out.
2540     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2541     return false;
2542   }
2543   return true;
2544 }
2545
2546 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2547     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2548     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2549   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2550   unsigned CopyOpc = 0;
2551   unsigned ExtractSubReg = 0;
2552   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2553     LLVM_DEBUG(
2554         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2555     return nullptr;
2556   }
2557
2558   const TargetRegisterClass *DstRC =
2559       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2560   if (!DstRC) {
2561     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2562     return nullptr;
2563   }
2564
2565   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2566   const LLT &VecTy = MRI.getType(VecReg);
2567   const TargetRegisterClass *VecRC =
2568       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2569   if (!VecRC) {
2570     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2571     return nullptr;
2572   }
2573
2574   // The register that we're going to copy into.
2575   Register InsertReg = VecReg;
2576   if (!DstReg)
2577     DstReg = MRI.createVirtualRegister(DstRC);
2578   // If the lane index is 0, we just use a subregister COPY.
2579   if (LaneIdx == 0) {
2580     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2581                     .addReg(VecReg, 0, ExtractSubReg);
2582     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2583     return &*Copy;
2584   }
2585
2586   // Lane copies require 128-bit wide registers. If we're dealing with an
2587   // unpacked vector, then we need to move up to that width. Insert an implicit
2588   // def and a subregister insert to get us there.
2589   if (VecTy.getSizeInBits() != 128) {
2590     MachineInstr *ScalarToVector = emitScalarToVector(
2591         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2592     if (!ScalarToVector)
2593       return nullptr;
2594     InsertReg = ScalarToVector->getOperand(0).getReg();
2595   }
2596
2597   MachineInstr *LaneCopyMI =
2598       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2599   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2600
2601   // Make sure that we actually constrain the initial copy.
2602   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2603   return LaneCopyMI;
2604 }
2605
2606 bool AArch64InstructionSelector::selectExtractElt(
2607     MachineInstr &I, MachineRegisterInfo &MRI) const {
2608   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2609          "unexpected opcode!");
2610   Register DstReg = I.getOperand(0).getReg();
2611   const LLT NarrowTy = MRI.getType(DstReg);
2612   const Register SrcReg = I.getOperand(1).getReg();
2613   const LLT WideTy = MRI.getType(SrcReg);
2614   (void)WideTy;
2615   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2616          "source register size too small!");
2617   assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2618
2619   // Need the lane index to determine the correct copy opcode.
2620   MachineOperand &LaneIdxOp = I.getOperand(2);
2621   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2622
2623   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2624     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2625     return false;
2626   }
2627
2628   // Find the index to extract from.
2629   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2630   if (!VRegAndVal)
2631     return false;
2632   unsigned LaneIdx = VRegAndVal->Value;
2633
2634   MachineIRBuilder MIRBuilder(I);
2635
2636   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2637   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2638                                                LaneIdx, MIRBuilder);
2639   if (!Extract)
2640     return false;
2641
2642   I.eraseFromParent();
2643   return true;
2644 }
2645
2646 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2647     MachineInstr &I, MachineRegisterInfo &MRI) const {
2648   unsigned NumElts = I.getNumOperands() - 1;
2649   Register SrcReg = I.getOperand(NumElts).getReg();
2650   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2651   const LLT SrcTy = MRI.getType(SrcReg);
2652
2653   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2654   if (SrcTy.getSizeInBits() > 128) {
2655     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2656     return false;
2657   }
2658
2659   MachineIRBuilder MIB(I);
2660
2661   // We implement a split vector operation by treating the sub-vectors as
2662   // scalars and extracting them.
2663   const RegisterBank &DstRB =
2664       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2665   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2666     Register Dst = I.getOperand(OpIdx).getReg();
2667     MachineInstr *Extract =
2668         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2669     if (!Extract)
2670       return false;
2671   }
2672   I.eraseFromParent();
2673   return true;
2674 }
2675
2676 bool AArch64InstructionSelector::selectUnmergeValues(
2677     MachineInstr &I, MachineRegisterInfo &MRI) const {
2678   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2679          "unexpected opcode");
2680
2681   // TODO: Handle unmerging into GPRs and from scalars to scalars.
2682   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2683           AArch64::FPRRegBankID ||
2684       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2685           AArch64::FPRRegBankID) {
2686     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2687                          "currently unsupported.\n");
2688     return false;
2689   }
2690
2691   // The last operand is the vector source register, and every other operand is
2692   // a register to unpack into.
2693   unsigned NumElts = I.getNumOperands() - 1;
2694   Register SrcReg = I.getOperand(NumElts).getReg();
2695   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2696   const LLT WideTy = MRI.getType(SrcReg);
2697   (void)WideTy;
2698   assert(WideTy.isVector() && "can only unmerge from vector types!");
2699   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2700          "source register size too small!");
2701
2702   if (!NarrowTy.isScalar())
2703     return selectSplitVectorUnmerge(I, MRI);
2704
2705   MachineIRBuilder MIB(I);
2706
2707   // Choose a lane copy opcode and subregister based off of the size of the
2708   // vector's elements.
2709   unsigned CopyOpc = 0;
2710   unsigned ExtractSubReg = 0;
2711   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2712     return false;
2713
2714   // Set up for the lane copies.
2715   MachineBasicBlock &MBB = *I.getParent();
2716
2717   // Stores the registers we'll be copying from.
2718   SmallVector<Register, 4> InsertRegs;
2719
2720   // We'll use the first register twice, so we only need NumElts-1 registers.
2721   unsigned NumInsertRegs = NumElts - 1;
2722
2723   // If our elements fit into exactly 128 bits, then we can copy from the source
2724   // directly. Otherwise, we need to do a bit of setup with some subregister
2725   // inserts.
2726   if (NarrowTy.getSizeInBits() * NumElts == 128) {
2727     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2728   } else {
2729     // No. We have to perform subregister inserts. For each insert, create an
2730     // implicit def and a subregister insert, and save the register we create.
2731     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2732       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2733       MachineInstr &ImpDefMI =
2734           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2735                    ImpDefReg);
2736
2737       // Now, create the subregister insert from SrcReg.
2738       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2739       MachineInstr &InsMI =
2740           *BuildMI(MBB, I, I.getDebugLoc(),
2741                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2742                .addUse(ImpDefReg)
2743                .addUse(SrcReg)
2744                .addImm(AArch64::dsub);
2745
2746       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2747       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2748
2749       // Save the register so that we can copy from it after.
2750       InsertRegs.push_back(InsertReg);
2751     }
2752   }
2753
2754   // Now that we've created any necessary subregister inserts, we can
2755   // create the copies.
2756   //
2757   // Perform the first copy separately as a subregister copy.
2758   Register CopyTo = I.getOperand(0).getReg();
2759   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2760                        .addReg(InsertRegs[0], 0, ExtractSubReg);
2761   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
2762
2763   // Now, perform the remaining copies as vector lane copies.
2764   unsigned LaneIdx = 1;
2765   for (Register InsReg : InsertRegs) {
2766     Register CopyTo = I.getOperand(LaneIdx).getReg();
2767     MachineInstr &CopyInst =
2768         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2769              .addUse(InsReg)
2770              .addImm(LaneIdx);
2771     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2772     ++LaneIdx;
2773   }
2774
2775   // Separately constrain the first copy's destination. Because of the
2776   // limitation in constrainOperandRegClass, we can't guarantee that this will
2777   // actually be constrained. So, do it ourselves using the second operand.
2778   const TargetRegisterClass *RC =
2779       MRI.getRegClassOrNull(I.getOperand(1).getReg());
2780   if (!RC) {
2781     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2782     return false;
2783   }
2784
2785   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2786   I.eraseFromParent();
2787   return true;
2788 }
2789
2790 bool AArch64InstructionSelector::selectConcatVectors(
2791     MachineInstr &I, MachineRegisterInfo &MRI) const {
2792   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2793          "Unexpected opcode");
2794   Register Dst = I.getOperand(0).getReg();
2795   Register Op1 = I.getOperand(1).getReg();
2796   Register Op2 = I.getOperand(2).getReg();
2797   MachineIRBuilder MIRBuilder(I);
2798   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2799   if (!ConcatMI)
2800     return false;
2801   I.eraseFromParent();
2802   return true;
2803 }
2804
2805 void AArch64InstructionSelector::collectShuffleMaskIndices(
2806     MachineInstr &I, MachineRegisterInfo &MRI,
2807     SmallVectorImpl<Optional<int>> &Idxs) const {
2808   MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2809   assert(
2810       MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2811       "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2812   // Find the constant indices.
2813   for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2814     // Look through copies.
2815     MachineInstr *ScalarDef =
2816         getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
2817     assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2818     if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2819       // This be an undef if not a constant.
2820       assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2821       Idxs.push_back(None);
2822     } else {
2823       Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2824     }
2825   }
2826 }
2827
2828 unsigned
2829 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2830                                                   MachineFunction &MF) const {
2831   Type *CPTy = CPVal->getType();
2832   unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2833   if (Align == 0)
2834     Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2835
2836   MachineConstantPool *MCP = MF.getConstantPool();
2837   return MCP->getConstantPoolIndex(CPVal, Align);
2838 }
2839
2840 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2841     Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2842   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2843
2844   auto Adrp =
2845       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2846           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
2847
2848   MachineInstr *LoadMI = nullptr;
2849   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2850   case 16:
2851     LoadMI =
2852         &*MIRBuilder
2853               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2854               .addConstantPoolIndex(CPIdx, 0,
2855                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2856     break;
2857   case 8:
2858     LoadMI = &*MIRBuilder
2859                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2860                  .addConstantPoolIndex(
2861                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2862     break;
2863   default:
2864     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2865                       << *CPVal->getType());
2866     return nullptr;
2867   }
2868   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
2869   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2870   return LoadMI;
2871 }
2872
2873 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
2874 /// size and RB.
2875 static std::pair<unsigned, unsigned>
2876 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2877   unsigned Opc, SubregIdx;
2878   if (RB.getID() == AArch64::GPRRegBankID) {
2879     if (EltSize == 32) {
2880       Opc = AArch64::INSvi32gpr;
2881       SubregIdx = AArch64::ssub;
2882     } else if (EltSize == 64) {
2883       Opc = AArch64::INSvi64gpr;
2884       SubregIdx = AArch64::dsub;
2885     } else {
2886       llvm_unreachable("invalid elt size!");
2887     }
2888   } else {
2889     if (EltSize == 8) {
2890       Opc = AArch64::INSvi8lane;
2891       SubregIdx = AArch64::bsub;
2892     } else if (EltSize == 16) {
2893       Opc = AArch64::INSvi16lane;
2894       SubregIdx = AArch64::hsub;
2895     } else if (EltSize == 32) {
2896       Opc = AArch64::INSvi32lane;
2897       SubregIdx = AArch64::ssub;
2898     } else if (EltSize == 64) {
2899       Opc = AArch64::INSvi64lane;
2900       SubregIdx = AArch64::dsub;
2901     } else {
2902       llvm_unreachable("invalid elt size!");
2903     }
2904   }
2905   return std::make_pair(Opc, SubregIdx);
2906 }
2907
2908 MachineInstr *
2909 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
2910                                     MachineIRBuilder &MIRBuilder) const {
2911   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
2912   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2913   static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
2914                                        {AArch64::ADDSWrr, AArch64::ADDSWri}};
2915   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
2916   auto ImmFns = selectArithImmed(RHS);
2917   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
2918   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
2919
2920   auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
2921
2922   // If we matched a valid constant immediate, add those operands.
2923   if (ImmFns) {
2924     for (auto &RenderFn : *ImmFns)
2925       RenderFn(CmpMI);
2926   } else {
2927     CmpMI.addUse(RHS.getReg());
2928   }
2929
2930   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2931   return &*CmpMI;
2932 }
2933
2934 MachineInstr *
2935 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
2936                                     MachineIRBuilder &MIRBuilder) const {
2937   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2938   unsigned RegSize = MRI.getType(LHS).getSizeInBits();
2939   bool Is32Bit = (RegSize == 32);
2940   static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
2941                                        {AArch64::ANDSWrr, AArch64::ANDSWri}};
2942   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
2943
2944   // We might be able to fold in an immediate into the TST. We need to make sure
2945   // it's a logical immediate though, since ANDS requires that.
2946   auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
2947   bool IsImmForm = ValAndVReg.hasValue() &&
2948                    AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
2949   unsigned Opc = OpcTable[Is32Bit][IsImmForm];
2950   auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
2951
2952   if (IsImmForm)
2953     TstMI.addImm(
2954         AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
2955   else
2956     TstMI.addUse(RHS);
2957
2958   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
2959   return &*TstMI;
2960 }
2961
2962 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
2963     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
2964     MachineIRBuilder &MIRBuilder) const {
2965   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
2966   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2967
2968   // Fold the compare if possible.
2969   MachineInstr *FoldCmp =
2970       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
2971   if (FoldCmp)
2972     return FoldCmp;
2973
2974   // Can't fold into a CMN. Just emit a normal compare.
2975   unsigned CmpOpc = 0;
2976   Register ZReg;
2977
2978   LLT CmpTy = MRI.getType(LHS.getReg());
2979   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
2980          "Expected scalar or pointer");
2981   if (CmpTy == LLT::scalar(32)) {
2982     CmpOpc = AArch64::SUBSWrr;
2983     ZReg = AArch64::WZR;
2984   } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
2985     CmpOpc = AArch64::SUBSXrr;
2986     ZReg = AArch64::XZR;
2987   } else {
2988     return nullptr;
2989   }
2990
2991   // Try to match immediate forms.
2992   auto ImmFns = selectArithImmed(RHS);
2993   if (ImmFns)
2994     CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
2995
2996   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
2997   // If we matched a valid constant immediate, add those operands.
2998   if (ImmFns) {
2999     for (auto &RenderFn : *ImmFns)
3000       RenderFn(CmpMI);
3001   } else {
3002     CmpMI.addUse(RHS.getReg());
3003   }
3004
3005   // Make sure that we can constrain the compare that we emitted.
3006   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3007   return &*CmpMI;
3008 }
3009
3010 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3011     Optional<Register> Dst, Register Op1, Register Op2,
3012     MachineIRBuilder &MIRBuilder) const {
3013   // We implement a vector concat by:
3014   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3015   // 2. Insert the upper vector into the destination's upper element
3016   // TODO: some of this code is common with G_BUILD_VECTOR handling.
3017   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3018
3019   const LLT Op1Ty = MRI.getType(Op1);
3020   const LLT Op2Ty = MRI.getType(Op2);
3021
3022   if (Op1Ty != Op2Ty) {
3023     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3024     return nullptr;
3025   }
3026   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3027
3028   if (Op1Ty.getSizeInBits() >= 128) {
3029     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3030     return nullptr;
3031   }
3032
3033   // At the moment we just support 64 bit vector concats.
3034   if (Op1Ty.getSizeInBits() != 64) {
3035     LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3036     return nullptr;
3037   }
3038
3039   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3040   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3041   const TargetRegisterClass *DstRC =
3042       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3043
3044   MachineInstr *WidenedOp1 =
3045       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3046   MachineInstr *WidenedOp2 =
3047       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3048   if (!WidenedOp1 || !WidenedOp2) {
3049     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3050     return nullptr;
3051   }
3052
3053   // Now do the insert of the upper element.
3054   unsigned InsertOpc, InsSubRegIdx;
3055   std::tie(InsertOpc, InsSubRegIdx) =
3056       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3057
3058   if (!Dst)
3059     Dst = MRI.createVirtualRegister(DstRC);
3060   auto InsElt =
3061       MIRBuilder
3062           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3063           .addImm(1) /* Lane index */
3064           .addUse(WidenedOp2->getOperand(0).getReg())
3065           .addImm(0);
3066   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3067   return &*InsElt;
3068 }
3069
3070 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3071     MachineInstr &I, MachineRegisterInfo &MRI) const {
3072   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3073          "Expected a G_FCONSTANT!");
3074   MachineOperand &ImmOp = I.getOperand(1);
3075   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3076
3077   // Only handle 32 and 64 bit defs for now.
3078   if (DefSize != 32 && DefSize != 64)
3079     return nullptr;
3080
3081   // Don't handle null values using FMOV.
3082   if (ImmOp.getFPImm()->isNullValue())
3083     return nullptr;
3084
3085   // Get the immediate representation for the FMOV.
3086   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3087   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3088                           : AArch64_AM::getFP64Imm(ImmValAPF);
3089
3090   // If this is -1, it means the immediate can't be represented as the requested
3091   // floating point value. Bail.
3092   if (Imm == -1)
3093     return nullptr;
3094
3095   // Update MI to represent the new FMOV instruction, constrain it, and return.
3096   ImmOp.ChangeToImmediate(Imm);
3097   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3098   I.setDesc(TII.get(MovOpc));
3099   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3100   return &I;
3101 }
3102
3103 MachineInstr *
3104 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3105                                      MachineIRBuilder &MIRBuilder) const {
3106   // CSINC increments the result when the predicate is false. Invert it.
3107   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3108       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3109   auto I =
3110       MIRBuilder
3111     .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3112           .addImm(InvCC);
3113   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3114   return &*I;
3115 }
3116
3117 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3118   MachineIRBuilder MIB(I);
3119   MachineRegisterInfo &MRI = *MIB.getMRI();
3120   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3121
3122   // We want to recognize this pattern:
3123   //
3124   // $z = G_FCMP pred, $x, $y
3125   // ...
3126   // $w = G_SELECT $z, $a, $b
3127   //
3128   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3129   // some copies/truncs in between.)
3130   //
3131   // If we see this, then we can emit something like this:
3132   //
3133   // fcmp $x, $y
3134   // fcsel $w, $a, $b, pred
3135   //
3136   // Rather than emitting both of the rather long sequences in the standard
3137   // G_FCMP/G_SELECT select methods.
3138
3139   // First, check if the condition is defined by a compare.
3140   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3141   while (CondDef) {
3142     // We can only fold if all of the defs have one use.
3143     if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3144       return false;
3145
3146     // We can skip over G_TRUNC since the condition is 1-bit.
3147     // Truncating/extending can have no impact on the value.
3148     unsigned Opc = CondDef->getOpcode();
3149     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3150       break;
3151
3152     // Can't see past copies from physregs.
3153     if (Opc == TargetOpcode::COPY &&
3154         TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3155       return false;
3156
3157     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3158   }
3159
3160   // Is the condition defined by a compare?
3161   if (!CondDef)
3162     return false;
3163
3164   unsigned CondOpc = CondDef->getOpcode();
3165   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3166     return false;
3167
3168   AArch64CC::CondCode CondCode;
3169   if (CondOpc == TargetOpcode::G_ICMP) {
3170     CondCode = changeICMPPredToAArch64CC(
3171         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3172     if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3173                             CondDef->getOperand(1), MIB)) {
3174       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3175       return false;
3176     }
3177   } else {
3178     // Get the condition code for the select.
3179     AArch64CC::CondCode CondCode2;
3180     changeFCMPPredToAArch64CC(
3181         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3182         CondCode2);
3183
3184     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3185     // instructions to emit the comparison.
3186     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3187     // unnecessary.
3188     if (CondCode2 != AArch64CC::AL)
3189       return false;
3190
3191     // Make sure we'll be able to select the compare.
3192     unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3193     if (!CmpOpc)
3194       return false;
3195
3196     // Emit a new compare.
3197     auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3198     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3199       Cmp.addUse(CondDef->getOperand(3).getReg());
3200     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3201   }
3202
3203   // Emit the select.
3204   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3205   auto CSel =
3206       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3207                      {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3208           .addImm(CondCode);
3209   constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3210   I.eraseFromParent();
3211   return true;
3212 }
3213
3214 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3215     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3216     MachineIRBuilder &MIRBuilder) const {
3217   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3218          "Unexpected MachineOperand");
3219   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3220   // We want to find this sort of thing:
3221   // x = G_SUB 0, y
3222   // G_ICMP z, x
3223   //
3224   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3225   // e.g:
3226   //
3227   // cmn z, y
3228
3229   // Helper lambda to detect the subtract followed by the compare.
3230   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3231   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3232     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3233       return false;
3234
3235     // Need to make sure NZCV is the same at the end of the transformation.
3236     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3237       return false;
3238
3239     // We want to match against SUBs.
3240     if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3241       return false;
3242
3243     // Make sure that we're getting
3244     // x = G_SUB 0, y
3245     auto ValAndVReg =
3246         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3247     if (!ValAndVReg || ValAndVReg->Value != 0)
3248       return false;
3249
3250     // This can safely be represented as a CMN.
3251     return true;
3252   };
3253
3254   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3255   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3256   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3257   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3258   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3259
3260   // Given this:
3261   //
3262   // x = G_SUB 0, y
3263   // G_ICMP x, z
3264   //
3265   // Produce this:
3266   //
3267   // cmn y, z
3268   if (IsCMN(LHSDef, CC))
3269     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3270
3271   // Same idea here, but with the RHS of the compare instead:
3272   //
3273   // Given this:
3274   //
3275   // x = G_SUB 0, y
3276   // G_ICMP z, x
3277   //
3278   // Produce this:
3279   //
3280   // cmn z, y
3281   if (IsCMN(RHSDef, CC))
3282     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3283
3284   // Given this:
3285   //
3286   // z = G_AND x, y
3287   // G_ICMP z, 0
3288   //
3289   // Produce this if the compare is signed:
3290   //
3291   // tst x, y
3292   if (!isUnsignedICMPPred(P) && LHSDef &&
3293       LHSDef->getOpcode() == TargetOpcode::G_AND) {
3294     // Make sure that the RHS is 0.
3295     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3296     if (!ValAndVReg || ValAndVReg->Value != 0)
3297       return nullptr;
3298
3299     return emitTST(LHSDef->getOperand(1).getReg(),
3300                    LHSDef->getOperand(2).getReg(), MIRBuilder);
3301   }
3302
3303   return nullptr;
3304 }
3305
3306 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3307   // Try to match a vector splat operation into a dup instruction.
3308   // We're looking for this pattern:
3309   //    %scalar:gpr(s64) = COPY $x0
3310   //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3311   //    %cst0:gpr(s32) = G_CONSTANT i32 0
3312   //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3313   //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3314   //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3315   //                                             %zerovec(<2 x s32>)
3316   //
3317   // ...into:
3318   // %splat = DUP %scalar
3319   // We use the regbank of the scalar to determine which kind of dup to use.
3320   MachineIRBuilder MIB(I);
3321   MachineRegisterInfo &MRI = *MIB.getMRI();
3322   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3323   using namespace TargetOpcode;
3324   using namespace MIPatternMatch;
3325
3326   // Begin matching the insert.
3327   auto *InsMI =
3328       getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3329   if (!InsMI)
3330     return false;
3331   // Match the undef vector operand.
3332   auto *UndefMI =
3333       getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3334   if (!UndefMI)
3335     return false;
3336   // Match the scalar being splatted.
3337   Register ScalarReg = InsMI->getOperand(2).getReg();
3338   const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3339   // Match the index constant 0.
3340   int64_t Index = 0;
3341   if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3342     return false;
3343
3344   // The shuffle's second operand doesn't matter if the mask is all zero.
3345   auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
3346   if (!ZeroVec)
3347     return false;
3348   int64_t Zero = 0;
3349   if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3350     return false;
3351   for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
3352     if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3353       return false; // This wasn't an all zeros vector.
3354   }
3355
3356   // We're done, now find out what kind of splat we need.
3357   LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3358   LLT EltTy = VecTy.getElementType();
3359   if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3360     LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3361     return false;
3362   }
3363   bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3364   static const unsigned OpcTable[2][2] = {
3365       {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3366       {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3367   unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3368
3369   // For FP splats, we need to widen the scalar reg via undef too.
3370   if (IsFP) {
3371     MachineInstr *Widen = emitScalarToVector(
3372         EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3373     if (!Widen)
3374       return false;
3375     ScalarReg = Widen->getOperand(0).getReg();
3376   }
3377   auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3378   if (IsFP)
3379     Dup.addImm(0);
3380   constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3381   I.eraseFromParent();
3382   return true;
3383 }
3384
3385 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3386   if (TM.getOptLevel() == CodeGenOpt::None)
3387     return false;
3388   if (tryOptVectorDup(I))
3389     return true;
3390   return false;
3391 }
3392
3393 bool AArch64InstructionSelector::selectShuffleVector(
3394     MachineInstr &I, MachineRegisterInfo &MRI) const {
3395   if (tryOptVectorShuffle(I))
3396     return true;
3397   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3398   Register Src1Reg = I.getOperand(1).getReg();
3399   const LLT Src1Ty = MRI.getType(Src1Reg);
3400   Register Src2Reg = I.getOperand(2).getReg();
3401   const LLT Src2Ty = MRI.getType(Src2Reg);
3402
3403   MachineBasicBlock &MBB = *I.getParent();
3404   MachineFunction &MF = *MBB.getParent();
3405   LLVMContext &Ctx = MF.getFunction().getContext();
3406
3407   // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3408   // operand, it comes in as a normal vector value which we have to analyze to
3409   // find the mask indices. If the mask element is undef, then
3410   // collectShuffleMaskIndices() will add a None entry for that index into
3411   // the list.
3412   SmallVector<Optional<int>, 8> Mask;
3413   collectShuffleMaskIndices(I, MRI, Mask);
3414   assert(!Mask.empty() && "Expected to find mask indices");
3415
3416   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3417   // it's originated from a <1 x T> type. Those should have been lowered into
3418   // G_BUILD_VECTOR earlier.
3419   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3420     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3421     return false;
3422   }
3423
3424   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3425
3426   SmallVector<Constant *, 64> CstIdxs;
3427   for (auto &MaybeVal : Mask) {
3428     // For now, any undef indexes we'll just assume to be 0. This should be
3429     // optimized in future, e.g. to select DUP etc.
3430     int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
3431     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3432       unsigned Offset = Byte + Val * BytesPerElt;
3433       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3434     }
3435   }
3436
3437   MachineIRBuilder MIRBuilder(I);
3438
3439   // Use a constant pool to load the index vector for TBL.
3440   Constant *CPVal = ConstantVector::get(CstIdxs);
3441   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3442   if (!IndexLoad) {
3443     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3444     return false;
3445   }
3446
3447   if (DstTy.getSizeInBits() != 128) {
3448     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3449     // This case can be done with TBL1.
3450     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3451     if (!Concat) {
3452       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3453       return false;
3454     }
3455
3456     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3457     IndexLoad =
3458         emitScalarToVector(64, &AArch64::FPR128RegClass,
3459                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
3460
3461     auto TBL1 = MIRBuilder.buildInstr(
3462         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3463         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3464     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3465
3466     auto Copy =
3467         MIRBuilder
3468             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3469             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3470     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3471     I.eraseFromParent();
3472     return true;
3473   }
3474
3475   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3476   // Q registers for regalloc.
3477   auto RegSeq = MIRBuilder
3478                     .buildInstr(TargetOpcode::REG_SEQUENCE,
3479                                 {&AArch64::QQRegClass}, {Src1Reg})
3480                     .addImm(AArch64::qsub0)
3481                     .addUse(Src2Reg)
3482                     .addImm(AArch64::qsub1);
3483
3484   auto TBL2 =
3485       MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3486                             {RegSeq, IndexLoad->getOperand(0).getReg()});
3487   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3488   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3489   I.eraseFromParent();
3490   return true;
3491 }
3492
3493 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3494     Optional<Register> DstReg, Register SrcReg, Register EltReg,
3495     unsigned LaneIdx, const RegisterBank &RB,
3496     MachineIRBuilder &MIRBuilder) const {
3497   MachineInstr *InsElt = nullptr;
3498   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3499   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3500
3501   // Create a register to define with the insert if one wasn't passed in.
3502   if (!DstReg)
3503     DstReg = MRI.createVirtualRegister(DstRC);
3504
3505   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3506   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3507
3508   if (RB.getID() == AArch64::FPRRegBankID) {
3509     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3510     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3511                  .addImm(LaneIdx)
3512                  .addUse(InsSub->getOperand(0).getReg())
3513                  .addImm(0);
3514   } else {
3515     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3516                  .addImm(LaneIdx)
3517                  .addUse(EltReg);
3518   }
3519
3520   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3521   return InsElt;
3522 }
3523
3524 bool AArch64InstructionSelector::selectInsertElt(
3525     MachineInstr &I, MachineRegisterInfo &MRI) const {
3526   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3527
3528   // Get information on the destination.
3529   Register DstReg = I.getOperand(0).getReg();
3530   const LLT DstTy = MRI.getType(DstReg);
3531   unsigned VecSize = DstTy.getSizeInBits();
3532
3533   // Get information on the element we want to insert into the destination.
3534   Register EltReg = I.getOperand(2).getReg();
3535   const LLT EltTy = MRI.getType(EltReg);
3536   unsigned EltSize = EltTy.getSizeInBits();
3537   if (EltSize < 16 || EltSize > 64)
3538     return false; // Don't support all element types yet.
3539
3540   // Find the definition of the index. Bail out if it's not defined by a
3541   // G_CONSTANT.
3542   Register IdxReg = I.getOperand(3).getReg();
3543   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3544   if (!VRegAndVal)
3545     return false;
3546   unsigned LaneIdx = VRegAndVal->Value;
3547
3548   // Perform the lane insert.
3549   Register SrcReg = I.getOperand(1).getReg();
3550   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3551   MachineIRBuilder MIRBuilder(I);
3552
3553   if (VecSize < 128) {
3554     // If the vector we're inserting into is smaller than 128 bits, widen it
3555     // to 128 to do the insert.
3556     MachineInstr *ScalarToVec = emitScalarToVector(
3557         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3558     if (!ScalarToVec)
3559       return false;
3560     SrcReg = ScalarToVec->getOperand(0).getReg();
3561   }
3562
3563   // Create an insert into a new FPR128 register.
3564   // Note that if our vector is already 128 bits, we end up emitting an extra
3565   // register.
3566   MachineInstr *InsMI =
3567       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3568
3569   if (VecSize < 128) {
3570     // If we had to widen to perform the insert, then we have to demote back to
3571     // the original size to get the result we want.
3572     Register DemoteVec = InsMI->getOperand(0).getReg();
3573     const TargetRegisterClass *RC =
3574         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3575     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3576       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3577       return false;
3578     }
3579     unsigned SubReg = 0;
3580     if (!getSubRegForClass(RC, TRI, SubReg))
3581       return false;
3582     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3583       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3584                         << "\n");
3585       return false;
3586     }
3587     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3588         .addReg(DemoteVec, 0, SubReg);
3589     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3590   } else {
3591     // No widening needed.
3592     InsMI->getOperand(0).setReg(DstReg);
3593     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3594   }
3595
3596   I.eraseFromParent();
3597   return true;
3598 }
3599
3600 bool AArch64InstructionSelector::selectBuildVector(
3601     MachineInstr &I, MachineRegisterInfo &MRI) const {
3602   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3603   // Until we port more of the optimized selections, for now just use a vector
3604   // insert sequence.
3605   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3606   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3607   unsigned EltSize = EltTy.getSizeInBits();
3608   if (EltSize < 16 || EltSize > 64)
3609     return false; // Don't support all element types yet.
3610   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3611   MachineIRBuilder MIRBuilder(I);
3612
3613   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3614   MachineInstr *ScalarToVec =
3615       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3616                          I.getOperand(1).getReg(), MIRBuilder);
3617   if (!ScalarToVec)
3618     return false;
3619
3620   Register DstVec = ScalarToVec->getOperand(0).getReg();
3621   unsigned DstSize = DstTy.getSizeInBits();
3622
3623   // Keep track of the last MI we inserted. Later on, we might be able to save
3624   // a copy using it.
3625   MachineInstr *PrevMI = nullptr;
3626   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3627     // Note that if we don't do a subregister copy, we can end up making an
3628     // extra register.
3629     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3630                               MIRBuilder);
3631     DstVec = PrevMI->getOperand(0).getReg();
3632   }
3633
3634   // If DstTy's size in bits is less than 128, then emit a subregister copy
3635   // from DstVec to the last register we've defined.
3636   if (DstSize < 128) {
3637     // Force this to be FPR using the destination vector.
3638     const TargetRegisterClass *RC =
3639         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3640     if (!RC)
3641       return false;
3642     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3643       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3644       return false;
3645     }
3646
3647     unsigned SubReg = 0;
3648     if (!getSubRegForClass(RC, TRI, SubReg))
3649       return false;
3650     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3651       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3652                         << "\n");
3653       return false;
3654     }
3655
3656     Register Reg = MRI.createVirtualRegister(RC);
3657     Register DstReg = I.getOperand(0).getReg();
3658
3659     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3660         .addReg(DstVec, 0, SubReg);
3661     MachineOperand &RegOp = I.getOperand(1);
3662     RegOp.setReg(Reg);
3663     RBI.constrainGenericRegister(DstReg, *RC, MRI);
3664   } else {
3665     // We don't need a subregister copy. Save a copy by re-using the
3666     // destination register on the final insert.
3667     assert(PrevMI && "PrevMI was null?");
3668     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3669     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3670   }
3671
3672   I.eraseFromParent();
3673   return true;
3674 }
3675
3676 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
3677 /// ID if it exists, and 0 otherwise.
3678 static unsigned findIntrinsicID(MachineInstr &I) {
3679   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3680     return Op.isIntrinsicID();
3681   });
3682   if (IntrinOp == I.operands_end())
3683     return 0;
3684   return IntrinOp->getIntrinsicID();
3685 }
3686
3687 /// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3688 /// intrinsic.
3689 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3690   switch (NumBytesToStore) {
3691   // TODO: 1, 2, and 4 byte stores.
3692   case 8:
3693     return AArch64::STLXRX;
3694   default:
3695     LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3696                       << NumBytesToStore << ")\n");
3697     break;
3698   }
3699   return 0;
3700 }
3701
3702 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3703     MachineInstr &I, MachineRegisterInfo &MRI) const {
3704   // Find the intrinsic ID.
3705   unsigned IntrinID = findIntrinsicID(I);
3706   if (!IntrinID)
3707     return false;
3708   MachineIRBuilder MIRBuilder(I);
3709
3710   // Select the instruction.
3711   switch (IntrinID) {
3712   default:
3713     return false;
3714   case Intrinsic::trap:
3715     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3716     break;
3717   case Intrinsic::debugtrap:
3718     if (!STI.isTargetWindows())
3719       return false;
3720     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3721     break;
3722   case Intrinsic::aarch64_stlxr:
3723     Register StatReg = I.getOperand(0).getReg();
3724     assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3725            "Status register must be 32 bits!");
3726     Register SrcReg = I.getOperand(2).getReg();
3727
3728     if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3729       LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3730       return false;
3731     }
3732
3733     Register PtrReg = I.getOperand(3).getReg();
3734     assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3735
3736     // Expect only one memory operand.
3737     if (!I.hasOneMemOperand())
3738       return false;
3739
3740     const MachineMemOperand *MemOp = *I.memoperands_begin();
3741     unsigned NumBytesToStore = MemOp->getSize();
3742     unsigned Opc = getStlxrOpcode(NumBytesToStore);
3743     if (!Opc)
3744       return false;
3745
3746     auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3747     constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3748   }
3749
3750   I.eraseFromParent();
3751   return true;
3752 }
3753
3754 bool AArch64InstructionSelector::selectIntrinsic(
3755     MachineInstr &I, MachineRegisterInfo &MRI) const {
3756   unsigned IntrinID = findIntrinsicID(I);
3757   if (!IntrinID)
3758     return false;
3759   MachineIRBuilder MIRBuilder(I);
3760
3761   switch (IntrinID) {
3762   default:
3763     break;
3764   case Intrinsic::aarch64_crypto_sha1h:
3765     Register DstReg = I.getOperand(0).getReg();
3766     Register SrcReg = I.getOperand(2).getReg();
3767
3768     // FIXME: Should this be an assert?
3769     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3770         MRI.getType(SrcReg).getSizeInBits() != 32)
3771       return false;
3772
3773     // The operation has to happen on FPRs. Set up some new FPR registers for
3774     // the source and destination if they are on GPRs.
3775     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3776       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3777       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3778
3779       // Make sure the copy ends up getting constrained properly.
3780       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3781                                    AArch64::GPR32RegClass, MRI);
3782     }
3783
3784     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3785       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3786
3787     // Actually insert the instruction.
3788     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3789     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3790
3791     // Did we create a new register for the destination?
3792     if (DstReg != I.getOperand(0).getReg()) {
3793       // Yep. Copy the result of the instruction back into the original
3794       // destination.
3795       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3796       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3797                                    AArch64::GPR32RegClass, MRI);
3798     }
3799
3800     I.eraseFromParent();
3801     return true;
3802   }
3803   return false;
3804 }
3805
3806 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
3807   auto &MI = *Root.getParent();
3808   auto &MBB = *MI.getParent();
3809   auto &MF = *MBB.getParent();
3810   auto &MRI = MF.getRegInfo();
3811   uint64_t Immed;
3812   if (Root.isImm())
3813     Immed = Root.getImm();
3814   else if (Root.isCImm())
3815     Immed = Root.getCImm()->getZExtValue();
3816   else if (Root.isReg()) {
3817     auto ValAndVReg =
3818         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
3819     if (!ValAndVReg)
3820       return None;
3821     Immed = ValAndVReg->Value;
3822   } else
3823     return None;
3824   return Immed;
3825 }
3826
3827 InstructionSelector::ComplexRendererFns
3828 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
3829   auto MaybeImmed = getImmedFromMO(Root);
3830   if (MaybeImmed == None || *MaybeImmed > 31)
3831     return None;
3832   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
3833   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3834 }
3835
3836 InstructionSelector::ComplexRendererFns
3837 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
3838   auto MaybeImmed = getImmedFromMO(Root);
3839   if (MaybeImmed == None || *MaybeImmed > 31)
3840     return None;
3841   uint64_t Enc = 31 - *MaybeImmed;
3842   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3843 }
3844
3845 InstructionSelector::ComplexRendererFns
3846 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
3847   auto MaybeImmed = getImmedFromMO(Root);
3848   if (MaybeImmed == None || *MaybeImmed > 63)
3849     return None;
3850   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
3851   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3852 }
3853
3854 InstructionSelector::ComplexRendererFns
3855 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
3856   auto MaybeImmed = getImmedFromMO(Root);
3857   if (MaybeImmed == None || *MaybeImmed > 63)
3858     return None;
3859   uint64_t Enc = 63 - *MaybeImmed;
3860   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3861 }
3862
3863 /// SelectArithImmed - Select an immediate value that can be represented as
3864 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
3865 /// Val set to the 12-bit value and Shift set to the shifter operand.
3866 InstructionSelector::ComplexRendererFns
3867 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
3868   // This function is called from the addsub_shifted_imm ComplexPattern,
3869   // which lists [imm] as the list of opcode it's interested in, however
3870   // we still need to check whether the operand is actually an immediate
3871   // here because the ComplexPattern opcode list is only used in
3872   // root-level opcode matching.
3873   auto MaybeImmed = getImmedFromMO(Root);
3874   if (MaybeImmed == None)
3875     return None;
3876   uint64_t Immed = *MaybeImmed;
3877   unsigned ShiftAmt;
3878
3879   if (Immed >> 12 == 0) {
3880     ShiftAmt = 0;
3881   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3882     ShiftAmt = 12;
3883     Immed = Immed >> 12;
3884   } else
3885     return None;
3886
3887   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
3888   return {{
3889       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3890       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3891   }};
3892 }
3893
3894 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
3895 /// should only match when there is an offset that is not valid for a scaled
3896 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
3897 /// memory reference, which is needed here to know what is valid for a scaled
3898 /// immediate.
3899 InstructionSelector::ComplexRendererFns
3900 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3901                                                    unsigned Size) const {
3902   MachineRegisterInfo &MRI =
3903       Root.getParent()->getParent()->getParent()->getRegInfo();
3904
3905   if (!Root.isReg())
3906     return None;
3907
3908   if (!isBaseWithConstantOffset(Root, MRI))
3909     return None;
3910
3911   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3912   if (!RootDef)
3913     return None;
3914
3915   MachineOperand &OffImm = RootDef->getOperand(2);
3916   if (!OffImm.isReg())
3917     return None;
3918   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3919   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3920     return None;
3921   int64_t RHSC;
3922   MachineOperand &RHSOp1 = RHS->getOperand(1);
3923   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3924     return None;
3925   RHSC = RHSOp1.getCImm()->getSExtValue();
3926
3927   // If the offset is valid as a scaled immediate, don't match here.
3928   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3929     return None;
3930   if (RHSC >= -256 && RHSC < 256) {
3931     MachineOperand &Base = RootDef->getOperand(1);
3932     return {{
3933         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3934         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3935     }};
3936   }
3937   return None;
3938 }
3939
3940 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
3941 /// "Size" argument is the size in bytes of the memory reference, which
3942 /// determines the scale.
3943 InstructionSelector::ComplexRendererFns
3944 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3945                                                   unsigned Size) const {
3946   MachineRegisterInfo &MRI =
3947       Root.getParent()->getParent()->getParent()->getRegInfo();
3948
3949   if (!Root.isReg())
3950     return None;
3951
3952   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3953   if (!RootDef)
3954     return None;
3955
3956   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3957     return {{
3958         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3959         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3960     }};
3961   }
3962
3963   if (isBaseWithConstantOffset(Root, MRI)) {
3964     MachineOperand &LHS = RootDef->getOperand(1);
3965     MachineOperand &RHS = RootDef->getOperand(2);
3966     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3967     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3968     if (LHSDef && RHSDef) {
3969       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3970       unsigned Scale = Log2_32(Size);
3971       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3972         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
3973           return {{
3974               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3975               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3976           }};
3977
3978         return {{
3979             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3980             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3981         }};
3982       }
3983     }
3984   }
3985
3986   // Before falling back to our general case, check if the unscaled
3987   // instructions can handle this. If so, that's preferable.
3988   if (selectAddrModeUnscaled(Root, Size).hasValue())
3989     return None;
3990
3991   return {{
3992       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3993       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3994   }};
3995 }
3996
3997 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3998                                                 const MachineInstr &MI) const {
3999   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4000   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4001   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4002   assert(CstVal && "Expected constant value");
4003   MIB.addImm(CstVal.getValue());
4004 }
4005
4006 namespace llvm {
4007 InstructionSelector *
4008 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4009                                  AArch64Subtarget &Subtarget,
4010                                  AArch64RegisterBankInfo &RBI) {
4011   return new AArch64InstructionSelector(TM, Subtarget, RBI);
4012 }
4013 }