contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp

   1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64CallingConvention.h"
  18 #include "AArch64RegisterInfo.h"
  19 #include "AArch64Subtarget.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "Utils/AArch64BaseInfo.h"
  22 #include "llvm/ADT/APFloat.h"
  23 #include "llvm/ADT/APInt.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/Analysis/BranchProbabilityInfo.h"
  27 #include "llvm/CodeGen/CallingConvLower.h"
  28 #include "llvm/CodeGen/FastISel.h"
  29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  30 #include "llvm/CodeGen/ISDOpcodes.h"
  31 #include "llvm/CodeGen/MachineBasicBlock.h"
  32 #include "llvm/CodeGen/MachineConstantPool.h"
  33 #include "llvm/CodeGen/MachineFrameInfo.h"
  34 #include "llvm/CodeGen/MachineInstr.h"
  35 #include "llvm/CodeGen/MachineInstrBuilder.h"
  36 #include "llvm/CodeGen/MachineMemOperand.h"
  37 #include "llvm/CodeGen/MachineRegisterInfo.h"
  38 #include "llvm/CodeGen/MachineValueType.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/ValueTypes.h"
  41 #include "llvm/IR/Argument.h"
  42 #include "llvm/IR/Attributes.h"
  43 #include "llvm/IR/BasicBlock.h"
  44 #include "llvm/IR/CallingConv.h"
  45 #include "llvm/IR/Constant.h"
  46 #include "llvm/IR/Constants.h"
  47 #include "llvm/IR/DataLayout.h"
  48 #include "llvm/IR/DerivedTypes.h"
  49 #include "llvm/IR/Function.h"
  50 #include "llvm/IR/GetElementPtrTypeIterator.h"
  51 #include "llvm/IR/GlobalValue.h"
  52 #include "llvm/IR/InstrTypes.h"
  53 #include "llvm/IR/Instruction.h"
  54 #include "llvm/IR/Instructions.h"
  55 #include "llvm/IR/IntrinsicInst.h"
  56 #include "llvm/IR/Intrinsics.h"
  57 #include "llvm/IR/Operator.h"
  58 #include "llvm/IR/Type.h"
  59 #include "llvm/IR/User.h"
  60 #include "llvm/IR/Value.h"
  61 #include "llvm/MC/MCInstrDesc.h"
  62 #include "llvm/MC/MCRegisterInfo.h"
  63 #include "llvm/MC/MCSymbol.h"
  64 #include "llvm/Support/AtomicOrdering.h"
  65 #include "llvm/Support/Casting.h"
  66 #include "llvm/Support/CodeGen.h"
  67 #include "llvm/Support/Compiler.h"
  68 #include "llvm/Support/ErrorHandling.h"
  69 #include "llvm/Support/MathExtras.h"
  70 #include <algorithm>
  71 #include <cassert>
  72 #include <cstdint>
  73 #include <iterator>
  74 #include <utility>
  75
  76 using namespace llvm;
  77
  78 namespace {
  79
  80 class AArch64FastISel final : public FastISel {
  81   class Address {
  82   public:
  83     using BaseKind = enum {
  84       RegBase,
  85       FrameIndexBase
  86     };
  87
  88   private:
  89     BaseKind Kind = RegBase;
  90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
  91     union {
  92       unsigned Reg;
  93       int FI;
  94     } Base;
  95     unsigned OffsetReg = 0;
  96     unsigned Shift = 0;
  97     int64_t Offset = 0;
  98     const GlobalValue *GV = nullptr;
  99
 100   public:
 101     Address() { Base.Reg = 0; }
 102
 103     void setKind(BaseKind K) { Kind = K; }
 104     BaseKind getKind() const { return Kind; }
 105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
 106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
 107     bool isRegBase() const { return Kind == RegBase; }
 108     bool isFIBase() const { return Kind == FrameIndexBase; }
 109
 110     void setReg(unsigned Reg) {
 111       assert(isRegBase() && "Invalid base register access!");
 112       Base.Reg = Reg;
 113     }
 114
 115     unsigned getReg() const {
 116       assert(isRegBase() && "Invalid base register access!");
 117       return Base.Reg;
 118     }
 119
 120     void setOffsetReg(unsigned Reg) {
 121       OffsetReg = Reg;
 122     }
 123
 124     unsigned getOffsetReg() const {
 125       return OffsetReg;
 126     }
 127
 128     void setFI(unsigned FI) {
 129       assert(isFIBase() && "Invalid base frame index  access!");
 130       Base.FI = FI;
 131     }
 132
 133     unsigned getFI() const {
 134       assert(isFIBase() && "Invalid base frame index access!");
 135       return Base.FI;
 136     }
 137
 138     void setOffset(int64_t O) { Offset = O; }
 139     int64_t getOffset() { return Offset; }
 140     void setShift(unsigned S) { Shift = S; }
 141     unsigned getShift() { return Shift; }
 142
 143     void setGlobalValue(const GlobalValue *G) { GV = G; }
 144     const GlobalValue *getGlobalValue() { return GV; }
 145   };
 146
 147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 148   /// make the right decision when generating code for different targets.
 149   const AArch64Subtarget *Subtarget;
 150   LLVMContext *Context;
 151
 152   bool fastLowerArguments() override;
 153   bool fastLowerCall(CallLoweringInfo &CLI) override;
 154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 155
 156 private:
 157   // Selection routines.
 158   bool selectAddSub(const Instruction *I);
 159   bool selectLogicalOp(const Instruction *I);
 160   bool selectLoad(const Instruction *I);
 161   bool selectStore(const Instruction *I);
 162   bool selectBranch(const Instruction *I);
 163   bool selectIndirectBr(const Instruction *I);
 164   bool selectCmp(const Instruction *I);
 165   bool selectSelect(const Instruction *I);
 166   bool selectFPExt(const Instruction *I);
 167   bool selectFPTrunc(const Instruction *I);
 168   bool selectFPToInt(const Instruction *I, bool Signed);
 169   bool selectIntToFP(const Instruction *I, bool Signed);
 170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 171   bool selectRet(const Instruction *I);
 172   bool selectTrunc(const Instruction *I);
 173   bool selectIntExt(const Instruction *I);
 174   bool selectMul(const Instruction *I);
 175   bool selectShift(const Instruction *I);
 176   bool selectBitCast(const Instruction *I);
 177   bool selectFRem(const Instruction *I);
 178   bool selectSDiv(const Instruction *I);
 179   bool selectGetElementPtr(const Instruction *I);
 180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
 181
 182   // Utility helper routines.
 183   bool isTypeLegal(Type *Ty, MVT &VT);
 184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 185   bool isValueAvailable(const Value *V) const;
 186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 187   bool computeCallAddress(const Value *V, Address &Addr);
 188   bool simplifyAddress(Address &Addr, MVT VT);
 189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 190                             MachineMemOperand::Flags Flags,
 191                             unsigned ScaleFactor, MachineMemOperand *MMO);
 192   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 194                           unsigned Alignment);
 195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 196                          const Value *Cond);
 197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
 198   bool optimizeSelect(const SelectInst *SI);
 199   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
 200
 201   // Emit helper routines.
 202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 203                       const Value *RHS, bool SetFlags = false,
 204                       bool WantResult = true,  bool IsZExt = false);
 205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 206                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 207                          bool SetFlags = false, bool WantResult = true);
 208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 209                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 210                          bool WantResult = true);
 211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 212                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 213                          AArch64_AM::ShiftExtendType ShiftType,
 214                          uint64_t ShiftImm, bool SetFlags = false,
 215                          bool WantResult = true);
 216   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 217                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 218                           AArch64_AM::ShiftExtendType ExtType,
 219                           uint64_t ShiftImm, bool SetFlags = false,
 220                          bool WantResult = true);
 221
 222   // Emit functions.
 223   bool emitCompareAndBranch(const BranchInst *BI);
 224   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 225   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 226   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 227   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 228   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
 229                     MachineMemOperand *MMO = nullptr);
 230   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 231                  MachineMemOperand *MMO = nullptr);
 232   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
 233                         MachineMemOperand *MMO = nullptr);
 234   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 235   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 236   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 237                    bool SetFlags = false, bool WantResult = true,
 238                    bool IsZExt = false);
 239   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
 240   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 241                    bool SetFlags = false, bool WantResult = true,
 242                    bool IsZExt = false);
 243   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 244                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 245   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 246                        unsigned RHSReg, bool RHSIsKill,
 247                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 248                        bool WantResult = true);
 249   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 250                          const Value *RHS);
 251   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 252                             bool LHSIsKill, uint64_t Imm);
 253   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 254                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 255                             uint64_t ShiftImm);
 256   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 257   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 258                       unsigned Op1, bool Op1IsKill);
 259   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 260                         unsigned Op1, bool Op1IsKill);
 261   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 262                         unsigned Op1, bool Op1IsKill);
 263   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 264                       unsigned Op1Reg, bool Op1IsKill);
 265   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 266                       uint64_t Imm, bool IsZExt = true);
 267   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 268                       unsigned Op1Reg, bool Op1IsKill);
 269   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 270                       uint64_t Imm, bool IsZExt = true);
 271   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 272                       unsigned Op1Reg, bool Op1IsKill);
 273   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 274                       uint64_t Imm, bool IsZExt = false);
 275
 276   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 277   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 278   unsigned materializeGV(const GlobalValue *GV);
 279
 280   // Call handling routines.
 281 private:
 282   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 283   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 284                        unsigned &NumBytes);
 285   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 286
 287 public:
 288   // Backend specific FastISel code.
 289   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 290   unsigned fastMaterializeConstant(const Constant *C) override;
 291   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 292
 293   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 294                            const TargetLibraryInfo *LibInfo)
 295       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 296     Subtarget =
 297         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
 298     Context = &FuncInfo.Fn->getContext();
 299   }
 300
 301   bool fastSelectInstruction(const Instruction *I) override;
 302
 303 #include "AArch64GenFastISel.inc"
 304 };
 305
 306 } // end anonymous namespace
 307
 308 #include "AArch64GenCallingConv.inc"
 309
 310 /// \brief Check if the sign-/zero-extend will be a noop.
 311 static bool isIntExtFree(const Instruction *I) {
 312   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 313          "Unexpected integer extend instruction.");
 314   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 315          "Unexpected value type.");
 316   bool IsZExt = isa<ZExtInst>(I);
 317
 318   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 319     if (LI->hasOneUse())
 320       return true;
 321
 322   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 323     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 324       return true;
 325
 326   return false;
 327 }
 328
 329 /// \brief Determine the implicit scale factor that is applied by a memory
 330 /// operation for a given value type.
 331 static unsigned getImplicitScaleFactor(MVT VT) {
 332   switch (VT.SimpleTy) {
 333   default:
 334     return 0;    // invalid
 335   case MVT::i1:  // fall-through
 336   case MVT::i8:
 337     return 1;
 338   case MVT::i16:
 339     return 2;
 340   case MVT::i32: // fall-through
 341   case MVT::f32:
 342     return 4;
 343   case MVT::i64: // fall-through
 344   case MVT::f64:
 345     return 8;
 346   }
 347 }
 348
 349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 350   if (CC == CallingConv::WebKit_JS)
 351     return CC_AArch64_WebKit_JS;
 352   if (CC == CallingConv::GHC)
 353     return CC_AArch64_GHC;
 354   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 355 }
 356
 357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 358   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
 359          "Alloca should always return a pointer.");
 360
 361   // Don't handle dynamic allocas.
 362   if (!FuncInfo.StaticAllocaMap.count(AI))
 363     return 0;
 364
 365   DenseMap<const AllocaInst *, int>::iterator SI =
 366       FuncInfo.StaticAllocaMap.find(AI);
 367
 368   if (SI != FuncInfo.StaticAllocaMap.end()) {
 369     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 370     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 371             ResultReg)
 372         .addFrameIndex(SI->second)
 373         .addImm(0)
 374         .addImm(0);
 375     return ResultReg;
 376   }
 377
 378   return 0;
 379 }
 380
 381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 382   if (VT > MVT::i64)
 383     return 0;
 384
 385   if (!CI->isZero())
 386     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 387
 388   // Create a copy from the zero register to materialize a "0" value.
 389   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 390                                                    : &AArch64::GPR32RegClass;
 391   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 392   unsigned ResultReg = createResultReg(RC);
 393   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 394           ResultReg).addReg(ZeroReg, getKillRegState(true));
 395   return ResultReg;
 396 }
 397
 398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 399   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 400   // register, because the immediate version of fmov cannot encode zero.
 401   if (CFP->isNullValue())
 402     return fastMaterializeFloatZero(CFP);
 403
 404   if (VT != MVT::f32 && VT != MVT::f64)
 405     return 0;
 406
 407   const APFloat Val = CFP->getValueAPF();
 408   bool Is64Bit = (VT == MVT::f64);
 409   // This checks to see if we can use FMOV instructions to materialize
 410   // a constant, otherwise we have to materialize via the constant pool.
 411   if (TLI.isFPImmLegal(Val, VT)) {
 412     int Imm =
 413         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 414     assert((Imm != -1) && "Cannot encode floating-point constant.");
 415     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 416     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 417   }
 418
 419   // For the MachO large code model materialize the FP constant in code.
 420   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
 421     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
 422     const TargetRegisterClass *RC = Is64Bit ?
 423         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 424
 425     unsigned TmpReg = createResultReg(RC);
 426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
 427         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
 428
 429     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 430     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 431             TII.get(TargetOpcode::COPY), ResultReg)
 432         .addReg(TmpReg, getKillRegState(true));
 433
 434     return ResultReg;
 435   }
 436
 437   // Materialize via constant pool.  MachineConstantPool wants an explicit
 438   // alignment.
 439   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 440   if (Align == 0)
 441     Align = DL.getTypeAllocSize(CFP->getType());
 442
 443   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 444   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 446           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 447
 448   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 449   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 450   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 451       .addReg(ADRPReg)
 452       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 453   return ResultReg;
 454 }
 455
 456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 457   // We can't handle thread-local variables quickly yet.
 458   if (GV->isThreadLocal())
 459     return 0;
 460
 461   // MachO still uses GOT for large code-model accesses, but ELF requires
 462   // movz/movk sequences, which FastISel doesn't handle yet.
 463   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
 464     return 0;
 465
 466   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 467
 468   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
 469   if (!DestEVT.isSimple())
 470     return 0;
 471
 472   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 473   unsigned ResultReg;
 474
 475   if (OpFlags & AArch64II::MO_GOT) {
 476     // ADRP + LDRX
 477     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 478             ADRPReg)
 479       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 480
 481     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 482     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 483             ResultReg)
 484       .addReg(ADRPReg)
 485       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 486                         AArch64II::MO_NC);
 487   } else {
 488     // ADRP + ADDX
 489     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 490             ADRPReg)
 491       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 492
 493     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 494     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 495             ResultReg)
 496       .addReg(ADRPReg)
 497       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 498       .addImm(0);
 499   }
 500   return ResultReg;
 501 }
 502
 503 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 504   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
 505
 506   // Only handle simple types.
 507   if (!CEVT.isSimple())
 508     return 0;
 509   MVT VT = CEVT.getSimpleVT();
 510
 511   if (const auto *CI = dyn_cast<ConstantInt>(C))
 512     return materializeInt(CI, VT);
 513   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 514     return materializeFP(CFP, VT);
 515   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 516     return materializeGV(GV);
 517
 518   return 0;
 519 }
 520
 521 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 522   assert(CFP->isNullValue() &&
 523          "Floating-point constant is not a positive zero.");
 524   MVT VT;
 525   if (!isTypeLegal(CFP->getType(), VT))
 526     return 0;
 527
 528   if (VT != MVT::f32 && VT != MVT::f64)
 529     return 0;
 530
 531   bool Is64Bit = (VT == MVT::f64);
 532   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 533   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 534   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 535 }
 536
 537 /// \brief Check if the multiply is by a power-of-2 constant.
 538 static bool isMulPowOf2(const Value *I) {
 539   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 540     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 541       if (C->getValue().isPowerOf2())
 542         return true;
 543     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 544       if (C->getValue().isPowerOf2())
 545         return true;
 546   }
 547   return false;
 548 }
 549
 550 // Computes the address to get to an object.
 551 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 552 {
 553   const User *U = nullptr;
 554   unsigned Opcode = Instruction::UserOp1;
 555   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 556     // Don't walk into other basic blocks unless the object is an alloca from
 557     // another block, otherwise it may not have a virtual register assigned.
 558     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 559         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 560       Opcode = I->getOpcode();
 561       U = I;
 562     }
 563   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 564     Opcode = C->getOpcode();
 565     U = C;
 566   }
 567
 568   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
 569     if (Ty->getAddressSpace() > 255)
 570       // Fast instruction selection doesn't support the special
 571       // address spaces.
 572       return false;
 573
 574   switch (Opcode) {
 575   default:
 576     break;
 577   case Instruction::BitCast:
 578     // Look through bitcasts.
 579     return computeAddress(U->getOperand(0), Addr, Ty);
 580
 581   case Instruction::IntToPtr:
 582     // Look past no-op inttoptrs.
 583     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 584         TLI.getPointerTy(DL))
 585       return computeAddress(U->getOperand(0), Addr, Ty);
 586     break;
 587
 588   case Instruction::PtrToInt:
 589     // Look past no-op ptrtoints.
 590     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 591       return computeAddress(U->getOperand(0), Addr, Ty);
 592     break;
 593
 594   case Instruction::GetElementPtr: {
 595     Address SavedAddr = Addr;
 596     uint64_t TmpOffset = Addr.getOffset();
 597
 598     // Iterate through the GEP folding the constants into offsets where
 599     // we can.
 600     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
 601          GTI != E; ++GTI) {
 602       const Value *Op = GTI.getOperand();
 603       if (StructType *STy = GTI.getStructTypeOrNull()) {
 604         const StructLayout *SL = DL.getStructLayout(STy);
 605         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 606         TmpOffset += SL->getElementOffset(Idx);
 607       } else {
 608         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 609         while (true) {
 610           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 611             // Constant-offset addressing.
 612             TmpOffset += CI->getSExtValue() * S;
 613             break;
 614           }
 615           if (canFoldAddIntoGEP(U, Op)) {
 616             // A compatible add with a constant operand. Fold the constant.
 617             ConstantInt *CI =
 618                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 619             TmpOffset += CI->getSExtValue() * S;
 620             // Iterate on the other operand.
 621             Op = cast<AddOperator>(Op)->getOperand(0);
 622             continue;
 623           }
 624           // Unsupported
 625           goto unsupported_gep;
 626         }
 627       }
 628     }
 629
 630     // Try to grab the base operand now.
 631     Addr.setOffset(TmpOffset);
 632     if (computeAddress(U->getOperand(0), Addr, Ty))
 633       return true;
 634
 635     // We failed, restore everything and try the other options.
 636     Addr = SavedAddr;
 637
 638   unsupported_gep:
 639     break;
 640   }
 641   case Instruction::Alloca: {
 642     const AllocaInst *AI = cast<AllocaInst>(Obj);
 643     DenseMap<const AllocaInst *, int>::iterator SI =
 644         FuncInfo.StaticAllocaMap.find(AI);
 645     if (SI != FuncInfo.StaticAllocaMap.end()) {
 646       Addr.setKind(Address::FrameIndexBase);
 647       Addr.setFI(SI->second);
 648       return true;
 649     }
 650     break;
 651   }
 652   case Instruction::Add: {
 653     // Adds of constants are common and easy enough.
 654     const Value *LHS = U->getOperand(0);
 655     const Value *RHS = U->getOperand(1);
 656
 657     if (isa<ConstantInt>(LHS))
 658       std::swap(LHS, RHS);
 659
 660     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 661       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 662       return computeAddress(LHS, Addr, Ty);
 663     }
 664
 665     Address Backup = Addr;
 666     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 667       return true;
 668     Addr = Backup;
 669
 670     break;
 671   }
 672   case Instruction::Sub: {
 673     // Subs of constants are common and easy enough.
 674     const Value *LHS = U->getOperand(0);
 675     const Value *RHS = U->getOperand(1);
 676
 677     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 678       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 679       return computeAddress(LHS, Addr, Ty);
 680     }
 681     break;
 682   }
 683   case Instruction::Shl: {
 684     if (Addr.getOffsetReg())
 685       break;
 686
 687     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 688     if (!CI)
 689       break;
 690
 691     unsigned Val = CI->getZExtValue();
 692     if (Val < 1 || Val > 3)
 693       break;
 694
 695     uint64_t NumBytes = 0;
 696     if (Ty && Ty->isSized()) {
 697       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 698       NumBytes = NumBits / 8;
 699       if (!isPowerOf2_64(NumBits))
 700         NumBytes = 0;
 701     }
 702
 703     if (NumBytes != (1ULL << Val))
 704       break;
 705
 706     Addr.setShift(Val);
 707     Addr.setExtendType(AArch64_AM::LSL);
 708
 709     const Value *Src = U->getOperand(0);
 710     if (const auto *I = dyn_cast<Instruction>(Src)) {
 711       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 712         // Fold the zext or sext when it won't become a noop.
 713         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 714           if (!isIntExtFree(ZE) &&
 715               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 716             Addr.setExtendType(AArch64_AM::UXTW);
 717             Src = ZE->getOperand(0);
 718           }
 719         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 720           if (!isIntExtFree(SE) &&
 721               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 722             Addr.setExtendType(AArch64_AM::SXTW);
 723             Src = SE->getOperand(0);
 724           }
 725         }
 726       }
 727     }
 728
 729     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 730       if (AI->getOpcode() == Instruction::And) {
 731         const Value *LHS = AI->getOperand(0);
 732         const Value *RHS = AI->getOperand(1);
 733
 734         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 735           if (C->getValue() == 0xffffffff)
 736             std::swap(LHS, RHS);
 737
 738         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 739           if (C->getValue() == 0xffffffff) {
 740             Addr.setExtendType(AArch64_AM::UXTW);
 741             unsigned Reg = getRegForValue(LHS);
 742             if (!Reg)
 743               return false;
 744             bool RegIsKill = hasTrivialKill(LHS);
 745             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 746                                              AArch64::sub_32);
 747             Addr.setOffsetReg(Reg);
 748             return true;
 749           }
 750       }
 751
 752     unsigned Reg = getRegForValue(Src);
 753     if (!Reg)
 754       return false;
 755     Addr.setOffsetReg(Reg);
 756     return true;
 757   }
 758   case Instruction::Mul: {
 759     if (Addr.getOffsetReg())
 760       break;
 761
 762     if (!isMulPowOf2(U))
 763       break;
 764
 765     const Value *LHS = U->getOperand(0);
 766     const Value *RHS = U->getOperand(1);
 767
 768     // Canonicalize power-of-2 value to the RHS.
 769     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 770       if (C->getValue().isPowerOf2())
 771         std::swap(LHS, RHS);
 772
 773     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 774     const auto *C = cast<ConstantInt>(RHS);
 775     unsigned Val = C->getValue().logBase2();
 776     if (Val < 1 || Val > 3)
 777       break;
 778
 779     uint64_t NumBytes = 0;
 780     if (Ty && Ty->isSized()) {
 781       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 782       NumBytes = NumBits / 8;
 783       if (!isPowerOf2_64(NumBits))
 784         NumBytes = 0;
 785     }
 786
 787     if (NumBytes != (1ULL << Val))
 788       break;
 789
 790     Addr.setShift(Val);
 791     Addr.setExtendType(AArch64_AM::LSL);
 792
 793     const Value *Src = LHS;
 794     if (const auto *I = dyn_cast<Instruction>(Src)) {
 795       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 796         // Fold the zext or sext when it won't become a noop.
 797         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 798           if (!isIntExtFree(ZE) &&
 799               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 800             Addr.setExtendType(AArch64_AM::UXTW);
 801             Src = ZE->getOperand(0);
 802           }
 803         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 804           if (!isIntExtFree(SE) &&
 805               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 806             Addr.setExtendType(AArch64_AM::SXTW);
 807             Src = SE->getOperand(0);
 808           }
 809         }
 810       }
 811     }
 812
 813     unsigned Reg = getRegForValue(Src);
 814     if (!Reg)
 815       return false;
 816     Addr.setOffsetReg(Reg);
 817     return true;
 818   }
 819   case Instruction::And: {
 820     if (Addr.getOffsetReg())
 821       break;
 822
 823     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
 824       break;
 825
 826     const Value *LHS = U->getOperand(0);
 827     const Value *RHS = U->getOperand(1);
 828
 829     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 830       if (C->getValue() == 0xffffffff)
 831         std::swap(LHS, RHS);
 832
 833     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 834       if (C->getValue() == 0xffffffff) {
 835         Addr.setShift(0);
 836         Addr.setExtendType(AArch64_AM::LSL);
 837         Addr.setExtendType(AArch64_AM::UXTW);
 838
 839         unsigned Reg = getRegForValue(LHS);
 840         if (!Reg)
 841           return false;
 842         bool RegIsKill = hasTrivialKill(LHS);
 843         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 844                                          AArch64::sub_32);
 845         Addr.setOffsetReg(Reg);
 846         return true;
 847       }
 848     break;
 849   }
 850   case Instruction::SExt:
 851   case Instruction::ZExt: {
 852     if (!Addr.getReg() || Addr.getOffsetReg())
 853       break;
 854
 855     const Value *Src = nullptr;
 856     // Fold the zext or sext when it won't become a noop.
 857     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 858       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 859         Addr.setExtendType(AArch64_AM::UXTW);
 860         Src = ZE->getOperand(0);
 861       }
 862     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 863       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 864         Addr.setExtendType(AArch64_AM::SXTW);
 865         Src = SE->getOperand(0);
 866       }
 867     }
 868
 869     if (!Src)
 870       break;
 871
 872     Addr.setShift(0);
 873     unsigned Reg = getRegForValue(Src);
 874     if (!Reg)
 875       return false;
 876     Addr.setOffsetReg(Reg);
 877     return true;
 878   }
 879   } // end switch
 880
 881   if (Addr.isRegBase() && !Addr.getReg()) {
 882     unsigned Reg = getRegForValue(Obj);
 883     if (!Reg)
 884       return false;
 885     Addr.setReg(Reg);
 886     return true;
 887   }
 888
 889   if (!Addr.getOffsetReg()) {
 890     unsigned Reg = getRegForValue(Obj);
 891     if (!Reg)
 892       return false;
 893     Addr.setOffsetReg(Reg);
 894     return true;
 895   }
 896
 897   return false;
 898 }
 899
 900 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 901   const User *U = nullptr;
 902   unsigned Opcode = Instruction::UserOp1;
 903   bool InMBB = true;
 904
 905   if (const auto *I = dyn_cast<Instruction>(V)) {
 906     Opcode = I->getOpcode();
 907     U = I;
 908     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 909   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 910     Opcode = C->getOpcode();
 911     U = C;
 912   }
 913
 914   switch (Opcode) {
 915   default: break;
 916   case Instruction::BitCast:
 917     // Look past bitcasts if its operand is in the same BB.
 918     if (InMBB)
 919       return computeCallAddress(U->getOperand(0), Addr);
 920     break;
 921   case Instruction::IntToPtr:
 922     // Look past no-op inttoptrs if its operand is in the same BB.
 923     if (InMBB &&
 924         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 925             TLI.getPointerTy(DL))
 926       return computeCallAddress(U->getOperand(0), Addr);
 927     break;
 928   case Instruction::PtrToInt:
 929     // Look past no-op ptrtoints if its operand is in the same BB.
 930     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 931       return computeCallAddress(U->getOperand(0), Addr);
 932     break;
 933   }
 934
 935   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 936     Addr.setGlobalValue(GV);
 937     return true;
 938   }
 939
 940   // If all else fails, try to materialize the value in a register.
 941   if (!Addr.getGlobalValue()) {
 942     Addr.setReg(getRegForValue(V));
 943     return Addr.getReg() != 0;
 944   }
 945
 946   return false;
 947 }
 948
 949 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 950   EVT evt = TLI.getValueType(DL, Ty, true);
 951
 952   // Only handle simple types.
 953   if (evt == MVT::Other || !evt.isSimple())
 954     return false;
 955   VT = evt.getSimpleVT();
 956
 957   // This is a legal type, but it's not something we handle in fast-isel.
 958   if (VT == MVT::f128)
 959     return false;
 960
 961   // Handle all other legal types, i.e. a register that will directly hold this
 962   // value.
 963   return TLI.isTypeLegal(VT);
 964 }
 965
 966 /// \brief Determine if the value type is supported by FastISel.
 967 ///
 968 /// FastISel for AArch64 can handle more value types than are legal. This adds
 969 /// simple value type such as i1, i8, and i16.
 970 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 971   if (Ty->isVectorTy() && !IsVectorAllowed)
 972     return false;
 973
 974   if (isTypeLegal(Ty, VT))
 975     return true;
 976
 977   // If this is a type than can be sign or zero-extended to a basic operation
 978   // go ahead and accept it now.
 979   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 980     return true;
 981
 982   return false;
 983 }
 984
 985 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 986   if (!isa<Instruction>(V))
 987     return true;
 988
 989   const auto *I = cast<Instruction>(V);
 990   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
 991 }
 992
 993 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 994   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 995   if (!ScaleFactor)
 996     return false;
 997
 998   bool ImmediateOffsetNeedsLowering = false;
 999   bool RegisterOffsetNeedsLowering = false;
1000   int64_t Offset = Addr.getOffset();
1001   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1002     ImmediateOffsetNeedsLowering = true;
1003   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1004            !isUInt<12>(Offset / ScaleFactor))
1005     ImmediateOffsetNeedsLowering = true;
1006
1007   // Cannot encode an offset register and an immediate offset in the same
1008   // instruction. Fold the immediate offset into the load/store instruction and
1009   // emit an additional add to take care of the offset register.
1010   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1011     RegisterOffsetNeedsLowering = true;
1012
1013   // Cannot encode zero register as base.
1014   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1015     RegisterOffsetNeedsLowering = true;
1016
1017   // If this is a stack pointer and the offset needs to be simplified then put
1018   // the alloca address into a register, set the base type back to register and
1019   // continue. This should almost never happen.
1020   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1021   {
1022     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1023     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1024             ResultReg)
1025       .addFrameIndex(Addr.getFI())
1026       .addImm(0)
1027       .addImm(0);
1028     Addr.setKind(Address::RegBase);
1029     Addr.setReg(ResultReg);
1030   }
1031
1032   if (RegisterOffsetNeedsLowering) {
1033     unsigned ResultReg = 0;
1034     if (Addr.getReg()) {
1035       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1036           Addr.getExtendType() == AArch64_AM::UXTW   )
1037         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1038                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1039                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1040                                   Addr.getShift());
1041       else
1042         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1043                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1044                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1045                                   Addr.getShift());
1046     } else {
1047       if (Addr.getExtendType() == AArch64_AM::UXTW)
1048         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1049                                /*Op0IsKill=*/false, Addr.getShift(),
1050                                /*IsZExt=*/true);
1051       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1052         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1053                                /*Op0IsKill=*/false, Addr.getShift(),
1054                                /*IsZExt=*/false);
1055       else
1056         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1057                                /*Op0IsKill=*/false, Addr.getShift());
1058     }
1059     if (!ResultReg)
1060       return false;
1061
1062     Addr.setReg(ResultReg);
1063     Addr.setOffsetReg(0);
1064     Addr.setShift(0);
1065     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1066   }
1067
1068   // Since the offset is too large for the load/store instruction get the
1069   // reg+offset into a register.
1070   if (ImmediateOffsetNeedsLowering) {
1071     unsigned ResultReg;
1072     if (Addr.getReg())
1073       // Try to fold the immediate into the add instruction.
1074       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1075     else
1076       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1077
1078     if (!ResultReg)
1079       return false;
1080     Addr.setReg(ResultReg);
1081     Addr.setOffset(0);
1082   }
1083   return true;
1084 }
1085
1086 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1087                                            const MachineInstrBuilder &MIB,
1088                                            MachineMemOperand::Flags Flags,
1089                                            unsigned ScaleFactor,
1090                                            MachineMemOperand *MMO) {
1091   int64_t Offset = Addr.getOffset() / ScaleFactor;
1092   // Frame base works a bit differently. Handle it separately.
1093   if (Addr.isFIBase()) {
1094     int FI = Addr.getFI();
1095     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1096     // and alignment should be based on the VT.
1097     MMO = FuncInfo.MF->getMachineMemOperand(
1098         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1099         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1100     // Now add the rest of the operands.
1101     MIB.addFrameIndex(FI).addImm(Offset);
1102   } else {
1103     assert(Addr.isRegBase() && "Unexpected address kind.");
1104     const MCInstrDesc &II = MIB->getDesc();
1105     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1106     Addr.setReg(
1107       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1108     Addr.setOffsetReg(
1109       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1110     if (Addr.getOffsetReg()) {
1111       assert(Addr.getOffset() == 0 && "Unexpected offset");
1112       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1113                       Addr.getExtendType() == AArch64_AM::SXTX;
1114       MIB.addReg(Addr.getReg());
1115       MIB.addReg(Addr.getOffsetReg());
1116       MIB.addImm(IsSigned);
1117       MIB.addImm(Addr.getShift() != 0);
1118     } else
1119       MIB.addReg(Addr.getReg()).addImm(Offset);
1120   }
1121
1122   if (MMO)
1123     MIB.addMemOperand(MMO);
1124 }
1125
1126 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1127                                      const Value *RHS, bool SetFlags,
1128                                      bool WantResult,  bool IsZExt) {
1129   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1130   bool NeedExtend = false;
1131   switch (RetVT.SimpleTy) {
1132   default:
1133     return 0;
1134   case MVT::i1:
1135     NeedExtend = true;
1136     break;
1137   case MVT::i8:
1138     NeedExtend = true;
1139     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1140     break;
1141   case MVT::i16:
1142     NeedExtend = true;
1143     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1144     break;
1145   case MVT::i32:  // fall-through
1146   case MVT::i64:
1147     break;
1148   }
1149   MVT SrcVT = RetVT;
1150   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1151
1152   // Canonicalize immediates to the RHS first.
1153   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1154     std::swap(LHS, RHS);
1155
1156   // Canonicalize mul by power of 2 to the RHS.
1157   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1158     if (isMulPowOf2(LHS))
1159       std::swap(LHS, RHS);
1160
1161   // Canonicalize shift immediate to the RHS.
1162   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1163     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1164       if (isa<ConstantInt>(SI->getOperand(1)))
1165         if (SI->getOpcode() == Instruction::Shl  ||
1166             SI->getOpcode() == Instruction::LShr ||
1167             SI->getOpcode() == Instruction::AShr   )
1168           std::swap(LHS, RHS);
1169
1170   unsigned LHSReg = getRegForValue(LHS);
1171   if (!LHSReg)
1172     return 0;
1173   bool LHSIsKill = hasTrivialKill(LHS);
1174
1175   if (NeedExtend)
1176     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1177
1178   unsigned ResultReg = 0;
1179   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1180     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1181     if (C->isNegative())
1182       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1183                                 SetFlags, WantResult);
1184     else
1185       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1186                                 WantResult);
1187   } else if (const auto *C = dyn_cast<Constant>(RHS))
1188     if (C->isNullValue())
1189       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1190                                 WantResult);
1191
1192   if (ResultReg)
1193     return ResultReg;
1194
1195   // Only extend the RHS within the instruction if there is a valid extend type.
1196   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1197       isValueAvailable(RHS)) {
1198     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1199       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1200         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1201           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1202           if (!RHSReg)
1203             return 0;
1204           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1205           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1206                                RHSIsKill, ExtendType, C->getZExtValue(),
1207                                SetFlags, WantResult);
1208         }
1209     unsigned RHSReg = getRegForValue(RHS);
1210     if (!RHSReg)
1211       return 0;
1212     bool RHSIsKill = hasTrivialKill(RHS);
1213     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1214                          ExtendType, 0, SetFlags, WantResult);
1215   }
1216
1217   // Check if the mul can be folded into the instruction.
1218   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1219     if (isMulPowOf2(RHS)) {
1220       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1221       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1222
1223       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1224         if (C->getValue().isPowerOf2())
1225           std::swap(MulLHS, MulRHS);
1226
1227       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1228       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1229       unsigned RHSReg = getRegForValue(MulLHS);
1230       if (!RHSReg)
1231         return 0;
1232       bool RHSIsKill = hasTrivialKill(MulLHS);
1233       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1234                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1235                                 WantResult);
1236       if (ResultReg)
1237         return ResultReg;
1238     }
1239   }
1240
1241   // Check if the shift can be folded into the instruction.
1242   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1244       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1245         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1246         switch (SI->getOpcode()) {
1247         default: break;
1248         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1249         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1250         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1251         }
1252         uint64_t ShiftVal = C->getZExtValue();
1253         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1254           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1255           if (!RHSReg)
1256             return 0;
1257           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1258           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1259                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1260                                     WantResult);
1261           if (ResultReg)
1262             return ResultReg;
1263         }
1264       }
1265     }
1266   }
1267
1268   unsigned RHSReg = getRegForValue(RHS);
1269   if (!RHSReg)
1270     return 0;
1271   bool RHSIsKill = hasTrivialKill(RHS);
1272
1273   if (NeedExtend)
1274     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1275
1276   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1277                        SetFlags, WantResult);
1278 }
1279
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281                                         bool LHSIsKill, unsigned RHSReg,
1282                                         bool RHSIsKill, bool SetFlags,
1283                                         bool WantResult) {
1284   assert(LHSReg && RHSReg && "Invalid register number.");
1285
1286   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1287       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1288     return 0;
1289
1290   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1291     return 0;
1292
1293   static const unsigned OpcTable[2][2][2] = {
1294     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1295       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1296     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1297       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1298   };
1299   bool Is64Bit = RetVT == MVT::i64;
1300   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1301   const TargetRegisterClass *RC =
1302       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1303   unsigned ResultReg;
1304   if (WantResult)
1305     ResultReg = createResultReg(RC);
1306   else
1307     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1308
1309   const MCInstrDesc &II = TII.get(Opc);
1310   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1311   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1312   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1313       .addReg(LHSReg, getKillRegState(LHSIsKill))
1314       .addReg(RHSReg, getKillRegState(RHSIsKill));
1315   return ResultReg;
1316 }
1317
1318 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1319                                         bool LHSIsKill, uint64_t Imm,
1320                                         bool SetFlags, bool WantResult) {
1321   assert(LHSReg && "Invalid register number.");
1322
1323   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324     return 0;
1325
1326   unsigned ShiftImm;
1327   if (isUInt<12>(Imm))
1328     ShiftImm = 0;
1329   else if ((Imm & 0xfff000) == Imm) {
1330     ShiftImm = 12;
1331     Imm >>= 12;
1332   } else
1333     return 0;
1334
1335   static const unsigned OpcTable[2][2][2] = {
1336     { { AArch64::SUBWri,  AArch64::SUBXri  },
1337       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1338     { { AArch64::SUBSWri, AArch64::SUBSXri },
1339       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1340   };
1341   bool Is64Bit = RetVT == MVT::i64;
1342   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1343   const TargetRegisterClass *RC;
1344   if (SetFlags)
1345     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1346   else
1347     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1348   unsigned ResultReg;
1349   if (WantResult)
1350     ResultReg = createResultReg(RC);
1351   else
1352     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1353
1354   const MCInstrDesc &II = TII.get(Opc);
1355   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1356   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1357       .addReg(LHSReg, getKillRegState(LHSIsKill))
1358       .addImm(Imm)
1359       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1360   return ResultReg;
1361 }
1362
1363 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1364                                         bool LHSIsKill, unsigned RHSReg,
1365                                         bool RHSIsKill,
1366                                         AArch64_AM::ShiftExtendType ShiftType,
1367                                         uint64_t ShiftImm, bool SetFlags,
1368                                         bool WantResult) {
1369   assert(LHSReg && RHSReg && "Invalid register number.");
1370   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1371          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1372
1373   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1374     return 0;
1375
1376   // Don't deal with undefined shifts.
1377   if (ShiftImm >= RetVT.getSizeInBits())
1378     return 0;
1379
1380   static const unsigned OpcTable[2][2][2] = {
1381     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1382       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1383     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1384       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1385   };
1386   bool Is64Bit = RetVT == MVT::i64;
1387   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1388   const TargetRegisterClass *RC =
1389       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1390   unsigned ResultReg;
1391   if (WantResult)
1392     ResultReg = createResultReg(RC);
1393   else
1394     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1395
1396   const MCInstrDesc &II = TII.get(Opc);
1397   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1398   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1399   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1400       .addReg(LHSReg, getKillRegState(LHSIsKill))
1401       .addReg(RHSReg, getKillRegState(RHSIsKill))
1402       .addImm(getShifterImm(ShiftType, ShiftImm));
1403   return ResultReg;
1404 }
1405
1406 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1407                                         bool LHSIsKill, unsigned RHSReg,
1408                                         bool RHSIsKill,
1409                                         AArch64_AM::ShiftExtendType ExtType,
1410                                         uint64_t ShiftImm, bool SetFlags,
1411                                         bool WantResult) {
1412   assert(LHSReg && RHSReg && "Invalid register number.");
1413   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1414          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1415
1416   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1417     return 0;
1418
1419   if (ShiftImm >= 4)
1420     return 0;
1421
1422   static const unsigned OpcTable[2][2][2] = {
1423     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1424       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1425     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1426       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1427   };
1428   bool Is64Bit = RetVT == MVT::i64;
1429   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1430   const TargetRegisterClass *RC = nullptr;
1431   if (SetFlags)
1432     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1433   else
1434     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1435   unsigned ResultReg;
1436   if (WantResult)
1437     ResultReg = createResultReg(RC);
1438   else
1439     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1440
1441   const MCInstrDesc &II = TII.get(Opc);
1442   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1443   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1444   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1445       .addReg(LHSReg, getKillRegState(LHSIsKill))
1446       .addReg(RHSReg, getKillRegState(RHSIsKill))
1447       .addImm(getArithExtendImm(ExtType, ShiftImm));
1448   return ResultReg;
1449 }
1450
1451 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1452   Type *Ty = LHS->getType();
1453   EVT EVT = TLI.getValueType(DL, Ty, true);
1454   if (!EVT.isSimple())
1455     return false;
1456   MVT VT = EVT.getSimpleVT();
1457
1458   switch (VT.SimpleTy) {
1459   default:
1460     return false;
1461   case MVT::i1:
1462   case MVT::i8:
1463   case MVT::i16:
1464   case MVT::i32:
1465   case MVT::i64:
1466     return emitICmp(VT, LHS, RHS, IsZExt);
1467   case MVT::f32:
1468   case MVT::f64:
1469     return emitFCmp(VT, LHS, RHS);
1470   }
1471 }
1472
1473 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1474                                bool IsZExt) {
1475   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1476                  IsZExt) != 0;
1477 }
1478
1479 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1480                                   uint64_t Imm) {
1481   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1482                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1483 }
1484
1485 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1486   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1487     return false;
1488
1489   // Check to see if the 2nd operand is a constant that we can encode directly
1490   // in the compare.
1491   bool UseImm = false;
1492   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1493     if (CFP->isZero() && !CFP->isNegative())
1494       UseImm = true;
1495
1496   unsigned LHSReg = getRegForValue(LHS);
1497   if (!LHSReg)
1498     return false;
1499   bool LHSIsKill = hasTrivialKill(LHS);
1500
1501   if (UseImm) {
1502     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1503     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1504         .addReg(LHSReg, getKillRegState(LHSIsKill));
1505     return true;
1506   }
1507
1508   unsigned RHSReg = getRegForValue(RHS);
1509   if (!RHSReg)
1510     return false;
1511   bool RHSIsKill = hasTrivialKill(RHS);
1512
1513   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1514   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1515       .addReg(LHSReg, getKillRegState(LHSIsKill))
1516       .addReg(RHSReg, getKillRegState(RHSIsKill));
1517   return true;
1518 }
1519
1520 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1521                                   bool SetFlags, bool WantResult, bool IsZExt) {
1522   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1523                     IsZExt);
1524 }
1525
1526 /// \brief This method is a wrapper to simplify add emission.
1527 ///
1528 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1529 /// that fails, then try to materialize the immediate into a register and use
1530 /// emitAddSub_rr instead.
1531 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1532                                       int64_t Imm) {
1533   unsigned ResultReg;
1534   if (Imm < 0)
1535     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1536   else
1537     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1538
1539   if (ResultReg)
1540     return ResultReg;
1541
1542   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1543   if (!CReg)
1544     return 0;
1545
1546   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1547   return ResultReg;
1548 }
1549
1550 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1551                                   bool SetFlags, bool WantResult, bool IsZExt) {
1552   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1553                     IsZExt);
1554 }
1555
1556 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1557                                       bool LHSIsKill, unsigned RHSReg,
1558                                       bool RHSIsKill, bool WantResult) {
1559   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1560                        RHSIsKill, /*SetFlags=*/true, WantResult);
1561 }
1562
1563 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1564                                       bool LHSIsKill, unsigned RHSReg,
1565                                       bool RHSIsKill,
1566                                       AArch64_AM::ShiftExtendType ShiftType,
1567                                       uint64_t ShiftImm, bool WantResult) {
1568   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1569                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1570                        WantResult);
1571 }
1572
1573 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1574                                         const Value *LHS, const Value *RHS) {
1575   // Canonicalize immediates to the RHS first.
1576   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1577     std::swap(LHS, RHS);
1578
1579   // Canonicalize mul by power-of-2 to the RHS.
1580   if (LHS->hasOneUse() && isValueAvailable(LHS))
1581     if (isMulPowOf2(LHS))
1582       std::swap(LHS, RHS);
1583
1584   // Canonicalize shift immediate to the RHS.
1585   if (LHS->hasOneUse() && isValueAvailable(LHS))
1586     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1587       if (isa<ConstantInt>(SI->getOperand(1)))
1588         std::swap(LHS, RHS);
1589
1590   unsigned LHSReg = getRegForValue(LHS);
1591   if (!LHSReg)
1592     return 0;
1593   bool LHSIsKill = hasTrivialKill(LHS);
1594
1595   unsigned ResultReg = 0;
1596   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1597     uint64_t Imm = C->getZExtValue();
1598     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1599   }
1600   if (ResultReg)
1601     return ResultReg;
1602
1603   // Check if the mul can be folded into the instruction.
1604   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1605     if (isMulPowOf2(RHS)) {
1606       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1607       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1608
1609       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1610         if (C->getValue().isPowerOf2())
1611           std::swap(MulLHS, MulRHS);
1612
1613       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1614       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1615
1616       unsigned RHSReg = getRegForValue(MulLHS);
1617       if (!RHSReg)
1618         return 0;
1619       bool RHSIsKill = hasTrivialKill(MulLHS);
1620       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1621                                    RHSIsKill, ShiftVal);
1622       if (ResultReg)
1623         return ResultReg;
1624     }
1625   }
1626
1627   // Check if the shift can be folded into the instruction.
1628   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1629     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1630       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1631         uint64_t ShiftVal = C->getZExtValue();
1632         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1633         if (!RHSReg)
1634           return 0;
1635         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1636         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1637                                      RHSIsKill, ShiftVal);
1638         if (ResultReg)
1639           return ResultReg;
1640       }
1641   }
1642
1643   unsigned RHSReg = getRegForValue(RHS);
1644   if (!RHSReg)
1645     return 0;
1646   bool RHSIsKill = hasTrivialKill(RHS);
1647
1648   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1649   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1650   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1651     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1652     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1653   }
1654   return ResultReg;
1655 }
1656
1657 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1658                                            unsigned LHSReg, bool LHSIsKill,
1659                                            uint64_t Imm) {
1660   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1661                 "ISD nodes are not consecutive!");
1662   static const unsigned OpcTable[3][2] = {
1663     { AArch64::ANDWri, AArch64::ANDXri },
1664     { AArch64::ORRWri, AArch64::ORRXri },
1665     { AArch64::EORWri, AArch64::EORXri }
1666   };
1667   const TargetRegisterClass *RC;
1668   unsigned Opc;
1669   unsigned RegSize;
1670   switch (RetVT.SimpleTy) {
1671   default:
1672     return 0;
1673   case MVT::i1:
1674   case MVT::i8:
1675   case MVT::i16:
1676   case MVT::i32: {
1677     unsigned Idx = ISDOpc - ISD::AND;
1678     Opc = OpcTable[Idx][0];
1679     RC = &AArch64::GPR32spRegClass;
1680     RegSize = 32;
1681     break;
1682   }
1683   case MVT::i64:
1684     Opc = OpcTable[ISDOpc - ISD::AND][1];
1685     RC = &AArch64::GPR64spRegClass;
1686     RegSize = 64;
1687     break;
1688   }
1689
1690   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1691     return 0;
1692
1693   unsigned ResultReg =
1694       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1695                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1696   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1697     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1698     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1699   }
1700   return ResultReg;
1701 }
1702
1703 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1704                                            unsigned LHSReg, bool LHSIsKill,
1705                                            unsigned RHSReg, bool RHSIsKill,
1706                                            uint64_t ShiftImm) {
1707   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708                 "ISD nodes are not consecutive!");
1709   static const unsigned OpcTable[3][2] = {
1710     { AArch64::ANDWrs, AArch64::ANDXrs },
1711     { AArch64::ORRWrs, AArch64::ORRXrs },
1712     { AArch64::EORWrs, AArch64::EORXrs }
1713   };
1714
1715   // Don't deal with undefined shifts.
1716   if (ShiftImm >= RetVT.getSizeInBits())
1717     return 0;
1718
1719   const TargetRegisterClass *RC;
1720   unsigned Opc;
1721   switch (RetVT.SimpleTy) {
1722   default:
1723     return 0;
1724   case MVT::i1:
1725   case MVT::i8:
1726   case MVT::i16:
1727   case MVT::i32:
1728     Opc = OpcTable[ISDOpc - ISD::AND][0];
1729     RC = &AArch64::GPR32RegClass;
1730     break;
1731   case MVT::i64:
1732     Opc = OpcTable[ISDOpc - ISD::AND][1];
1733     RC = &AArch64::GPR64RegClass;
1734     break;
1735   }
1736   unsigned ResultReg =
1737       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1738                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1742   }
1743   return ResultReg;
1744 }
1745
1746 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1747                                      uint64_t Imm) {
1748   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1749 }
1750
1751 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752                                    bool WantZExt, MachineMemOperand *MMO) {
1753   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754     return 0;
1755
1756   // Simplify this down to something we can handle.
1757   if (!simplifyAddress(Addr, VT))
1758     return 0;
1759
1760   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761   if (!ScaleFactor)
1762     llvm_unreachable("Unexpected value type.");
1763
1764   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766   bool UseScaled = true;
1767   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768     UseScaled = false;
1769     ScaleFactor = 1;
1770   }
1771
1772   static const unsigned GPOpcTable[2][8][4] = {
1773     // Sign-extend.
1774     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1775         AArch64::LDURXi  },
1776       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1777         AArch64::LDURXi  },
1778       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1779         AArch64::LDRXui  },
1780       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1781         AArch64::LDRXui  },
1782       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783         AArch64::LDRXroX },
1784       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785         AArch64::LDRXroX },
1786       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787         AArch64::LDRXroW },
1788       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789         AArch64::LDRXroW }
1790     },
1791     // Zero-extend.
1792     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1793         AArch64::LDURXi  },
1794       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1795         AArch64::LDURXi  },
1796       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1797         AArch64::LDRXui  },
1798       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1799         AArch64::LDRXui  },
1800       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1801         AArch64::LDRXroX },
1802       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1803         AArch64::LDRXroX },
1804       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1805         AArch64::LDRXroW },
1806       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1807         AArch64::LDRXroW }
1808     }
1809   };
1810
1811   static const unsigned FPOpcTable[4][2] = {
1812     { AArch64::LDURSi,  AArch64::LDURDi  },
1813     { AArch64::LDRSui,  AArch64::LDRDui  },
1814     { AArch64::LDRSroX, AArch64::LDRDroX },
1815     { AArch64::LDRSroW, AArch64::LDRDroW }
1816   };
1817
1818   unsigned Opc;
1819   const TargetRegisterClass *RC;
1820   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821                       Addr.getOffsetReg();
1822   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824       Addr.getExtendType() == AArch64_AM::SXTW)
1825     Idx++;
1826
1827   bool IsRet64Bit = RetVT == MVT::i64;
1828   switch (VT.SimpleTy) {
1829   default:
1830     llvm_unreachable("Unexpected value type.");
1831   case MVT::i1: // Intentional fall-through.
1832   case MVT::i8:
1833     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834     RC = (IsRet64Bit && !WantZExt) ?
1835              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836     break;
1837   case MVT::i16:
1838     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839     RC = (IsRet64Bit && !WantZExt) ?
1840              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841     break;
1842   case MVT::i32:
1843     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844     RC = (IsRet64Bit && !WantZExt) ?
1845              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846     break;
1847   case MVT::i64:
1848     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849     RC = &AArch64::GPR64RegClass;
1850     break;
1851   case MVT::f32:
1852     Opc = FPOpcTable[Idx][0];
1853     RC = &AArch64::FPR32RegClass;
1854     break;
1855   case MVT::f64:
1856     Opc = FPOpcTable[Idx][1];
1857     RC = &AArch64::FPR64RegClass;
1858     break;
1859   }
1860
1861   // Create the base instruction, then add the operands.
1862   unsigned ResultReg = createResultReg(RC);
1863   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1864                                     TII.get(Opc), ResultReg);
1865   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866
1867   // Loading an i1 requires special handling.
1868   if (VT == MVT::i1) {
1869     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1870     assert(ANDReg && "Unexpected AND instruction emission failure.");
1871     ResultReg = ANDReg;
1872   }
1873
1874   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1875   // the 32bit reg to a 64bit reg.
1876   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1879             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880         .addImm(0)
1881         .addReg(ResultReg, getKillRegState(true))
1882         .addImm(AArch64::sub_32);
1883     ResultReg = Reg64;
1884   }
1885   return ResultReg;
1886 }
1887
1888 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889   MVT VT;
1890   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891     return false;
1892
1893   if (VT.isVector())
1894     return selectOperator(I, I->getOpcode());
1895
1896   unsigned ResultReg;
1897   switch (I->getOpcode()) {
1898   default:
1899     llvm_unreachable("Unexpected instruction.");
1900   case Instruction::Add:
1901     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902     break;
1903   case Instruction::Sub:
1904     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905     break;
1906   }
1907   if (!ResultReg)
1908     return false;
1909
1910   updateValueMap(I, ResultReg);
1911   return true;
1912 }
1913
1914 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915   MVT VT;
1916   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917     return false;
1918
1919   if (VT.isVector())
1920     return selectOperator(I, I->getOpcode());
1921
1922   unsigned ResultReg;
1923   switch (I->getOpcode()) {
1924   default:
1925     llvm_unreachable("Unexpected instruction.");
1926   case Instruction::And:
1927     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928     break;
1929   case Instruction::Or:
1930     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931     break;
1932   case Instruction::Xor:
1933     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934     break;
1935   }
1936   if (!ResultReg)
1937     return false;
1938
1939   updateValueMap(I, ResultReg);
1940   return true;
1941 }
1942
1943 bool AArch64FastISel::selectLoad(const Instruction *I) {
1944   MVT VT;
1945   // Verify we have a legal type before going any further.  Currently, we handle
1946   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949       cast<LoadInst>(I)->isAtomic())
1950     return false;
1951
1952   const Value *SV = I->getOperand(0);
1953   if (TLI.supportSwiftError()) {
1954     // Swifterror values can come from either a function parameter with
1955     // swifterror attribute or an alloca with swifterror attribute.
1956     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957       if (Arg->hasSwiftErrorAttr())
1958         return false;
1959     }
1960
1961     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962       if (Alloca->isSwiftError())
1963         return false;
1964     }
1965   }
1966
1967   // See if we can handle this address.
1968   Address Addr;
1969   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970     return false;
1971
1972   // Fold the following sign-/zero-extend into the load instruction.
1973   bool WantZExt = true;
1974   MVT RetVT = VT;
1975   const Value *IntExtVal = nullptr;
1976   if (I->hasOneUse()) {
1977     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978       if (isTypeSupported(ZE->getType(), RetVT))
1979         IntExtVal = ZE;
1980       else
1981         RetVT = VT;
1982     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983       if (isTypeSupported(SE->getType(), RetVT))
1984         IntExtVal = SE;
1985       else
1986         RetVT = VT;
1987       WantZExt = false;
1988     }
1989   }
1990
1991   unsigned ResultReg =
1992       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993   if (!ResultReg)
1994     return false;
1995
1996   // There are a few different cases we have to handle, because the load or the
1997   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1998   // SelectionDAG. There is also an ordering issue when both instructions are in
1999   // different basic blocks.
2000   // 1.) The load instruction is selected by FastISel, but the integer extend
2001   //     not. This usually happens when the integer extend is in a different
2002   //     basic block and SelectionDAG took over for that basic block.
2003   // 2.) The load instruction is selected before the integer extend. This only
2004   //     happens when the integer extend is in a different basic block.
2005   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006   //     by FastISel. This happens if there are instructions between the load
2007   //     and the integer extend that couldn't be selected by FastISel.
2008   if (IntExtVal) {
2009     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011     // it when it selects the integer extend.
2012     unsigned Reg = lookUpRegForValue(IntExtVal);
2013     auto *MI = MRI.getUniqueVRegDef(Reg);
2014     if (!MI) {
2015       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016         if (WantZExt) {
2017           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018           std::prev(FuncInfo.InsertPt)->eraseFromParent();
2019           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2020         } else
2021           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2022                                                  /*IsKill=*/true,
2023                                                  AArch64::sub_32);
2024       }
2025       updateValueMap(I, ResultReg);
2026       return true;
2027     }
2028
2029     // The integer extend has already been emitted - delete all the instructions
2030     // that have been emitted by the integer extend lowering code and use the
2031     // result from the load instruction directly.
2032     while (MI) {
2033       Reg = 0;
2034       for (auto &Opnd : MI->uses()) {
2035         if (Opnd.isReg()) {
2036           Reg = Opnd.getReg();
2037           break;
2038         }
2039       }
2040       MI->eraseFromParent();
2041       MI = nullptr;
2042       if (Reg)
2043         MI = MRI.getUniqueVRegDef(Reg);
2044     }
2045     updateValueMap(IntExtVal, ResultReg);
2046     return true;
2047   }
2048
2049   updateValueMap(I, ResultReg);
2050   return true;
2051 }
2052
2053 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2054                                        unsigned AddrReg,
2055                                        MachineMemOperand *MMO) {
2056   unsigned Opc;
2057   switch (VT.SimpleTy) {
2058   default: return false;
2059   case MVT::i8:  Opc = AArch64::STLRB; break;
2060   case MVT::i16: Opc = AArch64::STLRH; break;
2061   case MVT::i32: Opc = AArch64::STLRW; break;
2062   case MVT::i64: Opc = AArch64::STLRX; break;
2063   }
2064
2065   const MCInstrDesc &II = TII.get(Opc);
2066   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2067   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2068   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2069       .addReg(SrcReg)
2070       .addReg(AddrReg)
2071       .addMemOperand(MMO);
2072   return true;
2073 }
2074
2075 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2076                                 MachineMemOperand *MMO) {
2077   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2078     return false;
2079
2080   // Simplify this down to something we can handle.
2081   if (!simplifyAddress(Addr, VT))
2082     return false;
2083
2084   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2085   if (!ScaleFactor)
2086     llvm_unreachable("Unexpected value type.");
2087
2088   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2089   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2090   bool UseScaled = true;
2091   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2092     UseScaled = false;
2093     ScaleFactor = 1;
2094   }
2095
2096   static const unsigned OpcTable[4][6] = {
2097     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2098       AArch64::STURSi,   AArch64::STURDi },
2099     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2100       AArch64::STRSui,   AArch64::STRDui },
2101     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2102       AArch64::STRSroX,  AArch64::STRDroX },
2103     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2104       AArch64::STRSroW,  AArch64::STRDroW }
2105   };
2106
2107   unsigned Opc;
2108   bool VTIsi1 = false;
2109   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2110                       Addr.getOffsetReg();
2111   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2112   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2113       Addr.getExtendType() == AArch64_AM::SXTW)
2114     Idx++;
2115
2116   switch (VT.SimpleTy) {
2117   default: llvm_unreachable("Unexpected value type.");
2118   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2119   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2120   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2121   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2122   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2123   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2124   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2125   }
2126
2127   // Storing an i1 requires special handling.
2128   if (VTIsi1 && SrcReg != AArch64::WZR) {
2129     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2130     assert(ANDReg && "Unexpected AND instruction emission failure.");
2131     SrcReg = ANDReg;
2132   }
2133   // Create the base instruction, then add the operands.
2134   const MCInstrDesc &II = TII.get(Opc);
2135   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2136   MachineInstrBuilder MIB =
2137       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2138   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2139
2140   return true;
2141 }
2142
2143 bool AArch64FastISel::selectStore(const Instruction *I) {
2144   MVT VT;
2145   const Value *Op0 = I->getOperand(0);
2146   // Verify we have a legal type before going any further.  Currently, we handle
2147   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2148   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2149   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2150     return false;
2151
2152   const Value *PtrV = I->getOperand(1);
2153   if (TLI.supportSwiftError()) {
2154     // Swifterror values can come from either a function parameter with
2155     // swifterror attribute or an alloca with swifterror attribute.
2156     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2157       if (Arg->hasSwiftErrorAttr())
2158         return false;
2159     }
2160
2161     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2162       if (Alloca->isSwiftError())
2163         return false;
2164     }
2165   }
2166
2167   // Get the value to be stored into a register. Use the zero register directly
2168   // when possible to avoid an unnecessary copy and a wasted register.
2169   unsigned SrcReg = 0;
2170   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2171     if (CI->isZero())
2172       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2173   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2174     if (CF->isZero() && !CF->isNegative()) {
2175       VT = MVT::getIntegerVT(VT.getSizeInBits());
2176       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2177     }
2178   }
2179
2180   if (!SrcReg)
2181     SrcReg = getRegForValue(Op0);
2182
2183   if (!SrcReg)
2184     return false;
2185
2186   auto *SI = cast<StoreInst>(I);
2187
2188   // Try to emit a STLR for seq_cst/release.
2189   if (SI->isAtomic()) {
2190     AtomicOrdering Ord = SI->getOrdering();
2191     // The non-atomic instructions are sufficient for relaxed stores.
2192     if (isReleaseOrStronger(Ord)) {
2193       // The STLR addressing mode only supports a base reg; pass that directly.
2194       unsigned AddrReg = getRegForValue(PtrV);
2195       return emitStoreRelease(VT, SrcReg, AddrReg,
2196                               createMachineMemOperandFor(I));
2197     }
2198   }
2199
2200   // See if we can handle this address.
2201   Address Addr;
2202   if (!computeAddress(PtrV, Addr, Op0->getType()))
2203     return false;
2204
2205   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2206     return false;
2207   return true;
2208 }
2209
2210 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2211   switch (Pred) {
2212   case CmpInst::FCMP_ONE:
2213   case CmpInst::FCMP_UEQ:
2214   default:
2215     // AL is our "false" for now. The other two need more compares.
2216     return AArch64CC::AL;
2217   case CmpInst::ICMP_EQ:
2218   case CmpInst::FCMP_OEQ:
2219     return AArch64CC::EQ;
2220   case CmpInst::ICMP_SGT:
2221   case CmpInst::FCMP_OGT:
2222     return AArch64CC::GT;
2223   case CmpInst::ICMP_SGE:
2224   case CmpInst::FCMP_OGE:
2225     return AArch64CC::GE;
2226   case CmpInst::ICMP_UGT:
2227   case CmpInst::FCMP_UGT:
2228     return AArch64CC::HI;
2229   case CmpInst::FCMP_OLT:
2230     return AArch64CC::MI;
2231   case CmpInst::ICMP_ULE:
2232   case CmpInst::FCMP_OLE:
2233     return AArch64CC::LS;
2234   case CmpInst::FCMP_ORD:
2235     return AArch64CC::VC;
2236   case CmpInst::FCMP_UNO:
2237     return AArch64CC::VS;
2238   case CmpInst::FCMP_UGE:
2239     return AArch64CC::PL;
2240   case CmpInst::ICMP_SLT:
2241   case CmpInst::FCMP_ULT:
2242     return AArch64CC::LT;
2243   case CmpInst::ICMP_SLE:
2244   case CmpInst::FCMP_ULE:
2245     return AArch64CC::LE;
2246   case CmpInst::FCMP_UNE:
2247   case CmpInst::ICMP_NE:
2248     return AArch64CC::NE;
2249   case CmpInst::ICMP_UGE:
2250     return AArch64CC::HS;
2251   case CmpInst::ICMP_ULT:
2252     return AArch64CC::LO;
2253   }
2254 }
2255
2256 /// \brief Try to emit a combined compare-and-branch instruction.
2257 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2258   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2259   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2260   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2261
2262   const Value *LHS = CI->getOperand(0);
2263   const Value *RHS = CI->getOperand(1);
2264
2265   MVT VT;
2266   if (!isTypeSupported(LHS->getType(), VT))
2267     return false;
2268
2269   unsigned BW = VT.getSizeInBits();
2270   if (BW > 64)
2271     return false;
2272
2273   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2274   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2275
2276   // Try to take advantage of fallthrough opportunities.
2277   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2278     std::swap(TBB, FBB);
2279     Predicate = CmpInst::getInversePredicate(Predicate);
2280   }
2281
2282   int TestBit = -1;
2283   bool IsCmpNE;
2284   switch (Predicate) {
2285   default:
2286     return false;
2287   case CmpInst::ICMP_EQ:
2288   case CmpInst::ICMP_NE:
2289     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2290       std::swap(LHS, RHS);
2291
2292     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2293       return false;
2294
2295     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2296       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2297         const Value *AndLHS = AI->getOperand(0);
2298         const Value *AndRHS = AI->getOperand(1);
2299
2300         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2301           if (C->getValue().isPowerOf2())
2302             std::swap(AndLHS, AndRHS);
2303
2304         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2305           if (C->getValue().isPowerOf2()) {
2306             TestBit = C->getValue().logBase2();
2307             LHS = AndLHS;
2308           }
2309       }
2310
2311     if (VT == MVT::i1)
2312       TestBit = 0;
2313
2314     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2315     break;
2316   case CmpInst::ICMP_SLT:
2317   case CmpInst::ICMP_SGE:
2318     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2319       return false;
2320
2321     TestBit = BW - 1;
2322     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2323     break;
2324   case CmpInst::ICMP_SGT:
2325   case CmpInst::ICMP_SLE:
2326     if (!isa<ConstantInt>(RHS))
2327       return false;
2328
2329     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2330       return false;
2331
2332     TestBit = BW - 1;
2333     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2334     break;
2335   } // end switch
2336
2337   static const unsigned OpcTable[2][2][2] = {
2338     { {AArch64::CBZW,  AArch64::CBZX },
2339       {AArch64::CBNZW, AArch64::CBNZX} },
2340     { {AArch64::TBZW,  AArch64::TBZX },
2341       {AArch64::TBNZW, AArch64::TBNZX} }
2342   };
2343
2344   bool IsBitTest = TestBit != -1;
2345   bool Is64Bit = BW == 64;
2346   if (TestBit < 32 && TestBit >= 0)
2347     Is64Bit = false;
2348
2349   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2350   const MCInstrDesc &II = TII.get(Opc);
2351
2352   unsigned SrcReg = getRegForValue(LHS);
2353   if (!SrcReg)
2354     return false;
2355   bool SrcIsKill = hasTrivialKill(LHS);
2356
2357   if (BW == 64 && !Is64Bit)
2358     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2359                                         AArch64::sub_32);
2360
2361   if ((BW < 32) && !IsBitTest)
2362     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2363
2364   // Emit the combined compare and branch instruction.
2365   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2366   MachineInstrBuilder MIB =
2367       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2368           .addReg(SrcReg, getKillRegState(SrcIsKill));
2369   if (IsBitTest)
2370     MIB.addImm(TestBit);
2371   MIB.addMBB(TBB);
2372
2373   finishCondBranch(BI->getParent(), TBB, FBB);
2374   return true;
2375 }
2376
2377 bool AArch64FastISel::selectBranch(const Instruction *I) {
2378   const BranchInst *BI = cast<BranchInst>(I);
2379   if (BI->isUnconditional()) {
2380     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2381     fastEmitBranch(MSucc, BI->getDebugLoc());
2382     return true;
2383   }
2384
2385   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2386   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2387
2388   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2389     if (CI->hasOneUse() && isValueAvailable(CI)) {
2390       // Try to optimize or fold the cmp.
2391       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2392       switch (Predicate) {
2393       default:
2394         break;
2395       case CmpInst::FCMP_FALSE:
2396         fastEmitBranch(FBB, DbgLoc);
2397         return true;
2398       case CmpInst::FCMP_TRUE:
2399         fastEmitBranch(TBB, DbgLoc);
2400         return true;
2401       }
2402
2403       // Try to emit a combined compare-and-branch first.
2404       if (emitCompareAndBranch(BI))
2405         return true;
2406
2407       // Try to take advantage of fallthrough opportunities.
2408       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2409         std::swap(TBB, FBB);
2410         Predicate = CmpInst::getInversePredicate(Predicate);
2411       }
2412
2413       // Emit the cmp.
2414       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2415         return false;
2416
2417       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2418       // instruction.
2419       AArch64CC::CondCode CC = getCompareCC(Predicate);
2420       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2421       switch (Predicate) {
2422       default:
2423         break;
2424       case CmpInst::FCMP_UEQ:
2425         ExtraCC = AArch64CC::EQ;
2426         CC = AArch64CC::VS;
2427         break;
2428       case CmpInst::FCMP_ONE:
2429         ExtraCC = AArch64CC::MI;
2430         CC = AArch64CC::GT;
2431         break;
2432       }
2433       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2434
2435       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2436       if (ExtraCC != AArch64CC::AL) {
2437         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2438             .addImm(ExtraCC)
2439             .addMBB(TBB);
2440       }
2441
2442       // Emit the branch.
2443       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444           .addImm(CC)
2445           .addMBB(TBB);
2446
2447       finishCondBranch(BI->getParent(), TBB, FBB);
2448       return true;
2449     }
2450   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2451     uint64_t Imm = CI->getZExtValue();
2452     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2453     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2454         .addMBB(Target);
2455
2456     // Obtain the branch probability and add the target to the successor list.
2457     if (FuncInfo.BPI) {
2458       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2459           BI->getParent(), Target->getBasicBlock());
2460       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2461     } else
2462       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2463     return true;
2464   } else {
2465     AArch64CC::CondCode CC = AArch64CC::NE;
2466     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2467       // Fake request the condition, otherwise the intrinsic might be completely
2468       // optimized away.
2469       unsigned CondReg = getRegForValue(BI->getCondition());
2470       if (!CondReg)
2471         return false;
2472
2473       // Emit the branch.
2474       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2475         .addImm(CC)
2476         .addMBB(TBB);
2477
2478       finishCondBranch(BI->getParent(), TBB, FBB);
2479       return true;
2480     }
2481   }
2482
2483   unsigned CondReg = getRegForValue(BI->getCondition());
2484   if (CondReg == 0)
2485     return false;
2486   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2487
2488   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2489   unsigned Opcode = AArch64::TBNZW;
2490   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2491     std::swap(TBB, FBB);
2492     Opcode = AArch64::TBZW;
2493   }
2494
2495   const MCInstrDesc &II = TII.get(Opcode);
2496   unsigned ConstrainedCondReg
2497     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2498   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2499       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2500       .addImm(0)
2501       .addMBB(TBB);
2502
2503   finishCondBranch(BI->getParent(), TBB, FBB);
2504   return true;
2505 }
2506
2507 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2508   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2509   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2510   if (AddrReg == 0)
2511     return false;
2512
2513   // Emit the indirect branch.
2514   const MCInstrDesc &II = TII.get(AArch64::BR);
2515   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2516   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2517
2518   // Make sure the CFG is up-to-date.
2519   for (auto *Succ : BI->successors())
2520     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2521
2522   return true;
2523 }
2524
2525 bool AArch64FastISel::selectCmp(const Instruction *I) {
2526   const CmpInst *CI = cast<CmpInst>(I);
2527
2528   // Vectors of i1 are weird: bail out.
2529   if (CI->getType()->isVectorTy())
2530     return false;
2531
2532   // Try to optimize or fold the cmp.
2533   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2534   unsigned ResultReg = 0;
2535   switch (Predicate) {
2536   default:
2537     break;
2538   case CmpInst::FCMP_FALSE:
2539     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2540     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2541             TII.get(TargetOpcode::COPY), ResultReg)
2542         .addReg(AArch64::WZR, getKillRegState(true));
2543     break;
2544   case CmpInst::FCMP_TRUE:
2545     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2546     break;
2547   }
2548
2549   if (ResultReg) {
2550     updateValueMap(I, ResultReg);
2551     return true;
2552   }
2553
2554   // Emit the cmp.
2555   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2556     return false;
2557
2558   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2559
2560   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2561   // condition codes are inverted, because they are used by CSINC.
2562   static unsigned CondCodeTable[2][2] = {
2563     { AArch64CC::NE, AArch64CC::VC },
2564     { AArch64CC::PL, AArch64CC::LE }
2565   };
2566   unsigned *CondCodes = nullptr;
2567   switch (Predicate) {
2568   default:
2569     break;
2570   case CmpInst::FCMP_UEQ:
2571     CondCodes = &CondCodeTable[0][0];
2572     break;
2573   case CmpInst::FCMP_ONE:
2574     CondCodes = &CondCodeTable[1][0];
2575     break;
2576   }
2577
2578   if (CondCodes) {
2579     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2580     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2581             TmpReg1)
2582         .addReg(AArch64::WZR, getKillRegState(true))
2583         .addReg(AArch64::WZR, getKillRegState(true))
2584         .addImm(CondCodes[0]);
2585     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2586             ResultReg)
2587         .addReg(TmpReg1, getKillRegState(true))
2588         .addReg(AArch64::WZR, getKillRegState(true))
2589         .addImm(CondCodes[1]);
2590
2591     updateValueMap(I, ResultReg);
2592     return true;
2593   }
2594
2595   // Now set a register based on the comparison.
2596   AArch64CC::CondCode CC = getCompareCC(Predicate);
2597   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2598   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2599   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2600           ResultReg)
2601       .addReg(AArch64::WZR, getKillRegState(true))
2602       .addReg(AArch64::WZR, getKillRegState(true))
2603       .addImm(invertedCC);
2604
2605   updateValueMap(I, ResultReg);
2606   return true;
2607 }
2608
2609 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2610 /// value.
2611 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2612   if (!SI->getType()->isIntegerTy(1))
2613     return false;
2614
2615   const Value *Src1Val, *Src2Val;
2616   unsigned Opc = 0;
2617   bool NeedExtraOp = false;
2618   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2619     if (CI->isOne()) {
2620       Src1Val = SI->getCondition();
2621       Src2Val = SI->getFalseValue();
2622       Opc = AArch64::ORRWrr;
2623     } else {
2624       assert(CI->isZero());
2625       Src1Val = SI->getFalseValue();
2626       Src2Val = SI->getCondition();
2627       Opc = AArch64::BICWrr;
2628     }
2629   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2630     if (CI->isOne()) {
2631       Src1Val = SI->getCondition();
2632       Src2Val = SI->getTrueValue();
2633       Opc = AArch64::ORRWrr;
2634       NeedExtraOp = true;
2635     } else {
2636       assert(CI->isZero());
2637       Src1Val = SI->getCondition();
2638       Src2Val = SI->getTrueValue();
2639       Opc = AArch64::ANDWrr;
2640     }
2641   }
2642
2643   if (!Opc)
2644     return false;
2645
2646   unsigned Src1Reg = getRegForValue(Src1Val);
2647   if (!Src1Reg)
2648     return false;
2649   bool Src1IsKill = hasTrivialKill(Src1Val);
2650
2651   unsigned Src2Reg = getRegForValue(Src2Val);
2652   if (!Src2Reg)
2653     return false;
2654   bool Src2IsKill = hasTrivialKill(Src2Val);
2655
2656   if (NeedExtraOp) {
2657     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2658     Src1IsKill = true;
2659   }
2660   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2661                                        Src1IsKill, Src2Reg, Src2IsKill);
2662   updateValueMap(SI, ResultReg);
2663   return true;
2664 }
2665
2666 bool AArch64FastISel::selectSelect(const Instruction *I) {
2667   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2668   MVT VT;
2669   if (!isTypeSupported(I->getType(), VT))
2670     return false;
2671
2672   unsigned Opc;
2673   const TargetRegisterClass *RC;
2674   switch (VT.SimpleTy) {
2675   default:
2676     return false;
2677   case MVT::i1:
2678   case MVT::i8:
2679   case MVT::i16:
2680   case MVT::i32:
2681     Opc = AArch64::CSELWr;
2682     RC = &AArch64::GPR32RegClass;
2683     break;
2684   case MVT::i64:
2685     Opc = AArch64::CSELXr;
2686     RC = &AArch64::GPR64RegClass;
2687     break;
2688   case MVT::f32:
2689     Opc = AArch64::FCSELSrrr;
2690     RC = &AArch64::FPR32RegClass;
2691     break;
2692   case MVT::f64:
2693     Opc = AArch64::FCSELDrrr;
2694     RC = &AArch64::FPR64RegClass;
2695     break;
2696   }
2697
2698   const SelectInst *SI = cast<SelectInst>(I);
2699   const Value *Cond = SI->getCondition();
2700   AArch64CC::CondCode CC = AArch64CC::NE;
2701   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2702
2703   if (optimizeSelect(SI))
2704     return true;
2705
2706   // Try to pickup the flags, so we don't have to emit another compare.
2707   if (foldXALUIntrinsic(CC, I, Cond)) {
2708     // Fake request the condition to force emission of the XALU intrinsic.
2709     unsigned CondReg = getRegForValue(Cond);
2710     if (!CondReg)
2711       return false;
2712   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2713              isValueAvailable(Cond)) {
2714     const auto *Cmp = cast<CmpInst>(Cond);
2715     // Try to optimize or fold the cmp.
2716     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2717     const Value *FoldSelect = nullptr;
2718     switch (Predicate) {
2719     default:
2720       break;
2721     case CmpInst::FCMP_FALSE:
2722       FoldSelect = SI->getFalseValue();
2723       break;
2724     case CmpInst::FCMP_TRUE:
2725       FoldSelect = SI->getTrueValue();
2726       break;
2727     }
2728
2729     if (FoldSelect) {
2730       unsigned SrcReg = getRegForValue(FoldSelect);
2731       if (!SrcReg)
2732         return false;
2733       unsigned UseReg = lookUpRegForValue(SI);
2734       if (UseReg)
2735         MRI.clearKillFlags(UseReg);
2736
2737       updateValueMap(I, SrcReg);
2738       return true;
2739     }
2740
2741     // Emit the cmp.
2742     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2743       return false;
2744
2745     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2746     CC = getCompareCC(Predicate);
2747     switch (Predicate) {
2748     default:
2749       break;
2750     case CmpInst::FCMP_UEQ:
2751       ExtraCC = AArch64CC::EQ;
2752       CC = AArch64CC::VS;
2753       break;
2754     case CmpInst::FCMP_ONE:
2755       ExtraCC = AArch64CC::MI;
2756       CC = AArch64CC::GT;
2757       break;
2758     }
2759     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2760   } else {
2761     unsigned CondReg = getRegForValue(Cond);
2762     if (!CondReg)
2763       return false;
2764     bool CondIsKill = hasTrivialKill(Cond);
2765
2766     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2767     CondReg = constrainOperandRegClass(II, CondReg, 1);
2768
2769     // Emit a TST instruction (ANDS wzr, reg, #imm).
2770     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2771             AArch64::WZR)
2772         .addReg(CondReg, getKillRegState(CondIsKill))
2773         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2774   }
2775
2776   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2777   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2778
2779   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2780   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2781
2782   if (!Src1Reg || !Src2Reg)
2783     return false;
2784
2785   if (ExtraCC != AArch64CC::AL) {
2786     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2787                                Src2IsKill, ExtraCC);
2788     Src2IsKill = true;
2789   }
2790   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2791                                         Src2IsKill, CC);
2792   updateValueMap(I, ResultReg);
2793   return true;
2794 }
2795
2796 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2797   Value *V = I->getOperand(0);
2798   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2799     return false;
2800
2801   unsigned Op = getRegForValue(V);
2802   if (Op == 0)
2803     return false;
2804
2805   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2806   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2807           ResultReg).addReg(Op);
2808   updateValueMap(I, ResultReg);
2809   return true;
2810 }
2811
2812 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2813   Value *V = I->getOperand(0);
2814   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2815     return false;
2816
2817   unsigned Op = getRegForValue(V);
2818   if (Op == 0)
2819     return false;
2820
2821   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2822   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2823           ResultReg).addReg(Op);
2824   updateValueMap(I, ResultReg);
2825   return true;
2826 }
2827
2828 // FPToUI and FPToSI
2829 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2830   MVT DestVT;
2831   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2832     return false;
2833
2834   unsigned SrcReg = getRegForValue(I->getOperand(0));
2835   if (SrcReg == 0)
2836     return false;
2837
2838   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2839   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2840     return false;
2841
2842   unsigned Opc;
2843   if (SrcVT == MVT::f64) {
2844     if (Signed)
2845       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2846     else
2847       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2848   } else {
2849     if (Signed)
2850       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2851     else
2852       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2853   }
2854   unsigned ResultReg = createResultReg(
2855       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2856   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2857       .addReg(SrcReg);
2858   updateValueMap(I, ResultReg);
2859   return true;
2860 }
2861
2862 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2863   MVT DestVT;
2864   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2865     return false;
2866   // Let regular ISEL handle FP16
2867   if (DestVT == MVT::f16)
2868     return false;
2869
2870   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2871          "Unexpected value type.");
2872
2873   unsigned SrcReg = getRegForValue(I->getOperand(0));
2874   if (!SrcReg)
2875     return false;
2876   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2877
2878   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2879
2880   // Handle sign-extension.
2881   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2882     SrcReg =
2883         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2884     if (!SrcReg)
2885       return false;
2886     SrcIsKill = true;
2887   }
2888
2889   unsigned Opc;
2890   if (SrcVT == MVT::i64) {
2891     if (Signed)
2892       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2893     else
2894       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2895   } else {
2896     if (Signed)
2897       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2898     else
2899       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2900   }
2901
2902   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2903                                       SrcIsKill);
2904   updateValueMap(I, ResultReg);
2905   return true;
2906 }
2907
2908 bool AArch64FastISel::fastLowerArguments() {
2909   if (!FuncInfo.CanLowerReturn)
2910     return false;
2911
2912   const Function *F = FuncInfo.Fn;
2913   if (F->isVarArg())
2914     return false;
2915
2916   CallingConv::ID CC = F->getCallingConv();
2917   if (CC != CallingConv::C && CC != CallingConv::Swift)
2918     return false;
2919
2920   // Only handle simple cases of up to 8 GPR and FPR each.
2921   unsigned GPRCnt = 0;
2922   unsigned FPRCnt = 0;
2923   for (auto const &Arg : F->args()) {
2924     if (Arg.hasAttribute(Attribute::ByVal) ||
2925         Arg.hasAttribute(Attribute::InReg) ||
2926         Arg.hasAttribute(Attribute::StructRet) ||
2927         Arg.hasAttribute(Attribute::SwiftSelf) ||
2928         Arg.hasAttribute(Attribute::SwiftError) ||
2929         Arg.hasAttribute(Attribute::Nest))
2930       return false;
2931
2932     Type *ArgTy = Arg.getType();
2933     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2934       return false;
2935
2936     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2937     if (!ArgVT.isSimple())
2938       return false;
2939
2940     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2941     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2942       return false;
2943
2944     if (VT.isVector() &&
2945         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2946       return false;
2947
2948     if (VT >= MVT::i1 && VT <= MVT::i64)
2949       ++GPRCnt;
2950     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2951              VT.is128BitVector())
2952       ++FPRCnt;
2953     else
2954       return false;
2955
2956     if (GPRCnt > 8 || FPRCnt > 8)
2957       return false;
2958   }
2959
2960   static const MCPhysReg Registers[6][8] = {
2961     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2962       AArch64::W5, AArch64::W6, AArch64::W7 },
2963     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2964       AArch64::X5, AArch64::X6, AArch64::X7 },
2965     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2966       AArch64::H5, AArch64::H6, AArch64::H7 },
2967     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2968       AArch64::S5, AArch64::S6, AArch64::S7 },
2969     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2970       AArch64::D5, AArch64::D6, AArch64::D7 },
2971     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2972       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2973   };
2974
2975   unsigned GPRIdx = 0;
2976   unsigned FPRIdx = 0;
2977   for (auto const &Arg : F->args()) {
2978     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2979     unsigned SrcReg;
2980     const TargetRegisterClass *RC;
2981     if (VT >= MVT::i1 && VT <= MVT::i32) {
2982       SrcReg = Registers[0][GPRIdx++];
2983       RC = &AArch64::GPR32RegClass;
2984       VT = MVT::i32;
2985     } else if (VT == MVT::i64) {
2986       SrcReg = Registers[1][GPRIdx++];
2987       RC = &AArch64::GPR64RegClass;
2988     } else if (VT == MVT::f16) {
2989       SrcReg = Registers[2][FPRIdx++];
2990       RC = &AArch64::FPR16RegClass;
2991     } else if (VT ==  MVT::f32) {
2992       SrcReg = Registers[3][FPRIdx++];
2993       RC = &AArch64::FPR32RegClass;
2994     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2995       SrcReg = Registers[4][FPRIdx++];
2996       RC = &AArch64::FPR64RegClass;
2997     } else if (VT.is128BitVector()) {
2998       SrcReg = Registers[5][FPRIdx++];
2999       RC = &AArch64::FPR128RegClass;
3000     } else
3001       llvm_unreachable("Unexpected value type.");
3002
3003     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3004     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3005     // Without this, EmitLiveInCopies may eliminate the livein if its only
3006     // use is a bitcast (which isn't turned into an instruction).
3007     unsigned ResultReg = createResultReg(RC);
3008     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3009             TII.get(TargetOpcode::COPY), ResultReg)
3010         .addReg(DstReg, getKillRegState(true));
3011     updateValueMap(&Arg, ResultReg);
3012   }
3013   return true;
3014 }
3015
3016 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3017                                       SmallVectorImpl<MVT> &OutVTs,
3018                                       unsigned &NumBytes) {
3019   CallingConv::ID CC = CLI.CallConv;
3020   SmallVector<CCValAssign, 16> ArgLocs;
3021   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3022   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3023
3024   // Get a count of how many bytes are to be pushed on the stack.
3025   NumBytes = CCInfo.getNextStackOffset();
3026
3027   // Issue CALLSEQ_START
3028   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3029   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3030     .addImm(NumBytes).addImm(0);
3031
3032   // Process the args.
3033   for (CCValAssign &VA : ArgLocs) {
3034     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3035     MVT ArgVT = OutVTs[VA.getValNo()];
3036
3037     unsigned ArgReg = getRegForValue(ArgVal);
3038     if (!ArgReg)
3039       return false;
3040
3041     // Handle arg promotion: SExt, ZExt, AExt.
3042     switch (VA.getLocInfo()) {
3043     case CCValAssign::Full:
3044       break;
3045     case CCValAssign::SExt: {
3046       MVT DestVT = VA.getLocVT();
3047       MVT SrcVT = ArgVT;
3048       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3049       if (!ArgReg)
3050         return false;
3051       break;
3052     }
3053     case CCValAssign::AExt:
3054     // Intentional fall-through.
3055     case CCValAssign::ZExt: {
3056       MVT DestVT = VA.getLocVT();
3057       MVT SrcVT = ArgVT;
3058       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3059       if (!ArgReg)
3060         return false;
3061       break;
3062     }
3063     default:
3064       llvm_unreachable("Unknown arg promotion!");
3065     }
3066
3067     // Now copy/store arg to correct locations.
3068     if (VA.isRegLoc() && !VA.needsCustom()) {
3069       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3070               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3071       CLI.OutRegs.push_back(VA.getLocReg());
3072     } else if (VA.needsCustom()) {
3073       // FIXME: Handle custom args.
3074       return false;
3075     } else {
3076       assert(VA.isMemLoc() && "Assuming store on stack.");
3077
3078       // Don't emit stores for undef values.
3079       if (isa<UndefValue>(ArgVal))
3080         continue;
3081
3082       // Need to store on the stack.
3083       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3084
3085       unsigned BEAlign = 0;
3086       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3087         BEAlign = 8 - ArgSize;
3088
3089       Address Addr;
3090       Addr.setKind(Address::RegBase);
3091       Addr.setReg(AArch64::SP);
3092       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3093
3094       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3095       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3096           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3097           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3098
3099       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3100         return false;
3101     }
3102   }
3103   return true;
3104 }
3105
3106 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3107                                  unsigned NumBytes) {
3108   CallingConv::ID CC = CLI.CallConv;
3109
3110   // Issue CALLSEQ_END
3111   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3112   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3113     .addImm(NumBytes).addImm(0);
3114
3115   // Now the return value.
3116   if (RetVT != MVT::isVoid) {
3117     SmallVector<CCValAssign, 16> RVLocs;
3118     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3119     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3120
3121     // Only handle a single return value.
3122     if (RVLocs.size() != 1)
3123       return false;
3124
3125     // Copy all of the result registers out of their specified physreg.
3126     MVT CopyVT = RVLocs[0].getValVT();
3127
3128     // TODO: Handle big-endian results
3129     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3130       return false;
3131
3132     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3133     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3134             TII.get(TargetOpcode::COPY), ResultReg)
3135         .addReg(RVLocs[0].getLocReg());
3136     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3137
3138     CLI.ResultReg = ResultReg;
3139     CLI.NumResultRegs = 1;
3140   }
3141
3142   return true;
3143 }
3144
3145 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3146   CallingConv::ID CC  = CLI.CallConv;
3147   bool IsTailCall     = CLI.IsTailCall;
3148   bool IsVarArg       = CLI.IsVarArg;
3149   const Value *Callee = CLI.Callee;
3150   MCSymbol *Symbol = CLI.Symbol;
3151
3152   if (!Callee && !Symbol)
3153     return false;
3154
3155   // Allow SelectionDAG isel to handle tail calls.
3156   if (IsTailCall)
3157     return false;
3158
3159   CodeModel::Model CM = TM.getCodeModel();
3160   // Only support the small-addressing and large code models.
3161   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3162     return false;
3163
3164   // FIXME: Add large code model support for ELF.
3165   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3166     return false;
3167
3168   // Let SDISel handle vararg functions.
3169   if (IsVarArg)
3170     return false;
3171
3172   // FIXME: Only handle *simple* calls for now.
3173   MVT RetVT;
3174   if (CLI.RetTy->isVoidTy())
3175     RetVT = MVT::isVoid;
3176   else if (!isTypeLegal(CLI.RetTy, RetVT))
3177     return false;
3178
3179   for (auto Flag : CLI.OutFlags)
3180     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3181         Flag.isSwiftSelf() || Flag.isSwiftError())
3182       return false;
3183
3184   // Set up the argument vectors.
3185   SmallVector<MVT, 16> OutVTs;
3186   OutVTs.reserve(CLI.OutVals.size());
3187
3188   for (auto *Val : CLI.OutVals) {
3189     MVT VT;
3190     if (!isTypeLegal(Val->getType(), VT) &&
3191         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3192       return false;
3193
3194     // We don't handle vector parameters yet.
3195     if (VT.isVector() || VT.getSizeInBits() > 64)
3196       return false;
3197
3198     OutVTs.push_back(VT);
3199   }
3200
3201   Address Addr;
3202   if (Callee && !computeCallAddress(Callee, Addr))
3203     return false;
3204
3205   // Handle the arguments now that we've gotten them.
3206   unsigned NumBytes;
3207   if (!processCallArgs(CLI, OutVTs, NumBytes))
3208     return false;
3209
3210   // Issue the call.
3211   MachineInstrBuilder MIB;
3212   if (Subtarget->useSmallAddressing()) {
3213     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3214     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3215     if (Symbol)
3216       MIB.addSym(Symbol, 0);
3217     else if (Addr.getGlobalValue())
3218       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3219     else if (Addr.getReg()) {
3220       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3221       MIB.addReg(Reg);
3222     } else
3223       return false;
3224   } else {
3225     unsigned CallReg = 0;
3226     if (Symbol) {
3227       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3228       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3229               ADRPReg)
3230           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3231
3232       CallReg = createResultReg(&AArch64::GPR64RegClass);
3233       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3234               TII.get(AArch64::LDRXui), CallReg)
3235           .addReg(ADRPReg)
3236           .addSym(Symbol,
3237                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3238     } else if (Addr.getGlobalValue())
3239       CallReg = materializeGV(Addr.getGlobalValue());
3240     else if (Addr.getReg())
3241       CallReg = Addr.getReg();
3242
3243     if (!CallReg)
3244       return false;
3245
3246     const MCInstrDesc &II = TII.get(AArch64::BLR);
3247     CallReg = constrainOperandRegClass(II, CallReg, 0);
3248     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3249   }
3250
3251   // Add implicit physical register uses to the call.
3252   for (auto Reg : CLI.OutRegs)
3253     MIB.addReg(Reg, RegState::Implicit);
3254
3255   // Add a register mask with the call-preserved registers.
3256   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3257   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3258
3259   CLI.Call = MIB;
3260
3261   // Finish off the call including any return values.
3262   return finishCall(CLI, RetVT, NumBytes);
3263 }
3264
3265 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3266   if (Alignment)
3267     return Len / Alignment <= 4;
3268   else
3269     return Len < 32;
3270 }
3271
3272 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3273                                          uint64_t Len, unsigned Alignment) {
3274   // Make sure we don't bloat code by inlining very large memcpy's.
3275   if (!isMemCpySmall(Len, Alignment))
3276     return false;
3277
3278   int64_t UnscaledOffset = 0;
3279   Address OrigDest = Dest;
3280   Address OrigSrc = Src;
3281
3282   while (Len) {
3283     MVT VT;
3284     if (!Alignment || Alignment >= 8) {
3285       if (Len >= 8)
3286         VT = MVT::i64;
3287       else if (Len >= 4)
3288         VT = MVT::i32;
3289       else if (Len >= 2)
3290         VT = MVT::i16;
3291       else {
3292         VT = MVT::i8;
3293       }
3294     } else {
3295       // Bound based on alignment.
3296       if (Len >= 4 && Alignment == 4)
3297         VT = MVT::i32;
3298       else if (Len >= 2 && Alignment == 2)
3299         VT = MVT::i16;
3300       else {
3301         VT = MVT::i8;
3302       }
3303     }
3304
3305     unsigned ResultReg = emitLoad(VT, VT, Src);
3306     if (!ResultReg)
3307       return false;
3308
3309     if (!emitStore(VT, ResultReg, Dest))
3310       return false;
3311
3312     int64_t Size = VT.getSizeInBits() / 8;
3313     Len -= Size;
3314     UnscaledOffset += Size;
3315
3316     // We need to recompute the unscaled offset for each iteration.
3317     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3318     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3319   }
3320
3321   return true;
3322 }
3323
3324 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3325 /// into the user. The condition code will only be updated on success.
3326 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3327                                         const Instruction *I,
3328                                         const Value *Cond) {
3329   if (!isa<ExtractValueInst>(Cond))
3330     return false;
3331
3332   const auto *EV = cast<ExtractValueInst>(Cond);
3333   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3334     return false;
3335
3336   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3337   MVT RetVT;
3338   const Function *Callee = II->getCalledFunction();
3339   Type *RetTy =
3340   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3341   if (!isTypeLegal(RetTy, RetVT))
3342     return false;
3343
3344   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3345     return false;
3346
3347   const Value *LHS = II->getArgOperand(0);
3348   const Value *RHS = II->getArgOperand(1);
3349
3350   // Canonicalize immediate to the RHS.
3351   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3352       isCommutativeIntrinsic(II))
3353     std::swap(LHS, RHS);
3354
3355   // Simplify multiplies.
3356   Intrinsic::ID IID = II->getIntrinsicID();
3357   switch (IID) {
3358   default:
3359     break;
3360   case Intrinsic::smul_with_overflow:
3361     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3362       if (C->getValue() == 2)
3363         IID = Intrinsic::sadd_with_overflow;
3364     break;
3365   case Intrinsic::umul_with_overflow:
3366     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3367       if (C->getValue() == 2)
3368         IID = Intrinsic::uadd_with_overflow;
3369     break;
3370   }
3371
3372   AArch64CC::CondCode TmpCC;
3373   switch (IID) {
3374   default:
3375     return false;
3376   case Intrinsic::sadd_with_overflow:
3377   case Intrinsic::ssub_with_overflow:
3378     TmpCC = AArch64CC::VS;
3379     break;
3380   case Intrinsic::uadd_with_overflow:
3381     TmpCC = AArch64CC::HS;
3382     break;
3383   case Intrinsic::usub_with_overflow:
3384     TmpCC = AArch64CC::LO;
3385     break;
3386   case Intrinsic::smul_with_overflow:
3387   case Intrinsic::umul_with_overflow:
3388     TmpCC = AArch64CC::NE;
3389     break;
3390   }
3391
3392   // Check if both instructions are in the same basic block.
3393   if (!isValueAvailable(II))
3394     return false;
3395
3396   // Make sure nothing is in the way
3397   BasicBlock::const_iterator Start(I);
3398   BasicBlock::const_iterator End(II);
3399   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3400     // We only expect extractvalue instructions between the intrinsic and the
3401     // instruction to be selected.
3402     if (!isa<ExtractValueInst>(Itr))
3403       return false;
3404
3405     // Check that the extractvalue operand comes from the intrinsic.
3406     const auto *EVI = cast<ExtractValueInst>(Itr);
3407     if (EVI->getAggregateOperand() != II)
3408       return false;
3409   }
3410
3411   CC = TmpCC;
3412   return true;
3413 }
3414
3415 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3416   // FIXME: Handle more intrinsics.
3417   switch (II->getIntrinsicID()) {
3418   default: return false;
3419   case Intrinsic::frameaddress: {
3420     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3421     MFI.setFrameAddressIsTaken(true);
3422
3423     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3424     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3425     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3427             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3428     // Recursively load frame address
3429     // ldr x0, [fp]
3430     // ldr x0, [x0]
3431     // ldr x0, [x0]
3432     // ...
3433     unsigned DestReg;
3434     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3435     while (Depth--) {
3436       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3437                                 SrcReg, /*IsKill=*/true, 0);
3438       assert(DestReg && "Unexpected LDR instruction emission failure.");
3439       SrcReg = DestReg;
3440     }
3441
3442     updateValueMap(II, SrcReg);
3443     return true;
3444   }
3445   case Intrinsic::memcpy:
3446   case Intrinsic::memmove: {
3447     const auto *MTI = cast<MemTransferInst>(II);
3448     // Don't handle volatile.
3449     if (MTI->isVolatile())
3450       return false;
3451
3452     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3453     // we would emit dead code because we don't currently handle memmoves.
3454     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3455     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3456       // Small memcpy's are common enough that we want to do them without a call
3457       // if possible.
3458       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3459       unsigned Alignment = MTI->getAlignment();
3460       if (isMemCpySmall(Len, Alignment)) {
3461         Address Dest, Src;
3462         if (!computeAddress(MTI->getRawDest(), Dest) ||
3463             !computeAddress(MTI->getRawSource(), Src))
3464           return false;
3465         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3466           return true;
3467       }
3468     }
3469
3470     if (!MTI->getLength()->getType()->isIntegerTy(64))
3471       return false;
3472
3473     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3474       // Fast instruction selection doesn't support the special
3475       // address spaces.
3476       return false;
3477
3478     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3479     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3480   }
3481   case Intrinsic::memset: {
3482     const MemSetInst *MSI = cast<MemSetInst>(II);
3483     // Don't handle volatile.
3484     if (MSI->isVolatile())
3485       return false;
3486
3487     if (!MSI->getLength()->getType()->isIntegerTy(64))
3488       return false;
3489
3490     if (MSI->getDestAddressSpace() > 255)
3491       // Fast instruction selection doesn't support the special
3492       // address spaces.
3493       return false;
3494
3495     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3496   }
3497   case Intrinsic::sin:
3498   case Intrinsic::cos:
3499   case Intrinsic::pow: {
3500     MVT RetVT;
3501     if (!isTypeLegal(II->getType(), RetVT))
3502       return false;
3503
3504     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3505       return false;
3506
3507     static const RTLIB::Libcall LibCallTable[3][2] = {
3508       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3509       { RTLIB::COS_F32, RTLIB::COS_F64 },
3510       { RTLIB::POW_F32, RTLIB::POW_F64 }
3511     };
3512     RTLIB::Libcall LC;
3513     bool Is64Bit = RetVT == MVT::f64;
3514     switch (II->getIntrinsicID()) {
3515     default:
3516       llvm_unreachable("Unexpected intrinsic.");
3517     case Intrinsic::sin:
3518       LC = LibCallTable[0][Is64Bit];
3519       break;
3520     case Intrinsic::cos:
3521       LC = LibCallTable[1][Is64Bit];
3522       break;
3523     case Intrinsic::pow:
3524       LC = LibCallTable[2][Is64Bit];
3525       break;
3526     }
3527
3528     ArgListTy Args;
3529     Args.reserve(II->getNumArgOperands());
3530
3531     // Populate the argument list.
3532     for (auto &Arg : II->arg_operands()) {
3533       ArgListEntry Entry;
3534       Entry.Val = Arg;
3535       Entry.Ty = Arg->getType();
3536       Args.push_back(Entry);
3537     }
3538
3539     CallLoweringInfo CLI;
3540     MCContext &Ctx = MF->getContext();
3541     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3542                   TLI.getLibcallName(LC), std::move(Args));
3543     if (!lowerCallTo(CLI))
3544       return false;
3545     updateValueMap(II, CLI.ResultReg);
3546     return true;
3547   }
3548   case Intrinsic::fabs: {
3549     MVT VT;
3550     if (!isTypeLegal(II->getType(), VT))
3551       return false;
3552
3553     unsigned Opc;
3554     switch (VT.SimpleTy) {
3555     default:
3556       return false;
3557     case MVT::f32:
3558       Opc = AArch64::FABSSr;
3559       break;
3560     case MVT::f64:
3561       Opc = AArch64::FABSDr;
3562       break;
3563     }
3564     unsigned SrcReg = getRegForValue(II->getOperand(0));
3565     if (!SrcReg)
3566       return false;
3567     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3568     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3569     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3570       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3571     updateValueMap(II, ResultReg);
3572     return true;
3573   }
3574   case Intrinsic::trap:
3575     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3576         .addImm(1);
3577     return true;
3578
3579   case Intrinsic::sqrt: {
3580     Type *RetTy = II->getCalledFunction()->getReturnType();
3581
3582     MVT VT;
3583     if (!isTypeLegal(RetTy, VT))
3584       return false;
3585
3586     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3587     if (!Op0Reg)
3588       return false;
3589     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3590
3591     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3592     if (!ResultReg)
3593       return false;
3594
3595     updateValueMap(II, ResultReg);
3596     return true;
3597   }
3598   case Intrinsic::sadd_with_overflow:
3599   case Intrinsic::uadd_with_overflow:
3600   case Intrinsic::ssub_with_overflow:
3601   case Intrinsic::usub_with_overflow:
3602   case Intrinsic::smul_with_overflow:
3603   case Intrinsic::umul_with_overflow: {
3604     // This implements the basic lowering of the xalu with overflow intrinsics.
3605     const Function *Callee = II->getCalledFunction();
3606     auto *Ty = cast<StructType>(Callee->getReturnType());
3607     Type *RetTy = Ty->getTypeAtIndex(0U);
3608
3609     MVT VT;
3610     if (!isTypeLegal(RetTy, VT))
3611       return false;
3612
3613     if (VT != MVT::i32 && VT != MVT::i64)
3614       return false;
3615
3616     const Value *LHS = II->getArgOperand(0);
3617     const Value *RHS = II->getArgOperand(1);
3618     // Canonicalize immediate to the RHS.
3619     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3620         isCommutativeIntrinsic(II))
3621       std::swap(LHS, RHS);
3622
3623     // Simplify multiplies.
3624     Intrinsic::ID IID = II->getIntrinsicID();
3625     switch (IID) {
3626     default:
3627       break;
3628     case Intrinsic::smul_with_overflow:
3629       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3630         if (C->getValue() == 2) {
3631           IID = Intrinsic::sadd_with_overflow;
3632           RHS = LHS;
3633         }
3634       break;
3635     case Intrinsic::umul_with_overflow:
3636       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3637         if (C->getValue() == 2) {
3638           IID = Intrinsic::uadd_with_overflow;
3639           RHS = LHS;
3640         }
3641       break;
3642     }
3643
3644     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3645     AArch64CC::CondCode CC = AArch64CC::Invalid;
3646     switch (IID) {
3647     default: llvm_unreachable("Unexpected intrinsic!");
3648     case Intrinsic::sadd_with_overflow:
3649       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3650       CC = AArch64CC::VS;
3651       break;
3652     case Intrinsic::uadd_with_overflow:
3653       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3654       CC = AArch64CC::HS;
3655       break;
3656     case Intrinsic::ssub_with_overflow:
3657       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3658       CC = AArch64CC::VS;
3659       break;
3660     case Intrinsic::usub_with_overflow:
3661       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3662       CC = AArch64CC::LO;
3663       break;
3664     case Intrinsic::smul_with_overflow: {
3665       CC = AArch64CC::NE;
3666       unsigned LHSReg = getRegForValue(LHS);
3667       if (!LHSReg)
3668         return false;
3669       bool LHSIsKill = hasTrivialKill(LHS);
3670
3671       unsigned RHSReg = getRegForValue(RHS);
3672       if (!RHSReg)
3673         return false;
3674       bool RHSIsKill = hasTrivialKill(RHS);
3675
3676       if (VT == MVT::i32) {
3677         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3678         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3679                                        /*IsKill=*/false, 32);
3680         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3681                                             AArch64::sub_32);
3682         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3683                                               AArch64::sub_32);
3684         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3685                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3686       } else {
3687         assert(VT == MVT::i64 && "Unexpected value type.");
3688         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3689         // reused in the next instruction.
3690         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3691                             /*IsKill=*/false);
3692         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3693                                         RHSReg, RHSIsKill);
3694         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3695                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3696       }
3697       break;
3698     }
3699     case Intrinsic::umul_with_overflow: {
3700       CC = AArch64CC::NE;
3701       unsigned LHSReg = getRegForValue(LHS);
3702       if (!LHSReg)
3703         return false;
3704       bool LHSIsKill = hasTrivialKill(LHS);
3705
3706       unsigned RHSReg = getRegForValue(RHS);
3707       if (!RHSReg)
3708         return false;
3709       bool RHSIsKill = hasTrivialKill(RHS);
3710
3711       if (VT == MVT::i32) {
3712         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3713         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3714                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3715                     /*WantResult=*/false);
3716         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3717                                             AArch64::sub_32);
3718       } else {
3719         assert(VT == MVT::i64 && "Unexpected value type.");
3720         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3721         // reused in the next instruction.
3722         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3723                             /*IsKill=*/false);
3724         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3725                                         RHSReg, RHSIsKill);
3726         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3727                     /*IsKill=*/false, /*WantResult=*/false);
3728       }
3729       break;
3730     }
3731     }
3732
3733     if (MulReg) {
3734       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3735       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3736               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3737     }
3738
3739     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3740                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3741                                   /*IsKill=*/true, getInvertedCondCode(CC));
3742     (void)ResultReg2;
3743     assert((ResultReg1 + 1) == ResultReg2 &&
3744            "Nonconsecutive result registers.");
3745     updateValueMap(II, ResultReg1, 2);
3746     return true;
3747   }
3748   }
3749   return false;
3750 }
3751
3752 bool AArch64FastISel::selectRet(const Instruction *I) {
3753   const ReturnInst *Ret = cast<ReturnInst>(I);
3754   const Function &F = *I->getParent()->getParent();
3755
3756   if (!FuncInfo.CanLowerReturn)
3757     return false;
3758
3759   if (F.isVarArg())
3760     return false;
3761
3762   if (TLI.supportSwiftError() &&
3763       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3764     return false;
3765
3766   if (TLI.supportSplitCSR(FuncInfo.MF))
3767     return false;
3768
3769   // Build a list of return value registers.
3770   SmallVector<unsigned, 4> RetRegs;
3771
3772   if (Ret->getNumOperands() > 0) {
3773     CallingConv::ID CC = F.getCallingConv();
3774     SmallVector<ISD::OutputArg, 4> Outs;
3775     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3776
3777     // Analyze operands of the call, assigning locations to each operand.
3778     SmallVector<CCValAssign, 16> ValLocs;
3779     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3780     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3781                                                      : RetCC_AArch64_AAPCS;
3782     CCInfo.AnalyzeReturn(Outs, RetCC);
3783
3784     // Only handle a single return value for now.
3785     if (ValLocs.size() != 1)
3786       return false;
3787
3788     CCValAssign &VA = ValLocs[0];
3789     const Value *RV = Ret->getOperand(0);
3790
3791     // Don't bother handling odd stuff for now.
3792     if ((VA.getLocInfo() != CCValAssign::Full) &&
3793         (VA.getLocInfo() != CCValAssign::BCvt))
3794       return false;
3795
3796     // Only handle register returns for now.
3797     if (!VA.isRegLoc())
3798       return false;
3799
3800     unsigned Reg = getRegForValue(RV);
3801     if (Reg == 0)
3802       return false;
3803
3804     unsigned SrcReg = Reg + VA.getValNo();
3805     unsigned DestReg = VA.getLocReg();
3806     // Avoid a cross-class copy. This is very unlikely.
3807     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3808       return false;
3809
3810     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3811     if (!RVEVT.isSimple())
3812       return false;
3813
3814     // Vectors (of > 1 lane) in big endian need tricky handling.
3815     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3816         !Subtarget->isLittleEndian())
3817       return false;
3818
3819     MVT RVVT = RVEVT.getSimpleVT();
3820     if (RVVT == MVT::f128)
3821       return false;
3822
3823     MVT DestVT = VA.getValVT();
3824     // Special handling for extended integers.
3825     if (RVVT != DestVT) {
3826       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3827         return false;
3828
3829       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3830         return false;
3831
3832       bool IsZExt = Outs[0].Flags.isZExt();
3833       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3834       if (SrcReg == 0)
3835         return false;
3836     }
3837
3838     // Make the copy.
3839     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3840             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3841
3842     // Add register to return instruction.
3843     RetRegs.push_back(VA.getLocReg());
3844   }
3845
3846   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3847                                     TII.get(AArch64::RET_ReallyLR));
3848   for (unsigned RetReg : RetRegs)
3849     MIB.addReg(RetReg, RegState::Implicit);
3850   return true;
3851 }
3852
3853 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3854   Type *DestTy = I->getType();
3855   Value *Op = I->getOperand(0);
3856   Type *SrcTy = Op->getType();
3857
3858   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3859   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3860   if (!SrcEVT.isSimple())
3861     return false;
3862   if (!DestEVT.isSimple())
3863     return false;
3864
3865   MVT SrcVT = SrcEVT.getSimpleVT();
3866   MVT DestVT = DestEVT.getSimpleVT();
3867
3868   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3869       SrcVT != MVT::i8)
3870     return false;
3871   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3872       DestVT != MVT::i1)
3873     return false;
3874
3875   unsigned SrcReg = getRegForValue(Op);
3876   if (!SrcReg)
3877     return false;
3878   bool SrcIsKill = hasTrivialKill(Op);
3879
3880   // If we're truncating from i64 to a smaller non-legal type then generate an
3881   // AND. Otherwise, we know the high bits are undefined and a truncate only
3882   // generate a COPY. We cannot mark the source register also as result
3883   // register, because this can incorrectly transfer the kill flag onto the
3884   // source register.
3885   unsigned ResultReg;
3886   if (SrcVT == MVT::i64) {
3887     uint64_t Mask = 0;
3888     switch (DestVT.SimpleTy) {
3889     default:
3890       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3891       return false;
3892     case MVT::i1:
3893       Mask = 0x1;
3894       break;
3895     case MVT::i8:
3896       Mask = 0xff;
3897       break;
3898     case MVT::i16:
3899       Mask = 0xffff;
3900       break;
3901     }
3902     // Issue an extract_subreg to get the lower 32-bits.
3903     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3904                                                 AArch64::sub_32);
3905     // Create the AND instruction which performs the actual truncation.
3906     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3907     assert(ResultReg && "Unexpected AND instruction emission failure.");
3908   } else {
3909     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3910     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3911             TII.get(TargetOpcode::COPY), ResultReg)
3912         .addReg(SrcReg, getKillRegState(SrcIsKill));
3913   }
3914
3915   updateValueMap(I, ResultReg);
3916   return true;
3917 }
3918
3919 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3920   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3921           DestVT == MVT::i64) &&
3922          "Unexpected value type.");
3923   // Handle i8 and i16 as i32.
3924   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3925     DestVT = MVT::i32;
3926
3927   if (IsZExt) {
3928     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3929     assert(ResultReg && "Unexpected AND instruction emission failure.");
3930     if (DestVT == MVT::i64) {
3931       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3932       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3933       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3934       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3935               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3936           .addImm(0)
3937           .addReg(ResultReg)
3938           .addImm(AArch64::sub_32);
3939       ResultReg = Reg64;
3940     }
3941     return ResultReg;
3942   } else {
3943     if (DestVT == MVT::i64) {
3944       // FIXME: We're SExt i1 to i64.
3945       return 0;
3946     }
3947     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3948                             /*TODO:IsKill=*/false, 0, 0);
3949   }
3950 }
3951
3952 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3953                                       unsigned Op1, bool Op1IsKill) {
3954   unsigned Opc, ZReg;
3955   switch (RetVT.SimpleTy) {
3956   default: return 0;
3957   case MVT::i8:
3958   case MVT::i16:
3959   case MVT::i32:
3960     RetVT = MVT::i32;
3961     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3962   case MVT::i64:
3963     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3964   }
3965
3966   const TargetRegisterClass *RC =
3967       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3968   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3969                           /*IsKill=*/ZReg, true);
3970 }
3971
3972 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3973                                         unsigned Op1, bool Op1IsKill) {
3974   if (RetVT != MVT::i64)
3975     return 0;
3976
3977   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3978                           Op0, Op0IsKill, Op1, Op1IsKill,
3979                           AArch64::XZR, /*IsKill=*/true);
3980 }
3981
3982 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3983                                         unsigned Op1, bool Op1IsKill) {
3984   if (RetVT != MVT::i64)
3985     return 0;
3986
3987   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3988                           Op0, Op0IsKill, Op1, Op1IsKill,
3989                           AArch64::XZR, /*IsKill=*/true);
3990 }
3991
3992 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3993                                      unsigned Op1Reg, bool Op1IsKill) {
3994   unsigned Opc = 0;
3995   bool NeedTrunc = false;
3996   uint64_t Mask = 0;
3997   switch (RetVT.SimpleTy) {
3998   default: return 0;
3999   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4000   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4001   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4002   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4003   }
4004
4005   const TargetRegisterClass *RC =
4006       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4007   if (NeedTrunc) {
4008     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4009     Op1IsKill = true;
4010   }
4011   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4012                                        Op1IsKill);
4013   if (NeedTrunc)
4014     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4015   return ResultReg;
4016 }
4017
4018 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4019                                      bool Op0IsKill, uint64_t Shift,
4020                                      bool IsZExt) {
4021   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4022          "Unexpected source/return type pair.");
4023   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4024           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4025          "Unexpected source value type.");
4026   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4027           RetVT == MVT::i64) && "Unexpected return value type.");
4028
4029   bool Is64Bit = (RetVT == MVT::i64);
4030   unsigned RegSize = Is64Bit ? 64 : 32;
4031   unsigned DstBits = RetVT.getSizeInBits();
4032   unsigned SrcBits = SrcVT.getSizeInBits();
4033   const TargetRegisterClass *RC =
4034       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4035
4036   // Just emit a copy for "zero" shifts.
4037   if (Shift == 0) {
4038     if (RetVT == SrcVT) {
4039       unsigned ResultReg = createResultReg(RC);
4040       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4041               TII.get(TargetOpcode::COPY), ResultReg)
4042           .addReg(Op0, getKillRegState(Op0IsKill));
4043       return ResultReg;
4044     } else
4045       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4046   }
4047
4048   // Don't deal with undefined shifts.
4049   if (Shift >= DstBits)
4050     return 0;
4051
4052   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4053   // {S|U}BFM Wd, Wn, #r, #s
4054   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4055
4056   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4057   // %2 = shl i16 %1, 4
4058   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4059   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4060   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4061   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4062
4063   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4064   // %2 = shl i16 %1, 8
4065   // Wd<32+7-24,32-24> = Wn<7:0>
4066   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4067   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4068   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4069
4070   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4071   // %2 = shl i16 %1, 12
4072   // Wd<32+3-20,32-20> = Wn<3:0>
4073   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4074   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4075   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4076
4077   unsigned ImmR = RegSize - Shift;
4078   // Limit the width to the length of the source type.
4079   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4080   static const unsigned OpcTable[2][2] = {
4081     {AArch64::SBFMWri, AArch64::SBFMXri},
4082     {AArch64::UBFMWri, AArch64::UBFMXri}
4083   };
4084   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4085   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4086     unsigned TmpReg = MRI.createVirtualRegister(RC);
4087     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4088             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4089         .addImm(0)
4090         .addReg(Op0, getKillRegState(Op0IsKill))
4091         .addImm(AArch64::sub_32);
4092     Op0 = TmpReg;
4093     Op0IsKill = true;
4094   }
4095   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4096 }
4097
4098 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4099                                      unsigned Op1Reg, bool Op1IsKill) {
4100   unsigned Opc = 0;
4101   bool NeedTrunc = false;
4102   uint64_t Mask = 0;
4103   switch (RetVT.SimpleTy) {
4104   default: return 0;
4105   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4106   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4107   case MVT::i32: Opc = AArch64::LSRVWr; break;
4108   case MVT::i64: Opc = AArch64::LSRVXr; break;
4109   }
4110
4111   const TargetRegisterClass *RC =
4112       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4113   if (NeedTrunc) {
4114     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4115     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4116     Op0IsKill = Op1IsKill = true;
4117   }
4118   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4119                                        Op1IsKill);
4120   if (NeedTrunc)
4121     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4122   return ResultReg;
4123 }
4124
/// Emit a logical shift right of \p Op0 by the immediate \p Shift.
///
/// \p SrcVT is the type of the value in \p Op0 and \p RetVT the type of the
/// result; when they differ, \p IsZExt says whether the value is to be zero- or
/// sign-extended from \p SrcVT to \p RetVT before shifting. A zero-extension is
/// folded into the UBFM instruction; a sign-extension is emitted separately.
///
/// \returns the result register, or 0 if the shift amount is not smaller than
/// the width of \p RetVT or if a required extension could not be emitted.
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      // Only the extension remains to be done when the types differ.
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // A zero-extended value shifted right by at least its own width is zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    // From here on the operand is an already extended value of type RetVT.
    Op0IsKill = true;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  // Extract bits <SrcBits-1 : Shift> of the source into the low result bits.
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  // Indexed as OpcTable[IsZExt][Is64Bit].
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  // A 32-bit (or narrower) source must be placed into a 64-bit register before
  // it can be used as the operand of the 64-bit instruction.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    unsigned TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
4218
4219 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4220                                      unsigned Op1Reg, bool Op1IsKill) {
4221   unsigned Opc = 0;
4222   bool NeedTrunc = false;
4223   uint64_t Mask = 0;
4224   switch (RetVT.SimpleTy) {
4225   default: return 0;
4226   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4227   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4228   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4229   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4230   }
4231
4232   const TargetRegisterClass *RC =
4233       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4234   if (NeedTrunc) {
4235     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4236     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4237     Op0IsKill = Op1IsKill = true;
4238   }
4239   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4240                                        Op1IsKill);
4241   if (NeedTrunc)
4242     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4243   return ResultReg;
4244 }
4245
/// Emit an arithmetic shift right of \p Op0 by the immediate \p Shift.
///
/// \p SrcVT is the type of the value in \p Op0 and \p RetVT the type of the
/// result; when they differ, \p IsZExt says whether the value is to be zero- or
/// sign-extended from \p SrcVT to \p RetVT before shifting. The extension is
/// folded into the emitted {S|U}BFM instruction.
///
/// \returns the result register, or 0 if the shift amount is not smaller than
/// the width of \p RetVT.
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      // Only the extension remains to be done when the types differ.
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // A zero-extended value shifted right by at least its own width is zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // Extract bits <SrcBits-1 : Shift> of the source; the shift amount is clamped
  // so that at least the top source bit is always selected.
  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  // Indexed as OpcTable[IsZExt][Is64Bit].
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  // A 32-bit (or narrower) source must be placed into a 64-bit register before
  // it can be used as the operand of the 64-bit instruction.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    unsigned TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
4327
4328 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4329                                      bool IsZExt) {
4330   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4331
4332   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4333   // DestVT are odd things, so test to make sure that they are both types we can
4334   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4335   // bail out to SelectionDAG.
4336   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4337        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4338       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4339        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4340     return 0;
4341
4342   unsigned Opc;
4343   unsigned Imm = 0;
4344
4345   switch (SrcVT.SimpleTy) {
4346   default:
4347     return 0;
4348   case MVT::i1:
4349     return emiti1Ext(SrcReg, DestVT, IsZExt);
4350   case MVT::i8:
4351     if (DestVT == MVT::i64)
4352       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4353     else
4354       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4355     Imm = 7;
4356     break;
4357   case MVT::i16:
4358     if (DestVT == MVT::i64)
4359       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4360     else
4361       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4362     Imm = 15;
4363     break;
4364   case MVT::i32:
4365     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4366     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4367     Imm = 31;
4368     break;
4369   }
4370
4371   // Handle i8 and i16 as i32.
4372   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4373     DestVT = MVT::i32;
4374   else if (DestVT == MVT::i64) {
4375     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4376     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4377             TII.get(AArch64::SUBREG_TO_REG), Src64)
4378         .addImm(0)
4379         .addReg(SrcReg)
4380         .addImm(AArch64::sub_32);
4381     SrcReg = Src64;
4382   }
4383
4384   const TargetRegisterClass *RC =
4385       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4386   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4387 }
4388
4389 static bool isZExtLoad(const MachineInstr *LI) {
4390   switch (LI->getOpcode()) {
4391   default:
4392     return false;
4393   case AArch64::LDURBBi:
4394   case AArch64::LDURHHi:
4395   case AArch64::LDURWi:
4396   case AArch64::LDRBBui:
4397   case AArch64::LDRHHui:
4398   case AArch64::LDRWui:
4399   case AArch64::LDRBBroX:
4400   case AArch64::LDRHHroX:
4401   case AArch64::LDRWroX:
4402   case AArch64::LDRBBroW:
4403   case AArch64::LDRHHroW:
4404   case AArch64::LDRWroW:
4405     return true;
4406   }
4407 }
4408
4409 static bool isSExtLoad(const MachineInstr *LI) {
4410   switch (LI->getOpcode()) {
4411   default:
4412     return false;
4413   case AArch64::LDURSBWi:
4414   case AArch64::LDURSHWi:
4415   case AArch64::LDURSBXi:
4416   case AArch64::LDURSHXi:
4417   case AArch64::LDURSWi:
4418   case AArch64::LDRSBWui:
4419   case AArch64::LDRSHWui:
4420   case AArch64::LDRSBXui:
4421   case AArch64::LDRSHXui:
4422   case AArch64::LDRSWui:
4423   case AArch64::LDRSBWroX:
4424   case AArch64::LDRSHWroX:
4425   case AArch64::LDRSBXroX:
4426   case AArch64::LDRSHXroX:
4427   case AArch64::LDRSWroX:
4428   case AArch64::LDRSBWroW:
4429   case AArch64::LDRSHWroW:
4430   case AArch64::LDRSBXroW:
4431   case AArch64::LDRSHXroW:
4432   case AArch64::LDRSWroW:
4433     return true;
4434   }
4435 }
4436
4437 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4438                                          MVT SrcVT) {
4439   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4440   if (!LI || !LI->hasOneUse())
4441     return false;
4442
4443   // Check if the load instruction has already been selected.
4444   unsigned Reg = lookUpRegForValue(LI);
4445   if (!Reg)
4446     return false;
4447
4448   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4449   if (!MI)
4450     return false;
4451
4452   // Check if the correct load instruction has been emitted - SelectionDAG might
4453   // have emitted a zero-extending load, but we need a sign-extending load.
4454   bool IsZExt = isa<ZExtInst>(I);
4455   const auto *LoadMI = MI;
4456   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4457       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4458     unsigned LoadReg = MI->getOperand(1).getReg();
4459     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4460     assert(LoadMI && "Expected valid instruction");
4461   }
4462   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4463     return false;
4464
4465   // Nothing to be done.
4466   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4467     updateValueMap(I, Reg);
4468     return true;
4469   }
4470
4471   if (IsZExt) {
4472     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4473     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4474             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4475         .addImm(0)
4476         .addReg(Reg, getKillRegState(true))
4477         .addImm(AArch64::sub_32);
4478     Reg = Reg64;
4479   } else {
4480     assert((MI->getOpcode() == TargetOpcode::COPY &&
4481             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4482            "Expected copy instruction");
4483     Reg = MI->getOperand(1).getReg();
4484     MI->eraseFromParent();
4485   }
4486   updateValueMap(I, Reg);
4487   return true;
4488 }
4489
4490 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4491   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4492          "Unexpected integer extend instruction.");
4493   MVT RetVT;
4494   MVT SrcVT;
4495   if (!isTypeSupported(I->getType(), RetVT))
4496     return false;
4497
4498   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4499     return false;
4500
4501   // Try to optimize already sign-/zero-extended values from load instructions.
4502   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4503     return true;
4504
4505   unsigned SrcReg = getRegForValue(I->getOperand(0));
4506   if (!SrcReg)
4507     return false;
4508   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4509
4510   // Try to optimize already sign-/zero-extended values from function arguments.
4511   bool IsZExt = isa<ZExtInst>(I);
4512   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4513     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4514       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4515         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4516         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4517                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4518             .addImm(0)
4519             .addReg(SrcReg, getKillRegState(SrcIsKill))
4520             .addImm(AArch64::sub_32);
4521         SrcReg = ResultReg;
4522       }
4523       // Conservatively clear all kill flags from all uses, because we are
4524       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4525       // level. The result of the instruction at IR level might have been
4526       // trivially dead, which is now not longer true.
4527       unsigned UseReg = lookUpRegForValue(I);
4528       if (UseReg)
4529         MRI.clearKillFlags(UseReg);
4530
4531       updateValueMap(I, SrcReg);
4532       return true;
4533     }
4534   }
4535
4536   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4537   if (!ResultReg)
4538     return false;
4539
4540   updateValueMap(I, ResultReg);
4541   return true;
4542 }
4543
4544 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4545   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4546   if (!DestEVT.isSimple())
4547     return false;
4548
4549   MVT DestVT = DestEVT.getSimpleVT();
4550   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4551     return false;
4552
4553   unsigned DivOpc;
4554   bool Is64bit = (DestVT == MVT::i64);
4555   switch (ISDOpcode) {
4556   default:
4557     return false;
4558   case ISD::SREM:
4559     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4560     break;
4561   case ISD::UREM:
4562     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4563     break;
4564   }
4565   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4566   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4567   if (!Src0Reg)
4568     return false;
4569   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4570
4571   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4572   if (!Src1Reg)
4573     return false;
4574   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4575
4576   const TargetRegisterClass *RC =
4577       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4578   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4579                                      Src1Reg, /*IsKill=*/false);
4580   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4581   // The remainder is computed as numerator - (quotient * denominator) using the
4582   // MSUB instruction.
4583   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4584                                         Src1Reg, Src1IsKill, Src0Reg,
4585                                         Src0IsKill);
4586   updateValueMap(I, ResultReg);
4587   return true;
4588 }
4589
4590 bool AArch64FastISel::selectMul(const Instruction *I) {
4591   MVT VT;
4592   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4593     return false;
4594
4595   if (VT.isVector())
4596     return selectBinaryOp(I, ISD::MUL);
4597
4598   const Value *Src0 = I->getOperand(0);
4599   const Value *Src1 = I->getOperand(1);
4600   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4601     if (C->getValue().isPowerOf2())
4602       std::swap(Src0, Src1);
4603
4604   // Try to simplify to a shift instruction.
4605   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4606     if (C->getValue().isPowerOf2()) {
4607       uint64_t ShiftVal = C->getValue().logBase2();
4608       MVT SrcVT = VT;
4609       bool IsZExt = true;
4610       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4611         if (!isIntExtFree(ZExt)) {
4612           MVT VT;
4613           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4614             SrcVT = VT;
4615             IsZExt = true;
4616             Src0 = ZExt->getOperand(0);
4617           }
4618         }
4619       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4620         if (!isIntExtFree(SExt)) {
4621           MVT VT;
4622           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4623             SrcVT = VT;
4624             IsZExt = false;
4625             Src0 = SExt->getOperand(0);
4626           }
4627         }
4628       }
4629
4630       unsigned Src0Reg = getRegForValue(Src0);
4631       if (!Src0Reg)
4632         return false;
4633       bool Src0IsKill = hasTrivialKill(Src0);
4634
4635       unsigned ResultReg =
4636           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4637
4638       if (ResultReg) {
4639         updateValueMap(I, ResultReg);
4640         return true;
4641       }
4642     }
4643
4644   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4645   if (!Src0Reg)
4646     return false;
4647   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4648
4649   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4650   if (!Src1Reg)
4651     return false;
4652   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4653
4654   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4655
4656   if (!ResultReg)
4657     return false;
4658
4659   updateValueMap(I, ResultReg);
4660   return true;
4661 }
4662
/// Select a 'shl', 'lshr' or 'ashr' instruction.
///
/// Vector shifts are delegated to selectOperator(). Shifts by a constant
/// amount use the immediate emitters and try to fold a zero-/sign-extension of
/// the shifted value into the shift; all other shifts use the
/// register-register emitters.
///
/// \returns true if the instruction was selected, false otherwise.
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  // Shift by a constant amount.
  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    // Without a folded extension, only 'ashr' treats its operand as signed.
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    // If the shifted value is a non-free extension, shift the narrow source
    // instead and let the emitter perform the extension.
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    unsigned Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(Op0);

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  // Shift by a variable amount held in a register.
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
4751
4752 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4753   MVT RetVT, SrcVT;
4754
4755   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4756     return false;
4757   if (!isTypeLegal(I->getType(), RetVT))
4758     return false;
4759
4760   unsigned Opc;
4761   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4762     Opc = AArch64::FMOVWSr;
4763   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4764     Opc = AArch64::FMOVXDr;
4765   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4766     Opc = AArch64::FMOVSWr;
4767   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4768     Opc = AArch64::FMOVDXr;
4769   else
4770     return false;
4771
4772   const TargetRegisterClass *RC = nullptr;
4773   switch (RetVT.SimpleTy) {
4774   default: llvm_unreachable("Unexpected value type.");
4775   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4776   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4777   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4778   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4779   }
4780   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4781   if (!Op0Reg)
4782     return false;
4783   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4784   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4785
4786   if (!ResultReg)
4787     return false;
4788
4789   updateValueMap(I, ResultReg);
4790   return true;
4791 }
4792
4793 bool AArch64FastISel::selectFRem(const Instruction *I) {
4794   MVT RetVT;
4795   if (!isTypeLegal(I->getType(), RetVT))
4796     return false;
4797
4798   RTLIB::Libcall LC;
4799   switch (RetVT.SimpleTy) {
4800   default:
4801     return false;
4802   case MVT::f32:
4803     LC = RTLIB::REM_F32;
4804     break;
4805   case MVT::f64:
4806     LC = RTLIB::REM_F64;
4807     break;
4808   }
4809
4810   ArgListTy Args;
4811   Args.reserve(I->getNumOperands());
4812
4813   // Populate the argument list.
4814   for (auto &Arg : I->operands()) {
4815     ArgListEntry Entry;
4816     Entry.Val = Arg;
4817     Entry.Ty = Arg->getType();
4818     Args.push_back(Entry);
4819   }
4820
4821   CallLoweringInfo CLI;
4822   MCContext &Ctx = MF->getContext();
4823   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4824                 TLI.getLibcallName(LC), std::move(Args));
4825   if (!lowerCallTo(CLI))
4826     return false;
4827   updateValueMap(I, CLI.ResultReg);
4828   return true;
4829 }
4830
4831 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4832   MVT VT;
4833   if (!isTypeLegal(I->getType(), VT))
4834     return false;
4835
4836   if (!isa<ConstantInt>(I->getOperand(1)))
4837     return selectBinaryOp(I, ISD::SDIV);
4838
4839   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4840   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4841       !(C.isPowerOf2() || (-C).isPowerOf2()))
4842     return selectBinaryOp(I, ISD::SDIV);
4843
4844   unsigned Lg2 = C.countTrailingZeros();
4845   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4846   if (!Src0Reg)
4847     return false;
4848   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4849
4850   if (cast<BinaryOperator>(I)->isExact()) {
4851     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4852     if (!ResultReg)
4853       return false;
4854     updateValueMap(I, ResultReg);
4855     return true;
4856   }
4857
4858   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4859   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4860   if (!AddReg)
4861     return false;
4862
4863   // (Src0 < 0) ? Pow2 - 1 : 0;
4864   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4865     return false;
4866
4867   unsigned SelectOpc;
4868   const TargetRegisterClass *RC;
4869   if (VT == MVT::i64) {
4870     SelectOpc = AArch64::CSELXr;
4871     RC = &AArch64::GPR64RegClass;
4872   } else {
4873     SelectOpc = AArch64::CSELWr;
4874     RC = &AArch64::GPR32RegClass;
4875   }
4876   unsigned SelectReg =
4877       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4878                        Src0IsKill, AArch64CC::LT);
4879   if (!SelectReg)
4880     return false;
4881
4882   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4883   // negate the result.
4884   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4885   unsigned ResultReg;
4886   if (C.isNegative())
4887     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4888                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4889   else
4890     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4891
4892   if (!ResultReg)
4893     return false;
4894
4895   updateValueMap(I, ResultReg);
4896   return true;
4897 }
4898
4899 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4900 /// have to duplicate it for AArch64, because otherwise we would fail during the
4901 /// sign-extend emission.
4902 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4903   unsigned IdxN = getRegForValue(Idx);
4904   if (IdxN == 0)
4905     // Unhandled operand. Halt "fast" selection and bail.
4906     return std::pair<unsigned, bool>(0, false);
4907
4908   bool IdxNIsKill = hasTrivialKill(Idx);
4909
4910   // If the index is smaller or larger than intptr_t, truncate or extend it.
4911   MVT PtrVT = TLI.getPointerTy(DL);
4912   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4913   if (IdxVT.bitsLT(PtrVT)) {
4914     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4915     IdxNIsKill = true;
4916   } else if (IdxVT.bitsGT(PtrVT))
4917     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4918   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4919 }
4920
/// Select a 'getelementptr' instruction.
///
/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
///
/// \returns true if the instruction was selected, false otherwise.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  // N holds the running address, starting with the base pointer.
  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      // Struct field access: the index is a constant field number.
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      Type *Ty = GTI.getIndexedType();

      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs +=
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      // Variable subscript: flush the pending constant offset first.
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
        if (!N)
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
      unsigned IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN)
        return false;

      // Scale the index by the element size unless that size is one.
      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
        if (!IdxN)
          return false;
        IdxNIsKill = true;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N)
        return false;
    }
  }
  // Add whatever constant offset is still pending after the last index.
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}
4993
4994 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4995   assert(TM.getOptLevel() == CodeGenOpt::None &&
4996          "cmpxchg survived AtomicExpand at optlevel > -O0");
4997
4998   auto *RetPairTy = cast<StructType>(I->getType());
4999   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5000   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5001          "cmpxchg has a non-i1 status result");
5002
5003   MVT VT;
5004   if (!isTypeLegal(RetTy, VT))
5005     return false;
5006
5007   const TargetRegisterClass *ResRC;
5008   unsigned Opc, CmpOpc;
5009   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5010   // extractvalue selection doesn't support that.
5011   if (VT == MVT::i32) {
5012     Opc = AArch64::CMP_SWAP_32;
5013     CmpOpc = AArch64::SUBSWrs;
5014     ResRC = &AArch64::GPR32RegClass;
5015   } else if (VT == MVT::i64) {
5016     Opc = AArch64::CMP_SWAP_64;
5017     CmpOpc = AArch64::SUBSXrs;
5018     ResRC = &AArch64::GPR64RegClass;
5019   } else {
5020     return false;
5021   }
5022
5023   const MCInstrDesc &II = TII.get(Opc);
5024
5025   const unsigned AddrReg = constrainOperandRegClass(
5026       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5027   const unsigned DesiredReg = constrainOperandRegClass(
5028       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5029   const unsigned NewReg = constrainOperandRegClass(
5030       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5031
5032   const unsigned ResultReg1 = createResultReg(ResRC);
5033   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5034   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5035
5036   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5037   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5038       .addDef(ResultReg1)
5039       .addDef(ScratchReg)
5040       .addUse(AddrReg)
5041       .addUse(DesiredReg)
5042       .addUse(NewReg);
5043
5044   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5045       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046       .addUse(ResultReg1)
5047       .addUse(DesiredReg)
5048       .addImm(0);
5049
5050   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5051       .addDef(ResultReg2)
5052       .addUse(AArch64::WZR)
5053       .addUse(AArch64::WZR)
5054       .addImm(AArch64CC::NE);
5055
5056   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5057   updateValueMap(I, ResultReg1, 2);
5058   return true;
5059 }
5060
5061 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5062   switch (I->getOpcode()) {
5063   default:
5064     break;
5065   case Instruction::Add:
5066   case Instruction::Sub:
5067     return selectAddSub(I);
5068   case Instruction::Mul:
5069     return selectMul(I);
5070   case Instruction::SDiv:
5071     return selectSDiv(I);
5072   case Instruction::SRem:
5073     if (!selectBinaryOp(I, ISD::SREM))
5074       return selectRem(I, ISD::SREM);
5075     return true;
5076   case Instruction::URem:
5077     if (!selectBinaryOp(I, ISD::UREM))
5078       return selectRem(I, ISD::UREM);
5079     return true;
5080   case Instruction::Shl:
5081   case Instruction::LShr:
5082   case Instruction::AShr:
5083     return selectShift(I);
5084   case Instruction::And:
5085   case Instruction::Or:
5086   case Instruction::Xor:
5087     return selectLogicalOp(I);
5088   case Instruction::Br:
5089     return selectBranch(I);
5090   case Instruction::IndirectBr:
5091     return selectIndirectBr(I);
5092   case Instruction::BitCast:
5093     if (!FastISel::selectBitCast(I))
5094       return selectBitCast(I);
5095     return true;
5096   case Instruction::FPToSI:
5097     if (!selectCast(I, ISD::FP_TO_SINT))
5098       return selectFPToInt(I, /*Signed=*/true);
5099     return true;
5100   case Instruction::FPToUI:
5101     return selectFPToInt(I, /*Signed=*/false);
5102   case Instruction::ZExt:
5103   case Instruction::SExt:
5104     return selectIntExt(I);
5105   case Instruction::Trunc:
5106     if (!selectCast(I, ISD::TRUNCATE))
5107       return selectTrunc(I);
5108     return true;
5109   case Instruction::FPExt:
5110     return selectFPExt(I);
5111   case Instruction::FPTrunc:
5112     return selectFPTrunc(I);
5113   case Instruction::SIToFP:
5114     if (!selectCast(I, ISD::SINT_TO_FP))
5115       return selectIntToFP(I, /*Signed=*/true);
5116     return true;
5117   case Instruction::UIToFP:
5118     return selectIntToFP(I, /*Signed=*/false);
5119   case Instruction::Load:
5120     return selectLoad(I);
5121   case Instruction::Store:
5122     return selectStore(I);
5123   case Instruction::FCmp:
5124   case Instruction::ICmp:
5125     return selectCmp(I);
5126   case Instruction::Select:
5127     return selectSelect(I);
5128   case Instruction::Ret:
5129     return selectRet(I);
5130   case Instruction::FRem:
5131     return selectFRem(I);
5132   case Instruction::GetElementPtr:
5133     return selectGetElementPtr(I);
5134   case Instruction::AtomicCmpXchg:
5135     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5136   }
5137
5138   // fall-back to target-independent instruction selection.
5139   return selectOperator(I, I->getOpcode());
5140   // Silence warnings.
5141   (void)&CC_AArch64_DarwinPCS_VarArg;
5142   (void)&CC_AArch64_Win64_VarArg;
5143 }
5144
5145 namespace llvm {
5146
5147 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5148                                         const TargetLibraryInfo *LibInfo) {
5149   return new AArch64FastISel(FuncInfo, LibInfo);
5150 }
5151
5152 } // end namespace llvm