contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64CallingConvention.h"
  18 #include "AArch64RegisterInfo.h"
  19 #include "AArch64Subtarget.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "Utils/AArch64BaseInfo.h"
  22 #include "llvm/ADT/APFloat.h"
  23 #include "llvm/ADT/APInt.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/Analysis/BranchProbabilityInfo.h"
  27 #include "llvm/CodeGen/CallingConvLower.h"
  28 #include "llvm/CodeGen/FastISel.h"
  29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  30 #include "llvm/CodeGen/ISDOpcodes.h"
  31 #include "llvm/CodeGen/MachineBasicBlock.h"
  32 #include "llvm/CodeGen/MachineConstantPool.h"
  33 #include "llvm/CodeGen/MachineFrameInfo.h"
  34 #include "llvm/CodeGen/MachineInstr.h"
  35 #include "llvm/CodeGen/MachineInstrBuilder.h"
  36 #include "llvm/CodeGen/MachineMemOperand.h"
  37 #include "llvm/CodeGen/MachineRegisterInfo.h"
  38 #include "llvm/CodeGen/MachineValueType.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/ValueTypes.h"
  41 #include "llvm/IR/Argument.h"
  42 #include "llvm/IR/Attributes.h"
  43 #include "llvm/IR/BasicBlock.h"
  44 #include "llvm/IR/CallingConv.h"
  45 #include "llvm/IR/Constant.h"
  46 #include "llvm/IR/Constants.h"
  47 #include "llvm/IR/DataLayout.h"
  48 #include "llvm/IR/DerivedTypes.h"
  49 #include "llvm/IR/Function.h"
  50 #include "llvm/IR/GetElementPtrTypeIterator.h"
  51 #include "llvm/IR/GlobalValue.h"
  52 #include "llvm/IR/InstrTypes.h"
  53 #include "llvm/IR/Instruction.h"
  54 #include "llvm/IR/Instructions.h"
  55 #include "llvm/IR/IntrinsicInst.h"
  56 #include "llvm/IR/Operator.h"
  57 #include "llvm/IR/Type.h"
  58 #include "llvm/IR/User.h"
  59 #include "llvm/IR/Value.h"
  60 #include "llvm/MC/MCInstrDesc.h"
  61 #include "llvm/MC/MCRegisterInfo.h"
  62 #include "llvm/MC/MCSymbol.h"
  63 #include "llvm/Support/AtomicOrdering.h"
  64 #include "llvm/Support/Casting.h"
  65 #include "llvm/Support/CodeGen.h"
  66 #include "llvm/Support/ErrorHandling.h"
  67 #include "llvm/Support/MathExtras.h"
  68 #include <algorithm>
  69 #include <cassert>
  70 #include <cstdint>
  71 #include <iterator>
  72 #include <utility>
  73
  74 using namespace llvm;
  75
  76 namespace {
  77
  78 class AArch64FastISel final : public FastISel {
  79   class Address {
  80   public:
  81     typedef enum {
  82       RegBase,
  83       FrameIndexBase
  84     } BaseKind;
  85
  86   private:
  87     BaseKind Kind = RegBase;
  88     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
  89     union {
  90       unsigned Reg;
  91       int FI;
  92     } Base;
  93     unsigned OffsetReg = 0;
  94     unsigned Shift = 0;
  95     int64_t Offset = 0;
  96     const GlobalValue *GV = nullptr;
  97
  98   public:
  99     Address() { Base.Reg = 0; }
 100
 101     void setKind(BaseKind K) { Kind = K; }
 102     BaseKind getKind() const { return Kind; }
 103     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
 104     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
 105     bool isRegBase() const { return Kind == RegBase; }
 106     bool isFIBase() const { return Kind == FrameIndexBase; }
 107
 108     void setReg(unsigned Reg) {
 109       assert(isRegBase() && "Invalid base register access!");
 110       Base.Reg = Reg;
 111     }
 112
 113     unsigned getReg() const {
 114       assert(isRegBase() && "Invalid base register access!");
 115       return Base.Reg;
 116     }
 117
 118     void setOffsetReg(unsigned Reg) {
 119       OffsetReg = Reg;
 120     }
 121
 122     unsigned getOffsetReg() const {
 123       return OffsetReg;
 124     }
 125
 126     void setFI(unsigned FI) {
 127       assert(isFIBase() && "Invalid base frame index  access!");
 128       Base.FI = FI;
 129     }
 130
 131     unsigned getFI() const {
 132       assert(isFIBase() && "Invalid base frame index access!");
 133       return Base.FI;
 134     }
 135
 136     void setOffset(int64_t O) { Offset = O; }
 137     int64_t getOffset() { return Offset; }
 138     void setShift(unsigned S) { Shift = S; }
 139     unsigned getShift() { return Shift; }
 140
 141     void setGlobalValue(const GlobalValue *G) { GV = G; }
 142     const GlobalValue *getGlobalValue() { return GV; }
 143   };
 144
 145   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 146   /// make the right decision when generating code for different targets.
 147   const AArch64Subtarget *Subtarget;
 148   LLVMContext *Context;
 149
 150   bool fastLowerArguments() override;
 151   bool fastLowerCall(CallLoweringInfo &CLI) override;
 152   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 153
 154 private:
 155   // Selection routines.
 156   bool selectAddSub(const Instruction *I);
 157   bool selectLogicalOp(const Instruction *I);
 158   bool selectLoad(const Instruction *I);
 159   bool selectStore(const Instruction *I);
 160   bool selectBranch(const Instruction *I);
 161   bool selectIndirectBr(const Instruction *I);
 162   bool selectCmp(const Instruction *I);
 163   bool selectSelect(const Instruction *I);
 164   bool selectFPExt(const Instruction *I);
 165   bool selectFPTrunc(const Instruction *I);
 166   bool selectFPToInt(const Instruction *I, bool Signed);
 167   bool selectIntToFP(const Instruction *I, bool Signed);
 168   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 169   bool selectRet(const Instruction *I);
 170   bool selectTrunc(const Instruction *I);
 171   bool selectIntExt(const Instruction *I);
 172   bool selectMul(const Instruction *I);
 173   bool selectShift(const Instruction *I);
 174   bool selectBitCast(const Instruction *I);
 175   bool selectFRem(const Instruction *I);
 176   bool selectSDiv(const Instruction *I);
 177   bool selectGetElementPtr(const Instruction *I);
 178   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
 179
 180   // Utility helper routines.
 181   bool isTypeLegal(Type *Ty, MVT &VT);
 182   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 183   bool isValueAvailable(const Value *V) const;
 184   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 185   bool computeCallAddress(const Value *V, Address &Addr);
 186   bool simplifyAddress(Address &Addr, MVT VT);
 187   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 188                             MachineMemOperand::Flags Flags,
 189                             unsigned ScaleFactor, MachineMemOperand *MMO);
 190   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 191   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 192                           unsigned Alignment);
 193   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 194                          const Value *Cond);
 195   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
 196   bool optimizeSelect(const SelectInst *SI);
 197   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
 198
 199   // Emit helper routines.
 200   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 201                       const Value *RHS, bool SetFlags = false,
 202                       bool WantResult = true,  bool IsZExt = false);
 203   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 204                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 205                          bool SetFlags = false, bool WantResult = true);
 206   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 207                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 208                          bool WantResult = true);
 209   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 210                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 211                          AArch64_AM::ShiftExtendType ShiftType,
 212                          uint64_t ShiftImm, bool SetFlags = false,
 213                          bool WantResult = true);
 214   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 215                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 216                           AArch64_AM::ShiftExtendType ExtType,
 217                           uint64_t ShiftImm, bool SetFlags = false,
 218                          bool WantResult = true);
 219
 220   // Emit functions.
 221   bool emitCompareAndBranch(const BranchInst *BI);
 222   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 223   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 224   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 225   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 226   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
 227                     MachineMemOperand *MMO = nullptr);
 228   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 229                  MachineMemOperand *MMO = nullptr);
 230   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
 231                         MachineMemOperand *MMO = nullptr);
 232   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 233   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 234   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 235                    bool SetFlags = false, bool WantResult = true,
 236                    bool IsZExt = false);
 237   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
 238   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 239                    bool SetFlags = false, bool WantResult = true,
 240                    bool IsZExt = false);
 241   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 242                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 243   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 244                        unsigned RHSReg, bool RHSIsKill,
 245                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 246                        bool WantResult = true);
 247   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 248                          const Value *RHS);
 249   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 250                             bool LHSIsKill, uint64_t Imm);
 251   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 252                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 253                             uint64_t ShiftImm);
 254   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 255   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 256                       unsigned Op1, bool Op1IsKill);
 257   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 258                         unsigned Op1, bool Op1IsKill);
 259   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 260                         unsigned Op1, bool Op1IsKill);
 261   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 262                       unsigned Op1Reg, bool Op1IsKill);
 263   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 264                       uint64_t Imm, bool IsZExt = true);
 265   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 266                       unsigned Op1Reg, bool Op1IsKill);
 267   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 268                       uint64_t Imm, bool IsZExt = true);
 269   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 270                       unsigned Op1Reg, bool Op1IsKill);
 271   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 272                       uint64_t Imm, bool IsZExt = false);
 273
 274   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 275   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 276   unsigned materializeGV(const GlobalValue *GV);
 277
 278   // Call handling routines.
 279 private:
 280   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 281   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 282                        unsigned &NumBytes);
 283   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 284
 285 public:
 286   // Backend specific FastISel code.
 287   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 288   unsigned fastMaterializeConstant(const Constant *C) override;
 289   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 290
 291   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 292                            const TargetLibraryInfo *LibInfo)
 293       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 294     Subtarget =
 295         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
 296     Context = &FuncInfo.Fn->getContext();
 297   }
 298
 299   bool fastSelectInstruction(const Instruction *I) override;
 300
 301 #include "AArch64GenFastISel.inc"
 302 };
 303
 304 } // end anonymous namespace
 305
 306 #include "AArch64GenCallingConv.inc"
 307
 308 /// \brief Check if the sign-/zero-extend will be a noop.
 309 static bool isIntExtFree(const Instruction *I) {
 310   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 311          "Unexpected integer extend instruction.");
 312   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 313          "Unexpected value type.");
 314   bool IsZExt = isa<ZExtInst>(I);
 315
 316   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 317     if (LI->hasOneUse())
 318       return true;
 319
 320   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 321     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 322       return true;
 323
 324   return false;
 325 }
 326
 327 /// \brief Determine the implicit scale factor that is applied by a memory
 328 /// operation for a given value type.
 329 static unsigned getImplicitScaleFactor(MVT VT) {
 330   switch (VT.SimpleTy) {
 331   default:
 332     return 0;    // invalid
 333   case MVT::i1:  // fall-through
 334   case MVT::i8:
 335     return 1;
 336   case MVT::i16:
 337     return 2;
 338   case MVT::i32: // fall-through
 339   case MVT::f32:
 340     return 4;
 341   case MVT::i64: // fall-through
 342   case MVT::f64:
 343     return 8;
 344   }
 345 }
 346
 347 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 348   if (CC == CallingConv::WebKit_JS)
 349     return CC_AArch64_WebKit_JS;
 350   if (CC == CallingConv::GHC)
 351     return CC_AArch64_GHC;
 352   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 353 }
 354
 355 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 356   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
 357          "Alloca should always return a pointer.");
 358
 359   // Don't handle dynamic allocas.
 360   if (!FuncInfo.StaticAllocaMap.count(AI))
 361     return 0;
 362
 363   DenseMap<const AllocaInst *, int>::iterator SI =
 364       FuncInfo.StaticAllocaMap.find(AI);
 365
 366   if (SI != FuncInfo.StaticAllocaMap.end()) {
 367     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 368     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 369             ResultReg)
 370         .addFrameIndex(SI->second)
 371         .addImm(0)
 372         .addImm(0);
 373     return ResultReg;
 374   }
 375
 376   return 0;
 377 }
 378
 379 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 380   if (VT > MVT::i64)
 381     return 0;
 382
 383   if (!CI->isZero())
 384     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 385
 386   // Create a copy from the zero register to materialize a "0" value.
 387   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 388                                                    : &AArch64::GPR32RegClass;
 389   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 390   unsigned ResultReg = createResultReg(RC);
 391   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 392           ResultReg).addReg(ZeroReg, getKillRegState(true));
 393   return ResultReg;
 394 }
 395
 396 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 397   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 398   // register, because the immediate version of fmov cannot encode zero.
 399   if (CFP->isNullValue())
 400     return fastMaterializeFloatZero(CFP);
 401
 402   if (VT != MVT::f32 && VT != MVT::f64)
 403     return 0;
 404
 405   const APFloat Val = CFP->getValueAPF();
 406   bool Is64Bit = (VT == MVT::f64);
 407   // This checks to see if we can use FMOV instructions to materialize
 408   // a constant, otherwise we have to materialize via the constant pool.
 409   if (TLI.isFPImmLegal(Val, VT)) {
 410     int Imm =
 411         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 412     assert((Imm != -1) && "Cannot encode floating-point constant.");
 413     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 414     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 415   }
 416
 417   // For the MachO large code model materialize the FP constant in code.
 418   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
 419     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
 420     const TargetRegisterClass *RC = Is64Bit ?
 421         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 422
 423     unsigned TmpReg = createResultReg(RC);
 424     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
 425         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
 426
 427     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 428     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 429             TII.get(TargetOpcode::COPY), ResultReg)
 430         .addReg(TmpReg, getKillRegState(true));
 431
 432     return ResultReg;
 433   }
 434
 435   // Materialize via constant pool.  MachineConstantPool wants an explicit
 436   // alignment.
 437   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 438   if (Align == 0)
 439     Align = DL.getTypeAllocSize(CFP->getType());
 440
 441   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 442   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 443   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 444           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 445
 446   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 447   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 448   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 449       .addReg(ADRPReg)
 450       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 451   return ResultReg;
 452 }
 453
 454 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 455   // We can't handle thread-local variables quickly yet.
 456   if (GV->isThreadLocal())
 457     return 0;
 458
 459   // MachO still uses GOT for large code-model accesses, but ELF requires
 460   // movz/movk sequences, which FastISel doesn't handle yet.
 461   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
 462     return 0;
 463
 464   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 465
 466   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
 467   if (!DestEVT.isSimple())
 468     return 0;
 469
 470   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 471   unsigned ResultReg;
 472
 473   if (OpFlags & AArch64II::MO_GOT) {
 474     // ADRP + LDRX
 475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 476             ADRPReg)
 477       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 478
 479     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 480     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 481             ResultReg)
 482       .addReg(ADRPReg)
 483       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 484                         AArch64II::MO_NC);
 485   } else {
 486     // ADRP + ADDX
 487     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 488             ADRPReg)
 489       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 490
 491     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 492     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 493             ResultReg)
 494       .addReg(ADRPReg)
 495       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 496       .addImm(0);
 497   }
 498   return ResultReg;
 499 }
 500
 501 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 502   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
 503
 504   // Only handle simple types.
 505   if (!CEVT.isSimple())
 506     return 0;
 507   MVT VT = CEVT.getSimpleVT();
 508
 509   if (const auto *CI = dyn_cast<ConstantInt>(C))
 510     return materializeInt(CI, VT);
 511   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 512     return materializeFP(CFP, VT);
 513   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 514     return materializeGV(GV);
 515
 516   return 0;
 517 }
 518
 519 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 520   assert(CFP->isNullValue() &&
 521          "Floating-point constant is not a positive zero.");
 522   MVT VT;
 523   if (!isTypeLegal(CFP->getType(), VT))
 524     return 0;
 525
 526   if (VT != MVT::f32 && VT != MVT::f64)
 527     return 0;
 528
 529   bool Is64Bit = (VT == MVT::f64);
 530   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 531   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 532   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 533 }
 534
 535 /// \brief Check if the multiply is by a power-of-2 constant.
 536 static bool isMulPowOf2(const Value *I) {
 537   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 538     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 539       if (C->getValue().isPowerOf2())
 540         return true;
 541     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 542       if (C->getValue().isPowerOf2())
 543         return true;
 544   }
 545   return false;
 546 }
 547
 548 // Computes the address to get to an object.
 549 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 550 {
 551   const User *U = nullptr;
 552   unsigned Opcode = Instruction::UserOp1;
 553   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 554     // Don't walk into other basic blocks unless the object is an alloca from
 555     // another block, otherwise it may not have a virtual register assigned.
 556     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 557         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 558       Opcode = I->getOpcode();
 559       U = I;
 560     }
 561   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 562     Opcode = C->getOpcode();
 563     U = C;
 564   }
 565
 566   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
 567     if (Ty->getAddressSpace() > 255)
 568       // Fast instruction selection doesn't support the special
 569       // address spaces.
 570       return false;
 571
 572   switch (Opcode) {
 573   default:
 574     break;
 575   case Instruction::BitCast:
 576     // Look through bitcasts.
 577     return computeAddress(U->getOperand(0), Addr, Ty);
 578
 579   case Instruction::IntToPtr:
 580     // Look past no-op inttoptrs.
 581     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 582         TLI.getPointerTy(DL))
 583       return computeAddress(U->getOperand(0), Addr, Ty);
 584     break;
 585
 586   case Instruction::PtrToInt:
 587     // Look past no-op ptrtoints.
 588     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 589       return computeAddress(U->getOperand(0), Addr, Ty);
 590     break;
 591
 592   case Instruction::GetElementPtr: {
 593     Address SavedAddr = Addr;
 594     uint64_t TmpOffset = Addr.getOffset();
 595
 596     // Iterate through the GEP folding the constants into offsets where
 597     // we can.
 598     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
 599          GTI != E; ++GTI) {
 600       const Value *Op = GTI.getOperand();
 601       if (StructType *STy = GTI.getStructTypeOrNull()) {
 602         const StructLayout *SL = DL.getStructLayout(STy);
 603         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 604         TmpOffset += SL->getElementOffset(Idx);
 605       } else {
 606         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 607         while (true) {
 608           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 609             // Constant-offset addressing.
 610             TmpOffset += CI->getSExtValue() * S;
 611             break;
 612           }
 613           if (canFoldAddIntoGEP(U, Op)) {
 614             // A compatible add with a constant operand. Fold the constant.
 615             ConstantInt *CI =
 616                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 617             TmpOffset += CI->getSExtValue() * S;
 618             // Iterate on the other operand.
 619             Op = cast<AddOperator>(Op)->getOperand(0);
 620             continue;
 621           }
 622           // Unsupported
 623           goto unsupported_gep;
 624         }
 625       }
 626     }
 627
 628     // Try to grab the base operand now.
 629     Addr.setOffset(TmpOffset);
 630     if (computeAddress(U->getOperand(0), Addr, Ty))
 631       return true;
 632
 633     // We failed, restore everything and try the other options.
 634     Addr = SavedAddr;
 635
 636   unsupported_gep:
 637     break;
 638   }
 639   case Instruction::Alloca: {
 640     const AllocaInst *AI = cast<AllocaInst>(Obj);
 641     DenseMap<const AllocaInst *, int>::iterator SI =
 642         FuncInfo.StaticAllocaMap.find(AI);
 643     if (SI != FuncInfo.StaticAllocaMap.end()) {
 644       Addr.setKind(Address::FrameIndexBase);
 645       Addr.setFI(SI->second);
 646       return true;
 647     }
 648     break;
 649   }
 650   case Instruction::Add: {
 651     // Adds of constants are common and easy enough.
 652     const Value *LHS = U->getOperand(0);
 653     const Value *RHS = U->getOperand(1);
 654
 655     if (isa<ConstantInt>(LHS))
 656       std::swap(LHS, RHS);
 657
 658     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 659       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 660       return computeAddress(LHS, Addr, Ty);
 661     }
 662
 663     Address Backup = Addr;
 664     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 665       return true;
 666     Addr = Backup;
 667
 668     break;
 669   }
 670   case Instruction::Sub: {
 671     // Subs of constants are common and easy enough.
 672     const Value *LHS = U->getOperand(0);
 673     const Value *RHS = U->getOperand(1);
 674
 675     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 676       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 677       return computeAddress(LHS, Addr, Ty);
 678     }
 679     break;
 680   }
 681   case Instruction::Shl: {
 682     if (Addr.getOffsetReg())
 683       break;
 684
 685     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 686     if (!CI)
 687       break;
 688
 689     unsigned Val = CI->getZExtValue();
 690     if (Val < 1 || Val > 3)
 691       break;
 692
 693     uint64_t NumBytes = 0;
 694     if (Ty && Ty->isSized()) {
 695       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 696       NumBytes = NumBits / 8;
 697       if (!isPowerOf2_64(NumBits))
 698         NumBytes = 0;
 699     }
 700
 701     if (NumBytes != (1ULL << Val))
 702       break;
 703
 704     Addr.setShift(Val);
 705     Addr.setExtendType(AArch64_AM::LSL);
 706
 707     const Value *Src = U->getOperand(0);
 708     if (const auto *I = dyn_cast<Instruction>(Src)) {
 709       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 710         // Fold the zext or sext when it won't become a noop.
 711         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 712           if (!isIntExtFree(ZE) &&
 713               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 714             Addr.setExtendType(AArch64_AM::UXTW);
 715             Src = ZE->getOperand(0);
 716           }
 717         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 718           if (!isIntExtFree(SE) &&
 719               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 720             Addr.setExtendType(AArch64_AM::SXTW);
 721             Src = SE->getOperand(0);
 722           }
 723         }
 724       }
 725     }
 726
 727     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 728       if (AI->getOpcode() == Instruction::And) {
 729         const Value *LHS = AI->getOperand(0);
 730         const Value *RHS = AI->getOperand(1);
 731
 732         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 733           if (C->getValue() == 0xffffffff)
 734             std::swap(LHS, RHS);
 735
 736         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 737           if (C->getValue() == 0xffffffff) {
 738             Addr.setExtendType(AArch64_AM::UXTW);
 739             unsigned Reg = getRegForValue(LHS);
 740             if (!Reg)
 741               return false;
 742             bool RegIsKill = hasTrivialKill(LHS);
 743             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 744                                              AArch64::sub_32);
 745             Addr.setOffsetReg(Reg);
 746             return true;
 747           }
 748       }
 749
 750     unsigned Reg = getRegForValue(Src);
 751     if (!Reg)
 752       return false;
 753     Addr.setOffsetReg(Reg);
 754     return true;
 755   }
 756   case Instruction::Mul: {
 757     if (Addr.getOffsetReg())
 758       break;
 759
 760     if (!isMulPowOf2(U))
 761       break;
 762
 763     const Value *LHS = U->getOperand(0);
 764     const Value *RHS = U->getOperand(1);
 765
 766     // Canonicalize power-of-2 value to the RHS.
 767     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 768       if (C->getValue().isPowerOf2())
 769         std::swap(LHS, RHS);
 770
 771     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 772     const auto *C = cast<ConstantInt>(RHS);
 773     unsigned Val = C->getValue().logBase2();
 774     if (Val < 1 || Val > 3)
 775       break;
 776
 777     uint64_t NumBytes = 0;
 778     if (Ty && Ty->isSized()) {
 779       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 780       NumBytes = NumBits / 8;
 781       if (!isPowerOf2_64(NumBits))
 782         NumBytes = 0;
 783     }
 784
 785     if (NumBytes != (1ULL << Val))
 786       break;
 787
 788     Addr.setShift(Val);
 789     Addr.setExtendType(AArch64_AM::LSL);
 790
 791     const Value *Src = LHS;
 792     if (const auto *I = dyn_cast<Instruction>(Src)) {
 793       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 794         // Fold the zext or sext when it won't become a noop.
 795         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 796           if (!isIntExtFree(ZE) &&
 797               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 798             Addr.setExtendType(AArch64_AM::UXTW);
 799             Src = ZE->getOperand(0);
 800           }
 801         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 802           if (!isIntExtFree(SE) &&
 803               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 804             Addr.setExtendType(AArch64_AM::SXTW);
 805             Src = SE->getOperand(0);
 806           }
 807         }
 808       }
 809     }
 810
 811     unsigned Reg = getRegForValue(Src);
 812     if (!Reg)
 813       return false;
 814     Addr.setOffsetReg(Reg);
 815     return true;
 816   }
 817   case Instruction::And: {
 818     if (Addr.getOffsetReg())
 819       break;
 820
 821     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
 822       break;
 823
 824     const Value *LHS = U->getOperand(0);
 825     const Value *RHS = U->getOperand(1);
 826
 827     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 828       if (C->getValue() == 0xffffffff)
 829         std::swap(LHS, RHS);
 830
 831     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 832       if (C->getValue() == 0xffffffff) {
 833         Addr.setShift(0);
 834         Addr.setExtendType(AArch64_AM::LSL);
 835         Addr.setExtendType(AArch64_AM::UXTW);
 836
 837         unsigned Reg = getRegForValue(LHS);
 838         if (!Reg)
 839           return false;
 840         bool RegIsKill = hasTrivialKill(LHS);
 841         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 842                                          AArch64::sub_32);
 843         Addr.setOffsetReg(Reg);
 844         return true;
 845       }
 846     break;
 847   }
 848   case Instruction::SExt:
 849   case Instruction::ZExt: {
 850     if (!Addr.getReg() || Addr.getOffsetReg())
 851       break;
 852
 853     const Value *Src = nullptr;
 854     // Fold the zext or sext when it won't become a noop.
 855     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 856       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 857         Addr.setExtendType(AArch64_AM::UXTW);
 858         Src = ZE->getOperand(0);
 859       }
 860     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 861       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 862         Addr.setExtendType(AArch64_AM::SXTW);
 863         Src = SE->getOperand(0);
 864       }
 865     }
 866
 867     if (!Src)
 868       break;
 869
 870     Addr.setShift(0);
 871     unsigned Reg = getRegForValue(Src);
 872     if (!Reg)
 873       return false;
 874     Addr.setOffsetReg(Reg);
 875     return true;
 876   }
 877   } // end switch
 878
 879   if (Addr.isRegBase() && !Addr.getReg()) {
 880     unsigned Reg = getRegForValue(Obj);
 881     if (!Reg)
 882       return false;
 883     Addr.setReg(Reg);
 884     return true;
 885   }
 886
 887   if (!Addr.getOffsetReg()) {
 888     unsigned Reg = getRegForValue(Obj);
 889     if (!Reg)
 890       return false;
 891     Addr.setOffsetReg(Reg);
 892     return true;
 893   }
 894
 895   return false;
 896 }
 897
 898 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 899   const User *U = nullptr;
 900   unsigned Opcode = Instruction::UserOp1;
 901   bool InMBB = true;
 902
 903   if (const auto *I = dyn_cast<Instruction>(V)) {
 904     Opcode = I->getOpcode();
 905     U = I;
 906     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 907   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 908     Opcode = C->getOpcode();
 909     U = C;
 910   }
 911
 912   switch (Opcode) {
 913   default: break;
 914   case Instruction::BitCast:
 915     // Look past bitcasts if its operand is in the same BB.
 916     if (InMBB)
 917       return computeCallAddress(U->getOperand(0), Addr);
 918     break;
 919   case Instruction::IntToPtr:
 920     // Look past no-op inttoptrs if its operand is in the same BB.
 921     if (InMBB &&
 922         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 923             TLI.getPointerTy(DL))
 924       return computeCallAddress(U->getOperand(0), Addr);
 925     break;
 926   case Instruction::PtrToInt:
 927     // Look past no-op ptrtoints if its operand is in the same BB.
 928     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 929       return computeCallAddress(U->getOperand(0), Addr);
 930     break;
 931   }
 932
 933   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 934     Addr.setGlobalValue(GV);
 935     return true;
 936   }
 937
 938   // If all else fails, try to materialize the value in a register.
 939   if (!Addr.getGlobalValue()) {
 940     Addr.setReg(getRegForValue(V));
 941     return Addr.getReg() != 0;
 942   }
 943
 944   return false;
 945 }
 946
 947
 948 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 949   EVT evt = TLI.getValueType(DL, Ty, true);
 950
 951   // Only handle simple types.
 952   if (evt == MVT::Other || !evt.isSimple())
 953     return false;
 954   VT = evt.getSimpleVT();
 955
 956   // This is a legal type, but it's not something we handle in fast-isel.
 957   if (VT == MVT::f128)
 958     return false;
 959
 960   // Handle all other legal types, i.e. a register that will directly hold this
 961   // value.
 962   return TLI.isTypeLegal(VT);
 963 }
 964
 965 /// \brief Determine if the value type is supported by FastISel.
 966 ///
 967 /// FastISel for AArch64 can handle more value types than are legal. This adds
 968 /// simple value type such as i1, i8, and i16.
 969 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 970   if (Ty->isVectorTy() && !IsVectorAllowed)
 971     return false;
 972
 973   if (isTypeLegal(Ty, VT))
 974     return true;
 975
 976   // If this is a type than can be sign or zero-extended to a basic operation
 977   // go ahead and accept it now.
 978   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 979     return true;
 980
 981   return false;
 982 }
 983
 984 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 985   if (!isa<Instruction>(V))
 986     return true;
 987
 988   const auto *I = cast<Instruction>(V);
 989   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
 990 }
 991
 992 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 993   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 994   if (!ScaleFactor)
 995     return false;
 996
 997   bool ImmediateOffsetNeedsLowering = false;
 998   bool RegisterOffsetNeedsLowering = false;
 999   int64_t Offset = Addr.getOffset();
1000   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1001     ImmediateOffsetNeedsLowering = true;
1002   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1003            !isUInt<12>(Offset / ScaleFactor))
1004     ImmediateOffsetNeedsLowering = true;
1005
1006   // Cannot encode an offset register and an immediate offset in the same
1007   // instruction. Fold the immediate offset into the load/store instruction and
1008   // emit an additional add to take care of the offset register.
1009   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1010     RegisterOffsetNeedsLowering = true;
1011
1012   // Cannot encode zero register as base.
1013   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1014     RegisterOffsetNeedsLowering = true;
1015
1016   // If this is a stack pointer and the offset needs to be simplified then put
1017   // the alloca address into a register, set the base type back to register and
1018   // continue. This should almost never happen.
1019   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1020   {
1021     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1022     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1023             ResultReg)
1024       .addFrameIndex(Addr.getFI())
1025       .addImm(0)
1026       .addImm(0);
1027     Addr.setKind(Address::RegBase);
1028     Addr.setReg(ResultReg);
1029   }
1030
1031   if (RegisterOffsetNeedsLowering) {
1032     unsigned ResultReg = 0;
1033     if (Addr.getReg()) {
1034       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1035           Addr.getExtendType() == AArch64_AM::UXTW   )
1036         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1037                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1038                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1039                                   Addr.getShift());
1040       else
1041         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1042                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1043                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1044                                   Addr.getShift());
1045     } else {
1046       if (Addr.getExtendType() == AArch64_AM::UXTW)
1047         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1048                                /*Op0IsKill=*/false, Addr.getShift(),
1049                                /*IsZExt=*/true);
1050       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1051         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1052                                /*Op0IsKill=*/false, Addr.getShift(),
1053                                /*IsZExt=*/false);
1054       else
1055         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1056                                /*Op0IsKill=*/false, Addr.getShift());
1057     }
1058     if (!ResultReg)
1059       return false;
1060
1061     Addr.setReg(ResultReg);
1062     Addr.setOffsetReg(0);
1063     Addr.setShift(0);
1064     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1065   }
1066
1067   // Since the offset is too large for the load/store instruction get the
1068   // reg+offset into a register.
1069   if (ImmediateOffsetNeedsLowering) {
1070     unsigned ResultReg;
1071     if (Addr.getReg())
1072       // Try to fold the immediate into the add instruction.
1073       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1074     else
1075       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1076
1077     if (!ResultReg)
1078       return false;
1079     Addr.setReg(ResultReg);
1080     Addr.setOffset(0);
1081   }
1082   return true;
1083 }
1084
1085 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1086                                            const MachineInstrBuilder &MIB,
1087                                            MachineMemOperand::Flags Flags,
1088                                            unsigned ScaleFactor,
1089                                            MachineMemOperand *MMO) {
1090   int64_t Offset = Addr.getOffset() / ScaleFactor;
1091   // Frame base works a bit differently. Handle it separately.
1092   if (Addr.isFIBase()) {
1093     int FI = Addr.getFI();
1094     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1095     // and alignment should be based on the VT.
1096     MMO = FuncInfo.MF->getMachineMemOperand(
1097         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1098         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1099     // Now add the rest of the operands.
1100     MIB.addFrameIndex(FI).addImm(Offset);
1101   } else {
1102     assert(Addr.isRegBase() && "Unexpected address kind.");
1103     const MCInstrDesc &II = MIB->getDesc();
1104     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1105     Addr.setReg(
1106       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1107     Addr.setOffsetReg(
1108       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1109     if (Addr.getOffsetReg()) {
1110       assert(Addr.getOffset() == 0 && "Unexpected offset");
1111       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1112                       Addr.getExtendType() == AArch64_AM::SXTX;
1113       MIB.addReg(Addr.getReg());
1114       MIB.addReg(Addr.getOffsetReg());
1115       MIB.addImm(IsSigned);
1116       MIB.addImm(Addr.getShift() != 0);
1117     } else
1118       MIB.addReg(Addr.getReg()).addImm(Offset);
1119   }
1120
1121   if (MMO)
1122     MIB.addMemOperand(MMO);
1123 }
1124
1125 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1126                                      const Value *RHS, bool SetFlags,
1127                                      bool WantResult,  bool IsZExt) {
1128   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1129   bool NeedExtend = false;
1130   switch (RetVT.SimpleTy) {
1131   default:
1132     return 0;
1133   case MVT::i1:
1134     NeedExtend = true;
1135     break;
1136   case MVT::i8:
1137     NeedExtend = true;
1138     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1139     break;
1140   case MVT::i16:
1141     NeedExtend = true;
1142     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1143     break;
1144   case MVT::i32:  // fall-through
1145   case MVT::i64:
1146     break;
1147   }
1148   MVT SrcVT = RetVT;
1149   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1150
1151   // Canonicalize immediates to the RHS first.
1152   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1153     std::swap(LHS, RHS);
1154
1155   // Canonicalize mul by power of 2 to the RHS.
1156   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1157     if (isMulPowOf2(LHS))
1158       std::swap(LHS, RHS);
1159
1160   // Canonicalize shift immediate to the RHS.
1161   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1162     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1163       if (isa<ConstantInt>(SI->getOperand(1)))
1164         if (SI->getOpcode() == Instruction::Shl  ||
1165             SI->getOpcode() == Instruction::LShr ||
1166             SI->getOpcode() == Instruction::AShr   )
1167           std::swap(LHS, RHS);
1168
1169   unsigned LHSReg = getRegForValue(LHS);
1170   if (!LHSReg)
1171     return 0;
1172   bool LHSIsKill = hasTrivialKill(LHS);
1173
1174   if (NeedExtend)
1175     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1176
1177   unsigned ResultReg = 0;
1178   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1179     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1180     if (C->isNegative())
1181       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1182                                 SetFlags, WantResult);
1183     else
1184       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1185                                 WantResult);
1186   } else if (const auto *C = dyn_cast<Constant>(RHS))
1187     if (C->isNullValue())
1188       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1189                                 WantResult);
1190
1191   if (ResultReg)
1192     return ResultReg;
1193
1194   // Only extend the RHS within the instruction if there is a valid extend type.
1195   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1196       isValueAvailable(RHS)) {
1197     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1198       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1199         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1200           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1201           if (!RHSReg)
1202             return 0;
1203           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1204           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1205                                RHSIsKill, ExtendType, C->getZExtValue(),
1206                                SetFlags, WantResult);
1207         }
1208     unsigned RHSReg = getRegForValue(RHS);
1209     if (!RHSReg)
1210       return 0;
1211     bool RHSIsKill = hasTrivialKill(RHS);
1212     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1213                          ExtendType, 0, SetFlags, WantResult);
1214   }
1215
1216   // Check if the mul can be folded into the instruction.
1217   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1218     if (isMulPowOf2(RHS)) {
1219       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1220       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1221
1222       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1223         if (C->getValue().isPowerOf2())
1224           std::swap(MulLHS, MulRHS);
1225
1226       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1227       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1228       unsigned RHSReg = getRegForValue(MulLHS);
1229       if (!RHSReg)
1230         return 0;
1231       bool RHSIsKill = hasTrivialKill(MulLHS);
1232       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1233                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1234                                 WantResult);
1235       if (ResultReg)
1236         return ResultReg;
1237     }
1238   }
1239
1240   // Check if the shift can be folded into the instruction.
1241   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1242     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1243       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1244         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1245         switch (SI->getOpcode()) {
1246         default: break;
1247         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1248         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1249         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1250         }
1251         uint64_t ShiftVal = C->getZExtValue();
1252         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1253           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1254           if (!RHSReg)
1255             return 0;
1256           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1257           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1258                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1259                                     WantResult);
1260           if (ResultReg)
1261             return ResultReg;
1262         }
1263       }
1264     }
1265   }
1266
1267   unsigned RHSReg = getRegForValue(RHS);
1268   if (!RHSReg)
1269     return 0;
1270   bool RHSIsKill = hasTrivialKill(RHS);
1271
1272   if (NeedExtend)
1273     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1274
1275   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1276                        SetFlags, WantResult);
1277 }
1278
1279 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1280                                         bool LHSIsKill, unsigned RHSReg,
1281                                         bool RHSIsKill, bool SetFlags,
1282                                         bool WantResult) {
1283   assert(LHSReg && RHSReg && "Invalid register number.");
1284
1285   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1286     return 0;
1287
1288   static const unsigned OpcTable[2][2][2] = {
1289     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1290       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1291     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1292       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1293   };
1294   bool Is64Bit = RetVT == MVT::i64;
1295   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1296   const TargetRegisterClass *RC =
1297       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1298   unsigned ResultReg;
1299   if (WantResult)
1300     ResultReg = createResultReg(RC);
1301   else
1302     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1303
1304   const MCInstrDesc &II = TII.get(Opc);
1305   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1306   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1307   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1308       .addReg(LHSReg, getKillRegState(LHSIsKill))
1309       .addReg(RHSReg, getKillRegState(RHSIsKill));
1310   return ResultReg;
1311 }
1312
1313 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1314                                         bool LHSIsKill, uint64_t Imm,
1315                                         bool SetFlags, bool WantResult) {
1316   assert(LHSReg && "Invalid register number.");
1317
1318   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1319     return 0;
1320
1321   unsigned ShiftImm;
1322   if (isUInt<12>(Imm))
1323     ShiftImm = 0;
1324   else if ((Imm & 0xfff000) == Imm) {
1325     ShiftImm = 12;
1326     Imm >>= 12;
1327   } else
1328     return 0;
1329
1330   static const unsigned OpcTable[2][2][2] = {
1331     { { AArch64::SUBWri,  AArch64::SUBXri  },
1332       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1333     { { AArch64::SUBSWri, AArch64::SUBSXri },
1334       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1335   };
1336   bool Is64Bit = RetVT == MVT::i64;
1337   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1338   const TargetRegisterClass *RC;
1339   if (SetFlags)
1340     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1341   else
1342     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1343   unsigned ResultReg;
1344   if (WantResult)
1345     ResultReg = createResultReg(RC);
1346   else
1347     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1348
1349   const MCInstrDesc &II = TII.get(Opc);
1350   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1351   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1352       .addReg(LHSReg, getKillRegState(LHSIsKill))
1353       .addImm(Imm)
1354       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1355   return ResultReg;
1356 }
1357
1358 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1359                                         bool LHSIsKill, unsigned RHSReg,
1360                                         bool RHSIsKill,
1361                                         AArch64_AM::ShiftExtendType ShiftType,
1362                                         uint64_t ShiftImm, bool SetFlags,
1363                                         bool WantResult) {
1364   assert(LHSReg && RHSReg && "Invalid register number.");
1365
1366   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1367     return 0;
1368
1369   // Don't deal with undefined shifts.
1370   if (ShiftImm >= RetVT.getSizeInBits())
1371     return 0;
1372
1373   static const unsigned OpcTable[2][2][2] = {
1374     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1375       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1376     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1377       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1378   };
1379   bool Is64Bit = RetVT == MVT::i64;
1380   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1381   const TargetRegisterClass *RC =
1382       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1383   unsigned ResultReg;
1384   if (WantResult)
1385     ResultReg = createResultReg(RC);
1386   else
1387     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1388
1389   const MCInstrDesc &II = TII.get(Opc);
1390   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1391   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1392   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1393       .addReg(LHSReg, getKillRegState(LHSIsKill))
1394       .addReg(RHSReg, getKillRegState(RHSIsKill))
1395       .addImm(getShifterImm(ShiftType, ShiftImm));
1396   return ResultReg;
1397 }
1398
1399 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1400                                         bool LHSIsKill, unsigned RHSReg,
1401                                         bool RHSIsKill,
1402                                         AArch64_AM::ShiftExtendType ExtType,
1403                                         uint64_t ShiftImm, bool SetFlags,
1404                                         bool WantResult) {
1405   assert(LHSReg && RHSReg && "Invalid register number.");
1406
1407   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1408     return 0;
1409
1410   if (ShiftImm >= 4)
1411     return 0;
1412
1413   static const unsigned OpcTable[2][2][2] = {
1414     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1415       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1416     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1417       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1418   };
1419   bool Is64Bit = RetVT == MVT::i64;
1420   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1421   const TargetRegisterClass *RC = nullptr;
1422   if (SetFlags)
1423     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1424   else
1425     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1426   unsigned ResultReg;
1427   if (WantResult)
1428     ResultReg = createResultReg(RC);
1429   else
1430     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1431
1432   const MCInstrDesc &II = TII.get(Opc);
1433   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1434   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1435   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1436       .addReg(LHSReg, getKillRegState(LHSIsKill))
1437       .addReg(RHSReg, getKillRegState(RHSIsKill))
1438       .addImm(getArithExtendImm(ExtType, ShiftImm));
1439   return ResultReg;
1440 }
1441
1442 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1443   Type *Ty = LHS->getType();
1444   EVT EVT = TLI.getValueType(DL, Ty, true);
1445   if (!EVT.isSimple())
1446     return false;
1447   MVT VT = EVT.getSimpleVT();
1448
1449   switch (VT.SimpleTy) {
1450   default:
1451     return false;
1452   case MVT::i1:
1453   case MVT::i8:
1454   case MVT::i16:
1455   case MVT::i32:
1456   case MVT::i64:
1457     return emitICmp(VT, LHS, RHS, IsZExt);
1458   case MVT::f32:
1459   case MVT::f64:
1460     return emitFCmp(VT, LHS, RHS);
1461   }
1462 }
1463
1464 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1465                                bool IsZExt) {
1466   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1467                  IsZExt) != 0;
1468 }
1469
1470 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1471                                   uint64_t Imm) {
1472   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1473                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1474 }
1475
1476 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1477   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1478     return false;
1479
1480   // Check to see if the 2nd operand is a constant that we can encode directly
1481   // in the compare.
1482   bool UseImm = false;
1483   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1484     if (CFP->isZero() && !CFP->isNegative())
1485       UseImm = true;
1486
1487   unsigned LHSReg = getRegForValue(LHS);
1488   if (!LHSReg)
1489     return false;
1490   bool LHSIsKill = hasTrivialKill(LHS);
1491
1492   if (UseImm) {
1493     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1494     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1495         .addReg(LHSReg, getKillRegState(LHSIsKill));
1496     return true;
1497   }
1498
1499   unsigned RHSReg = getRegForValue(RHS);
1500   if (!RHSReg)
1501     return false;
1502   bool RHSIsKill = hasTrivialKill(RHS);
1503
1504   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1505   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1506       .addReg(LHSReg, getKillRegState(LHSIsKill))
1507       .addReg(RHSReg, getKillRegState(RHSIsKill));
1508   return true;
1509 }
1510
1511 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1512                                   bool SetFlags, bool WantResult, bool IsZExt) {
1513   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1514                     IsZExt);
1515 }
1516
1517 /// \brief This method is a wrapper to simplify add emission.
1518 ///
1519 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1520 /// that fails, then try to materialize the immediate into a register and use
1521 /// emitAddSub_rr instead.
1522 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1523                                       int64_t Imm) {
1524   unsigned ResultReg;
1525   if (Imm < 0)
1526     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1527   else
1528     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1529
1530   if (ResultReg)
1531     return ResultReg;
1532
1533   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1534   if (!CReg)
1535     return 0;
1536
1537   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1538   return ResultReg;
1539 }
1540
1541 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1542                                   bool SetFlags, bool WantResult, bool IsZExt) {
1543   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1544                     IsZExt);
1545 }
1546
1547 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1548                                       bool LHSIsKill, unsigned RHSReg,
1549                                       bool RHSIsKill, bool WantResult) {
1550   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1551                        RHSIsKill, /*SetFlags=*/true, WantResult);
1552 }
1553
1554 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1555                                       bool LHSIsKill, unsigned RHSReg,
1556                                       bool RHSIsKill,
1557                                       AArch64_AM::ShiftExtendType ShiftType,
1558                                       uint64_t ShiftImm, bool WantResult) {
1559   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1560                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1561                        WantResult);
1562 }
1563
1564 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1565                                         const Value *LHS, const Value *RHS) {
1566   // Canonicalize immediates to the RHS first.
1567   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1568     std::swap(LHS, RHS);
1569
1570   // Canonicalize mul by power-of-2 to the RHS.
1571   if (LHS->hasOneUse() && isValueAvailable(LHS))
1572     if (isMulPowOf2(LHS))
1573       std::swap(LHS, RHS);
1574
1575   // Canonicalize shift immediate to the RHS.
1576   if (LHS->hasOneUse() && isValueAvailable(LHS))
1577     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1578       if (isa<ConstantInt>(SI->getOperand(1)))
1579         std::swap(LHS, RHS);
1580
1581   unsigned LHSReg = getRegForValue(LHS);
1582   if (!LHSReg)
1583     return 0;
1584   bool LHSIsKill = hasTrivialKill(LHS);
1585
1586   unsigned ResultReg = 0;
1587   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1588     uint64_t Imm = C->getZExtValue();
1589     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1590   }
1591   if (ResultReg)
1592     return ResultReg;
1593
1594   // Check if the mul can be folded into the instruction.
1595   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1596     if (isMulPowOf2(RHS)) {
1597       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1598       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1599
1600       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1601         if (C->getValue().isPowerOf2())
1602           std::swap(MulLHS, MulRHS);
1603
1604       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1605       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1606
1607       unsigned RHSReg = getRegForValue(MulLHS);
1608       if (!RHSReg)
1609         return 0;
1610       bool RHSIsKill = hasTrivialKill(MulLHS);
1611       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1612                                    RHSIsKill, ShiftVal);
1613       if (ResultReg)
1614         return ResultReg;
1615     }
1616   }
1617
1618   // Check if the shift can be folded into the instruction.
1619   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1620     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1621       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1622         uint64_t ShiftVal = C->getZExtValue();
1623         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1624         if (!RHSReg)
1625           return 0;
1626         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1627         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1628                                      RHSIsKill, ShiftVal);
1629         if (ResultReg)
1630           return ResultReg;
1631       }
1632   }
1633
1634   unsigned RHSReg = getRegForValue(RHS);
1635   if (!RHSReg)
1636     return 0;
1637   bool RHSIsKill = hasTrivialKill(RHS);
1638
1639   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1640   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1641   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1642     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1643     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1644   }
1645   return ResultReg;
1646 }
1647
1648 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1649                                            unsigned LHSReg, bool LHSIsKill,
1650                                            uint64_t Imm) {
1651   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1652                 "ISD nodes are not consecutive!");
1653   static const unsigned OpcTable[3][2] = {
1654     { AArch64::ANDWri, AArch64::ANDXri },
1655     { AArch64::ORRWri, AArch64::ORRXri },
1656     { AArch64::EORWri, AArch64::EORXri }
1657   };
1658   const TargetRegisterClass *RC;
1659   unsigned Opc;
1660   unsigned RegSize;
1661   switch (RetVT.SimpleTy) {
1662   default:
1663     return 0;
1664   case MVT::i1:
1665   case MVT::i8:
1666   case MVT::i16:
1667   case MVT::i32: {
1668     unsigned Idx = ISDOpc - ISD::AND;
1669     Opc = OpcTable[Idx][0];
1670     RC = &AArch64::GPR32spRegClass;
1671     RegSize = 32;
1672     break;
1673   }
1674   case MVT::i64:
1675     Opc = OpcTable[ISDOpc - ISD::AND][1];
1676     RC = &AArch64::GPR64spRegClass;
1677     RegSize = 64;
1678     break;
1679   }
1680
1681   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1682     return 0;
1683
1684   unsigned ResultReg =
1685       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1686                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1687   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1688     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1689     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1690   }
1691   return ResultReg;
1692 }
1693
1694 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1695                                            unsigned LHSReg, bool LHSIsKill,
1696                                            unsigned RHSReg, bool RHSIsKill,
1697                                            uint64_t ShiftImm) {
1698   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1699                 "ISD nodes are not consecutive!");
1700   static const unsigned OpcTable[3][2] = {
1701     { AArch64::ANDWrs, AArch64::ANDXrs },
1702     { AArch64::ORRWrs, AArch64::ORRXrs },
1703     { AArch64::EORWrs, AArch64::EORXrs }
1704   };
1705
1706   // Don't deal with undefined shifts.
1707   if (ShiftImm >= RetVT.getSizeInBits())
1708     return 0;
1709
1710   const TargetRegisterClass *RC;
1711   unsigned Opc;
1712   switch (RetVT.SimpleTy) {
1713   default:
1714     return 0;
1715   case MVT::i1:
1716   case MVT::i8:
1717   case MVT::i16:
1718   case MVT::i32:
1719     Opc = OpcTable[ISDOpc - ISD::AND][0];
1720     RC = &AArch64::GPR32RegClass;
1721     break;
1722   case MVT::i64:
1723     Opc = OpcTable[ISDOpc - ISD::AND][1];
1724     RC = &AArch64::GPR64RegClass;
1725     break;
1726   }
1727   unsigned ResultReg =
1728       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1729                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1730   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1731     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1732     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1733   }
1734   return ResultReg;
1735 }
1736
1737 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1738                                      uint64_t Imm) {
1739   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1740 }
1741
1742 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1743                                    bool WantZExt, MachineMemOperand *MMO) {
1744   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1745     return 0;
1746
1747   // Simplify this down to something we can handle.
1748   if (!simplifyAddress(Addr, VT))
1749     return 0;
1750
1751   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1752   if (!ScaleFactor)
1753     llvm_unreachable("Unexpected value type.");
1754
1755   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1756   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1757   bool UseScaled = true;
1758   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1759     UseScaled = false;
1760     ScaleFactor = 1;
1761   }
1762
1763   static const unsigned GPOpcTable[2][8][4] = {
1764     // Sign-extend.
1765     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1766         AArch64::LDURXi  },
1767       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1768         AArch64::LDURXi  },
1769       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1770         AArch64::LDRXui  },
1771       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1772         AArch64::LDRXui  },
1773       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1774         AArch64::LDRXroX },
1775       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1776         AArch64::LDRXroX },
1777       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1778         AArch64::LDRXroW },
1779       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1780         AArch64::LDRXroW }
1781     },
1782     // Zero-extend.
1783     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1784         AArch64::LDURXi  },
1785       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1786         AArch64::LDURXi  },
1787       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1788         AArch64::LDRXui  },
1789       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1790         AArch64::LDRXui  },
1791       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1792         AArch64::LDRXroX },
1793       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1794         AArch64::LDRXroX },
1795       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1796         AArch64::LDRXroW },
1797       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1798         AArch64::LDRXroW }
1799     }
1800   };
1801
1802   static const unsigned FPOpcTable[4][2] = {
1803     { AArch64::LDURSi,  AArch64::LDURDi  },
1804     { AArch64::LDRSui,  AArch64::LDRDui  },
1805     { AArch64::LDRSroX, AArch64::LDRDroX },
1806     { AArch64::LDRSroW, AArch64::LDRDroW }
1807   };
1808
1809   unsigned Opc;
1810   const TargetRegisterClass *RC;
1811   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1812                       Addr.getOffsetReg();
1813   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1814   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1815       Addr.getExtendType() == AArch64_AM::SXTW)
1816     Idx++;
1817
1818   bool IsRet64Bit = RetVT == MVT::i64;
1819   switch (VT.SimpleTy) {
1820   default:
1821     llvm_unreachable("Unexpected value type.");
1822   case MVT::i1: // Intentional fall-through.
1823   case MVT::i8:
1824     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1825     RC = (IsRet64Bit && !WantZExt) ?
1826              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1827     break;
1828   case MVT::i16:
1829     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1830     RC = (IsRet64Bit && !WantZExt) ?
1831              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1832     break;
1833   case MVT::i32:
1834     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1835     RC = (IsRet64Bit && !WantZExt) ?
1836              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837     break;
1838   case MVT::i64:
1839     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1840     RC = &AArch64::GPR64RegClass;
1841     break;
1842   case MVT::f32:
1843     Opc = FPOpcTable[Idx][0];
1844     RC = &AArch64::FPR32RegClass;
1845     break;
1846   case MVT::f64:
1847     Opc = FPOpcTable[Idx][1];
1848     RC = &AArch64::FPR64RegClass;
1849     break;
1850   }
1851
1852   // Create the base instruction, then add the operands.
1853   unsigned ResultReg = createResultReg(RC);
1854   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1855                                     TII.get(Opc), ResultReg);
1856   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1857
1858   // Loading an i1 requires special handling.
1859   if (VT == MVT::i1) {
1860     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1861     assert(ANDReg && "Unexpected AND instruction emission failure.");
1862     ResultReg = ANDReg;
1863   }
1864
1865   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1866   // the 32bit reg to a 64bit reg.
1867   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1868     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1869     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1870             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1871         .addImm(0)
1872         .addReg(ResultReg, getKillRegState(true))
1873         .addImm(AArch64::sub_32);
1874     ResultReg = Reg64;
1875   }
1876   return ResultReg;
1877 }
1878
1879 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1880   MVT VT;
1881   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1882     return false;
1883
1884   if (VT.isVector())
1885     return selectOperator(I, I->getOpcode());
1886
1887   unsigned ResultReg;
1888   switch (I->getOpcode()) {
1889   default:
1890     llvm_unreachable("Unexpected instruction.");
1891   case Instruction::Add:
1892     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1893     break;
1894   case Instruction::Sub:
1895     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1896     break;
1897   }
1898   if (!ResultReg)
1899     return false;
1900
1901   updateValueMap(I, ResultReg);
1902   return true;
1903 }
1904
1905 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1906   MVT VT;
1907   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1908     return false;
1909
1910   if (VT.isVector())
1911     return selectOperator(I, I->getOpcode());
1912
1913   unsigned ResultReg;
1914   switch (I->getOpcode()) {
1915   default:
1916     llvm_unreachable("Unexpected instruction.");
1917   case Instruction::And:
1918     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1919     break;
1920   case Instruction::Or:
1921     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1922     break;
1923   case Instruction::Xor:
1924     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1925     break;
1926   }
1927   if (!ResultReg)
1928     return false;
1929
1930   updateValueMap(I, ResultReg);
1931   return true;
1932 }
1933
1934 bool AArch64FastISel::selectLoad(const Instruction *I) {
1935   MVT VT;
1936   // Verify we have a legal type before going any further.  Currently, we handle
1937   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1938   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1939   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1940       cast<LoadInst>(I)->isAtomic())
1941     return false;
1942
1943   const Value *SV = I->getOperand(0);
1944   if (TLI.supportSwiftError()) {
1945     // Swifterror values can come from either a function parameter with
1946     // swifterror attribute or an alloca with swifterror attribute.
1947     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1948       if (Arg->hasSwiftErrorAttr())
1949         return false;
1950     }
1951
1952     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1953       if (Alloca->isSwiftError())
1954         return false;
1955     }
1956   }
1957
1958   // See if we can handle this address.
1959   Address Addr;
1960   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1961     return false;
1962
1963   // Fold the following sign-/zero-extend into the load instruction.
1964   bool WantZExt = true;
1965   MVT RetVT = VT;
1966   const Value *IntExtVal = nullptr;
1967   if (I->hasOneUse()) {
1968     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1969       if (isTypeSupported(ZE->getType(), RetVT))
1970         IntExtVal = ZE;
1971       else
1972         RetVT = VT;
1973     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1974       if (isTypeSupported(SE->getType(), RetVT))
1975         IntExtVal = SE;
1976       else
1977         RetVT = VT;
1978       WantZExt = false;
1979     }
1980   }
1981
1982   unsigned ResultReg =
1983       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1984   if (!ResultReg)
1985     return false;
1986
1987   // There are a few different cases we have to handle, because the load or the
1988   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1989   // SelectionDAG. There is also an ordering issue when both instructions are in
1990   // different basic blocks.
1991   // 1.) The load instruction is selected by FastISel, but the integer extend
1992   //     not. This usually happens when the integer extend is in a different
1993   //     basic block and SelectionDAG took over for that basic block.
1994   // 2.) The load instruction is selected before the integer extend. This only
1995   //     happens when the integer extend is in a different basic block.
1996   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1997   //     by FastISel. This happens if there are instructions between the load
1998   //     and the integer extend that couldn't be selected by FastISel.
1999   if (IntExtVal) {
2000     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2001     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2002     // it when it selects the integer extend.
2003     unsigned Reg = lookUpRegForValue(IntExtVal);
2004     auto *MI = MRI.getUniqueVRegDef(Reg);
2005     if (!MI) {
2006       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2007         if (WantZExt) {
2008           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2009           std::prev(FuncInfo.InsertPt)->eraseFromParent();
2010           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2011         } else
2012           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2013                                                  /*IsKill=*/true,
2014                                                  AArch64::sub_32);
2015       }
2016       updateValueMap(I, ResultReg);
2017       return true;
2018     }
2019
2020     // The integer extend has already been emitted - delete all the instructions
2021     // that have been emitted by the integer extend lowering code and use the
2022     // result from the load instruction directly.
2023     while (MI) {
2024       Reg = 0;
2025       for (auto &Opnd : MI->uses()) {
2026         if (Opnd.isReg()) {
2027           Reg = Opnd.getReg();
2028           break;
2029         }
2030       }
2031       MI->eraseFromParent();
2032       MI = nullptr;
2033       if (Reg)
2034         MI = MRI.getUniqueVRegDef(Reg);
2035     }
2036     updateValueMap(IntExtVal, ResultReg);
2037     return true;
2038   }
2039
2040   updateValueMap(I, ResultReg);
2041   return true;
2042 }
2043
2044 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2045                                        unsigned AddrReg,
2046                                        MachineMemOperand *MMO) {
2047   unsigned Opc;
2048   switch (VT.SimpleTy) {
2049   default: return false;
2050   case MVT::i8:  Opc = AArch64::STLRB; break;
2051   case MVT::i16: Opc = AArch64::STLRH; break;
2052   case MVT::i32: Opc = AArch64::STLRW; break;
2053   case MVT::i64: Opc = AArch64::STLRX; break;
2054   }
2055
2056   const MCInstrDesc &II = TII.get(Opc);
2057   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2058   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2059   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2060       .addReg(SrcReg)
2061       .addReg(AddrReg)
2062       .addMemOperand(MMO);
2063   return true;
2064 }
2065
2066 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2067                                 MachineMemOperand *MMO) {
2068   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2069     return false;
2070
2071   // Simplify this down to something we can handle.
2072   if (!simplifyAddress(Addr, VT))
2073     return false;
2074
2075   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2076   if (!ScaleFactor)
2077     llvm_unreachable("Unexpected value type.");
2078
2079   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2080   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2081   bool UseScaled = true;
2082   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2083     UseScaled = false;
2084     ScaleFactor = 1;
2085   }
2086
2087   static const unsigned OpcTable[4][6] = {
2088     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2089       AArch64::STURSi,   AArch64::STURDi },
2090     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2091       AArch64::STRSui,   AArch64::STRDui },
2092     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2093       AArch64::STRSroX,  AArch64::STRDroX },
2094     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2095       AArch64::STRSroW,  AArch64::STRDroW }
2096   };
2097
2098   unsigned Opc;
2099   bool VTIsi1 = false;
2100   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2101                       Addr.getOffsetReg();
2102   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2103   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2104       Addr.getExtendType() == AArch64_AM::SXTW)
2105     Idx++;
2106
2107   switch (VT.SimpleTy) {
2108   default: llvm_unreachable("Unexpected value type.");
2109   case MVT::i1:  VTIsi1 = true;
2110   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2111   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2112   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2113   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2114   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2115   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2116   }
2117
2118   // Storing an i1 requires special handling.
2119   if (VTIsi1 && SrcReg != AArch64::WZR) {
2120     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2121     assert(ANDReg && "Unexpected AND instruction emission failure.");
2122     SrcReg = ANDReg;
2123   }
2124   // Create the base instruction, then add the operands.
2125   const MCInstrDesc &II = TII.get(Opc);
2126   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2127   MachineInstrBuilder MIB =
2128       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2129   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2130
2131   return true;
2132 }
2133
2134 bool AArch64FastISel::selectStore(const Instruction *I) {
2135   MVT VT;
2136   const Value *Op0 = I->getOperand(0);
2137   // Verify we have a legal type before going any further.  Currently, we handle
2138   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2139   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2140   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2141     return false;
2142
2143   const Value *PtrV = I->getOperand(1);
2144   if (TLI.supportSwiftError()) {
2145     // Swifterror values can come from either a function parameter with
2146     // swifterror attribute or an alloca with swifterror attribute.
2147     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2148       if (Arg->hasSwiftErrorAttr())
2149         return false;
2150     }
2151
2152     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2153       if (Alloca->isSwiftError())
2154         return false;
2155     }
2156   }
2157
2158   // Get the value to be stored into a register. Use the zero register directly
2159   // when possible to avoid an unnecessary copy and a wasted register.
2160   unsigned SrcReg = 0;
2161   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2162     if (CI->isZero())
2163       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2164   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2165     if (CF->isZero() && !CF->isNegative()) {
2166       VT = MVT::getIntegerVT(VT.getSizeInBits());
2167       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2168     }
2169   }
2170
2171   if (!SrcReg)
2172     SrcReg = getRegForValue(Op0);
2173
2174   if (!SrcReg)
2175     return false;
2176
2177   auto *SI = cast<StoreInst>(I);
2178
2179   // Try to emit a STLR for seq_cst/release.
2180   if (SI->isAtomic()) {
2181     AtomicOrdering Ord = SI->getOrdering();
2182     // The non-atomic instructions are sufficient for relaxed stores.
2183     if (isReleaseOrStronger(Ord)) {
2184       // The STLR addressing mode only supports a base reg; pass that directly.
2185       unsigned AddrReg = getRegForValue(PtrV);
2186       return emitStoreRelease(VT, SrcReg, AddrReg,
2187                               createMachineMemOperandFor(I));
2188     }
2189   }
2190
2191   // See if we can handle this address.
2192   Address Addr;
2193   if (!computeAddress(PtrV, Addr, Op0->getType()))
2194     return false;
2195
2196   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2197     return false;
2198   return true;
2199 }
2200
2201 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2202   switch (Pred) {
2203   case CmpInst::FCMP_ONE:
2204   case CmpInst::FCMP_UEQ:
2205   default:
2206     // AL is our "false" for now. The other two need more compares.
2207     return AArch64CC::AL;
2208   case CmpInst::ICMP_EQ:
2209   case CmpInst::FCMP_OEQ:
2210     return AArch64CC::EQ;
2211   case CmpInst::ICMP_SGT:
2212   case CmpInst::FCMP_OGT:
2213     return AArch64CC::GT;
2214   case CmpInst::ICMP_SGE:
2215   case CmpInst::FCMP_OGE:
2216     return AArch64CC::GE;
2217   case CmpInst::ICMP_UGT:
2218   case CmpInst::FCMP_UGT:
2219     return AArch64CC::HI;
2220   case CmpInst::FCMP_OLT:
2221     return AArch64CC::MI;
2222   case CmpInst::ICMP_ULE:
2223   case CmpInst::FCMP_OLE:
2224     return AArch64CC::LS;
2225   case CmpInst::FCMP_ORD:
2226     return AArch64CC::VC;
2227   case CmpInst::FCMP_UNO:
2228     return AArch64CC::VS;
2229   case CmpInst::FCMP_UGE:
2230     return AArch64CC::PL;
2231   case CmpInst::ICMP_SLT:
2232   case CmpInst::FCMP_ULT:
2233     return AArch64CC::LT;
2234   case CmpInst::ICMP_SLE:
2235   case CmpInst::FCMP_ULE:
2236     return AArch64CC::LE;
2237   case CmpInst::FCMP_UNE:
2238   case CmpInst::ICMP_NE:
2239     return AArch64CC::NE;
2240   case CmpInst::ICMP_UGE:
2241     return AArch64CC::HS;
2242   case CmpInst::ICMP_ULT:
2243     return AArch64CC::LO;
2244   }
2245 }
2246
2247 /// \brief Try to emit a combined compare-and-branch instruction.
2248 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2249   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2250   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2251   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2252
2253   const Value *LHS = CI->getOperand(0);
2254   const Value *RHS = CI->getOperand(1);
2255
2256   MVT VT;
2257   if (!isTypeSupported(LHS->getType(), VT))
2258     return false;
2259
2260   unsigned BW = VT.getSizeInBits();
2261   if (BW > 64)
2262     return false;
2263
2264   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2265   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2266
2267   // Try to take advantage of fallthrough opportunities.
2268   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2269     std::swap(TBB, FBB);
2270     Predicate = CmpInst::getInversePredicate(Predicate);
2271   }
2272
2273   int TestBit = -1;
2274   bool IsCmpNE;
2275   switch (Predicate) {
2276   default:
2277     return false;
2278   case CmpInst::ICMP_EQ:
2279   case CmpInst::ICMP_NE:
2280     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2281       std::swap(LHS, RHS);
2282
2283     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2284       return false;
2285
2286     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2287       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2288         const Value *AndLHS = AI->getOperand(0);
2289         const Value *AndRHS = AI->getOperand(1);
2290
2291         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2292           if (C->getValue().isPowerOf2())
2293             std::swap(AndLHS, AndRHS);
2294
2295         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2296           if (C->getValue().isPowerOf2()) {
2297             TestBit = C->getValue().logBase2();
2298             LHS = AndLHS;
2299           }
2300       }
2301
2302     if (VT == MVT::i1)
2303       TestBit = 0;
2304
2305     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2306     break;
2307   case CmpInst::ICMP_SLT:
2308   case CmpInst::ICMP_SGE:
2309     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2310       return false;
2311
2312     TestBit = BW - 1;
2313     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2314     break;
2315   case CmpInst::ICMP_SGT:
2316   case CmpInst::ICMP_SLE:
2317     if (!isa<ConstantInt>(RHS))
2318       return false;
2319
2320     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2321       return false;
2322
2323     TestBit = BW - 1;
2324     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2325     break;
2326   } // end switch
2327
2328   static const unsigned OpcTable[2][2][2] = {
2329     { {AArch64::CBZW,  AArch64::CBZX },
2330       {AArch64::CBNZW, AArch64::CBNZX} },
2331     { {AArch64::TBZW,  AArch64::TBZX },
2332       {AArch64::TBNZW, AArch64::TBNZX} }
2333   };
2334
2335   bool IsBitTest = TestBit != -1;
2336   bool Is64Bit = BW == 64;
2337   if (TestBit < 32 && TestBit >= 0)
2338     Is64Bit = false;
2339
2340   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2341   const MCInstrDesc &II = TII.get(Opc);
2342
2343   unsigned SrcReg = getRegForValue(LHS);
2344   if (!SrcReg)
2345     return false;
2346   bool SrcIsKill = hasTrivialKill(LHS);
2347
2348   if (BW == 64 && !Is64Bit)
2349     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2350                                         AArch64::sub_32);
2351
2352   if ((BW < 32) && !IsBitTest)
2353     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2354
2355   // Emit the combined compare and branch instruction.
2356   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2357   MachineInstrBuilder MIB =
2358       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2359           .addReg(SrcReg, getKillRegState(SrcIsKill));
2360   if (IsBitTest)
2361     MIB.addImm(TestBit);
2362   MIB.addMBB(TBB);
2363
2364   finishCondBranch(BI->getParent(), TBB, FBB);
2365   return true;
2366 }
2367
2368 bool AArch64FastISel::selectBranch(const Instruction *I) {
2369   const BranchInst *BI = cast<BranchInst>(I);
2370   if (BI->isUnconditional()) {
2371     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2372     fastEmitBranch(MSucc, BI->getDebugLoc());
2373     return true;
2374   }
2375
2376   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2377   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2378
2379   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2380     if (CI->hasOneUse() && isValueAvailable(CI)) {
2381       // Try to optimize or fold the cmp.
2382       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2383       switch (Predicate) {
2384       default:
2385         break;
2386       case CmpInst::FCMP_FALSE:
2387         fastEmitBranch(FBB, DbgLoc);
2388         return true;
2389       case CmpInst::FCMP_TRUE:
2390         fastEmitBranch(TBB, DbgLoc);
2391         return true;
2392       }
2393
2394       // Try to emit a combined compare-and-branch first.
2395       if (emitCompareAndBranch(BI))
2396         return true;
2397
2398       // Try to take advantage of fallthrough opportunities.
2399       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2400         std::swap(TBB, FBB);
2401         Predicate = CmpInst::getInversePredicate(Predicate);
2402       }
2403
2404       // Emit the cmp.
2405       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2406         return false;
2407
2408       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2409       // instruction.
2410       AArch64CC::CondCode CC = getCompareCC(Predicate);
2411       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2412       switch (Predicate) {
2413       default:
2414         break;
2415       case CmpInst::FCMP_UEQ:
2416         ExtraCC = AArch64CC::EQ;
2417         CC = AArch64CC::VS;
2418         break;
2419       case CmpInst::FCMP_ONE:
2420         ExtraCC = AArch64CC::MI;
2421         CC = AArch64CC::GT;
2422         break;
2423       }
2424       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2425
2426       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2427       if (ExtraCC != AArch64CC::AL) {
2428         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2429             .addImm(ExtraCC)
2430             .addMBB(TBB);
2431       }
2432
2433       // Emit the branch.
2434       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2435           .addImm(CC)
2436           .addMBB(TBB);
2437
2438       finishCondBranch(BI->getParent(), TBB, FBB);
2439       return true;
2440     }
2441   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2442     uint64_t Imm = CI->getZExtValue();
2443     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2444     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2445         .addMBB(Target);
2446
2447     // Obtain the branch probability and add the target to the successor list.
2448     if (FuncInfo.BPI) {
2449       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2450           BI->getParent(), Target->getBasicBlock());
2451       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2452     } else
2453       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2454     return true;
2455   } else {
2456     AArch64CC::CondCode CC = AArch64CC::NE;
2457     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2458       // Fake request the condition, otherwise the intrinsic might be completely
2459       // optimized away.
2460       unsigned CondReg = getRegForValue(BI->getCondition());
2461       if (!CondReg)
2462         return false;
2463
2464       // Emit the branch.
2465       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2466         .addImm(CC)
2467         .addMBB(TBB);
2468
2469       finishCondBranch(BI->getParent(), TBB, FBB);
2470       return true;
2471     }
2472   }
2473
2474   unsigned CondReg = getRegForValue(BI->getCondition());
2475   if (CondReg == 0)
2476     return false;
2477   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2478
2479   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2480   unsigned Opcode = AArch64::TBNZW;
2481   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2482     std::swap(TBB, FBB);
2483     Opcode = AArch64::TBZW;
2484   }
2485
2486   const MCInstrDesc &II = TII.get(Opcode);
2487   unsigned ConstrainedCondReg
2488     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2489   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2490       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2491       .addImm(0)
2492       .addMBB(TBB);
2493
2494   finishCondBranch(BI->getParent(), TBB, FBB);
2495   return true;
2496 }
2497
2498 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2499   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2500   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2501   if (AddrReg == 0)
2502     return false;
2503
2504   // Emit the indirect branch.
2505   const MCInstrDesc &II = TII.get(AArch64::BR);
2506   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2507   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2508
2509   // Make sure the CFG is up-to-date.
2510   for (auto *Succ : BI->successors())
2511     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2512
2513   return true;
2514 }
2515
2516 bool AArch64FastISel::selectCmp(const Instruction *I) {
2517   const CmpInst *CI = cast<CmpInst>(I);
2518
2519   // Vectors of i1 are weird: bail out.
2520   if (CI->getType()->isVectorTy())
2521     return false;
2522
2523   // Try to optimize or fold the cmp.
2524   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2525   unsigned ResultReg = 0;
2526   switch (Predicate) {
2527   default:
2528     break;
2529   case CmpInst::FCMP_FALSE:
2530     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2531     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2532             TII.get(TargetOpcode::COPY), ResultReg)
2533         .addReg(AArch64::WZR, getKillRegState(true));
2534     break;
2535   case CmpInst::FCMP_TRUE:
2536     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2537     break;
2538   }
2539
2540   if (ResultReg) {
2541     updateValueMap(I, ResultReg);
2542     return true;
2543   }
2544
2545   // Emit the cmp.
2546   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2547     return false;
2548
2549   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550
2551   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2552   // condition codes are inverted, because they are used by CSINC.
2553   static unsigned CondCodeTable[2][2] = {
2554     { AArch64CC::NE, AArch64CC::VC },
2555     { AArch64CC::PL, AArch64CC::LE }
2556   };
2557   unsigned *CondCodes = nullptr;
2558   switch (Predicate) {
2559   default:
2560     break;
2561   case CmpInst::FCMP_UEQ:
2562     CondCodes = &CondCodeTable[0][0];
2563     break;
2564   case CmpInst::FCMP_ONE:
2565     CondCodes = &CondCodeTable[1][0];
2566     break;
2567   }
2568
2569   if (CondCodes) {
2570     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2571     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2572             TmpReg1)
2573         .addReg(AArch64::WZR, getKillRegState(true))
2574         .addReg(AArch64::WZR, getKillRegState(true))
2575         .addImm(CondCodes[0]);
2576     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2577             ResultReg)
2578         .addReg(TmpReg1, getKillRegState(true))
2579         .addReg(AArch64::WZR, getKillRegState(true))
2580         .addImm(CondCodes[1]);
2581
2582     updateValueMap(I, ResultReg);
2583     return true;
2584   }
2585
2586   // Now set a register based on the comparison.
2587   AArch64CC::CondCode CC = getCompareCC(Predicate);
2588   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2589   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2590   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2591           ResultReg)
2592       .addReg(AArch64::WZR, getKillRegState(true))
2593       .addReg(AArch64::WZR, getKillRegState(true))
2594       .addImm(invertedCC);
2595
2596   updateValueMap(I, ResultReg);
2597   return true;
2598 }
2599
2600 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2601 /// value.
2602 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2603   if (!SI->getType()->isIntegerTy(1))
2604     return false;
2605
2606   const Value *Src1Val, *Src2Val;
2607   unsigned Opc = 0;
2608   bool NeedExtraOp = false;
2609   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2610     if (CI->isOne()) {
2611       Src1Val = SI->getCondition();
2612       Src2Val = SI->getFalseValue();
2613       Opc = AArch64::ORRWrr;
2614     } else {
2615       assert(CI->isZero());
2616       Src1Val = SI->getFalseValue();
2617       Src2Val = SI->getCondition();
2618       Opc = AArch64::BICWrr;
2619     }
2620   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2621     if (CI->isOne()) {
2622       Src1Val = SI->getCondition();
2623       Src2Val = SI->getTrueValue();
2624       Opc = AArch64::ORRWrr;
2625       NeedExtraOp = true;
2626     } else {
2627       assert(CI->isZero());
2628       Src1Val = SI->getCondition();
2629       Src2Val = SI->getTrueValue();
2630       Opc = AArch64::ANDWrr;
2631     }
2632   }
2633
2634   if (!Opc)
2635     return false;
2636
2637   unsigned Src1Reg = getRegForValue(Src1Val);
2638   if (!Src1Reg)
2639     return false;
2640   bool Src1IsKill = hasTrivialKill(Src1Val);
2641
2642   unsigned Src2Reg = getRegForValue(Src2Val);
2643   if (!Src2Reg)
2644     return false;
2645   bool Src2IsKill = hasTrivialKill(Src2Val);
2646
2647   if (NeedExtraOp) {
2648     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2649     Src1IsKill = true;
2650   }
2651   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2652                                        Src1IsKill, Src2Reg, Src2IsKill);
2653   updateValueMap(SI, ResultReg);
2654   return true;
2655 }
2656
2657 bool AArch64FastISel::selectSelect(const Instruction *I) {
2658   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2659   MVT VT;
2660   if (!isTypeSupported(I->getType(), VT))
2661     return false;
2662
2663   unsigned Opc;
2664   const TargetRegisterClass *RC;
2665   switch (VT.SimpleTy) {
2666   default:
2667     return false;
2668   case MVT::i1:
2669   case MVT::i8:
2670   case MVT::i16:
2671   case MVT::i32:
2672     Opc = AArch64::CSELWr;
2673     RC = &AArch64::GPR32RegClass;
2674     break;
2675   case MVT::i64:
2676     Opc = AArch64::CSELXr;
2677     RC = &AArch64::GPR64RegClass;
2678     break;
2679   case MVT::f32:
2680     Opc = AArch64::FCSELSrrr;
2681     RC = &AArch64::FPR32RegClass;
2682     break;
2683   case MVT::f64:
2684     Opc = AArch64::FCSELDrrr;
2685     RC = &AArch64::FPR64RegClass;
2686     break;
2687   }
2688
2689   const SelectInst *SI = cast<SelectInst>(I);
2690   const Value *Cond = SI->getCondition();
2691   AArch64CC::CondCode CC = AArch64CC::NE;
2692   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2693
2694   if (optimizeSelect(SI))
2695     return true;
2696
2697   // Try to pickup the flags, so we don't have to emit another compare.
2698   if (foldXALUIntrinsic(CC, I, Cond)) {
2699     // Fake request the condition to force emission of the XALU intrinsic.
2700     unsigned CondReg = getRegForValue(Cond);
2701     if (!CondReg)
2702       return false;
2703   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2704              isValueAvailable(Cond)) {
2705     const auto *Cmp = cast<CmpInst>(Cond);
2706     // Try to optimize or fold the cmp.
2707     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2708     const Value *FoldSelect = nullptr;
2709     switch (Predicate) {
2710     default:
2711       break;
2712     case CmpInst::FCMP_FALSE:
2713       FoldSelect = SI->getFalseValue();
2714       break;
2715     case CmpInst::FCMP_TRUE:
2716       FoldSelect = SI->getTrueValue();
2717       break;
2718     }
2719
2720     if (FoldSelect) {
2721       unsigned SrcReg = getRegForValue(FoldSelect);
2722       if (!SrcReg)
2723         return false;
2724       unsigned UseReg = lookUpRegForValue(SI);
2725       if (UseReg)
2726         MRI.clearKillFlags(UseReg);
2727
2728       updateValueMap(I, SrcReg);
2729       return true;
2730     }
2731
2732     // Emit the cmp.
2733     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2734       return false;
2735
2736     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2737     CC = getCompareCC(Predicate);
2738     switch (Predicate) {
2739     default:
2740       break;
2741     case CmpInst::FCMP_UEQ:
2742       ExtraCC = AArch64CC::EQ;
2743       CC = AArch64CC::VS;
2744       break;
2745     case CmpInst::FCMP_ONE:
2746       ExtraCC = AArch64CC::MI;
2747       CC = AArch64CC::GT;
2748       break;
2749     }
2750     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2751   } else {
2752     unsigned CondReg = getRegForValue(Cond);
2753     if (!CondReg)
2754       return false;
2755     bool CondIsKill = hasTrivialKill(Cond);
2756
2757     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2758     CondReg = constrainOperandRegClass(II, CondReg, 1);
2759
2760     // Emit a TST instruction (ANDS wzr, reg, #imm).
2761     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2762             AArch64::WZR)
2763         .addReg(CondReg, getKillRegState(CondIsKill))
2764         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2765   }
2766
2767   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2768   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2769
2770   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2771   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2772
2773   if (!Src1Reg || !Src2Reg)
2774     return false;
2775
2776   if (ExtraCC != AArch64CC::AL) {
2777     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2778                                Src2IsKill, ExtraCC);
2779     Src2IsKill = true;
2780   }
2781   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2782                                         Src2IsKill, CC);
2783   updateValueMap(I, ResultReg);
2784   return true;
2785 }
2786
2787 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2788   Value *V = I->getOperand(0);
2789   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2790     return false;
2791
2792   unsigned Op = getRegForValue(V);
2793   if (Op == 0)
2794     return false;
2795
2796   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2797   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2798           ResultReg).addReg(Op);
2799   updateValueMap(I, ResultReg);
2800   return true;
2801 }
2802
2803 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2804   Value *V = I->getOperand(0);
2805   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2806     return false;
2807
2808   unsigned Op = getRegForValue(V);
2809   if (Op == 0)
2810     return false;
2811
2812   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2813   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2814           ResultReg).addReg(Op);
2815   updateValueMap(I, ResultReg);
2816   return true;
2817 }
2818
2819 // FPToUI and FPToSI
2820 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2821   MVT DestVT;
2822   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2823     return false;
2824
2825   unsigned SrcReg = getRegForValue(I->getOperand(0));
2826   if (SrcReg == 0)
2827     return false;
2828
2829   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2830   if (SrcVT == MVT::f128)
2831     return false;
2832
2833   unsigned Opc;
2834   if (SrcVT == MVT::f64) {
2835     if (Signed)
2836       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2837     else
2838       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2839   } else {
2840     if (Signed)
2841       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2842     else
2843       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2844   }
2845   unsigned ResultReg = createResultReg(
2846       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2847   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2848       .addReg(SrcReg);
2849   updateValueMap(I, ResultReg);
2850   return true;
2851 }
2852
2853 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2854   MVT DestVT;
2855   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2856     return false;
2857   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2858          "Unexpected value type.");
2859
2860   unsigned SrcReg = getRegForValue(I->getOperand(0));
2861   if (!SrcReg)
2862     return false;
2863   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2864
2865   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2866
2867   // Handle sign-extension.
2868   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2869     SrcReg =
2870         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2871     if (!SrcReg)
2872       return false;
2873     SrcIsKill = true;
2874   }
2875
2876   unsigned Opc;
2877   if (SrcVT == MVT::i64) {
2878     if (Signed)
2879       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2880     else
2881       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2882   } else {
2883     if (Signed)
2884       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2885     else
2886       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2887   }
2888
2889   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2890                                       SrcIsKill);
2891   updateValueMap(I, ResultReg);
2892   return true;
2893 }
2894
2895 bool AArch64FastISel::fastLowerArguments() {
2896   if (!FuncInfo.CanLowerReturn)
2897     return false;
2898
2899   const Function *F = FuncInfo.Fn;
2900   if (F->isVarArg())
2901     return false;
2902
2903   CallingConv::ID CC = F->getCallingConv();
2904   if (CC != CallingConv::C && CC != CallingConv::Swift)
2905     return false;
2906
2907   // Only handle simple cases of up to 8 GPR and FPR each.
2908   unsigned GPRCnt = 0;
2909   unsigned FPRCnt = 0;
2910   for (auto const &Arg : F->args()) {
2911     if (Arg.hasAttribute(Attribute::ByVal) ||
2912         Arg.hasAttribute(Attribute::InReg) ||
2913         Arg.hasAttribute(Attribute::StructRet) ||
2914         Arg.hasAttribute(Attribute::SwiftSelf) ||
2915         Arg.hasAttribute(Attribute::SwiftError) ||
2916         Arg.hasAttribute(Attribute::Nest))
2917       return false;
2918
2919     Type *ArgTy = Arg.getType();
2920     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2921       return false;
2922
2923     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2924     if (!ArgVT.isSimple())
2925       return false;
2926
2927     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2928     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2929       return false;
2930
2931     if (VT.isVector() &&
2932         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2933       return false;
2934
2935     if (VT >= MVT::i1 && VT <= MVT::i64)
2936       ++GPRCnt;
2937     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2938              VT.is128BitVector())
2939       ++FPRCnt;
2940     else
2941       return false;
2942
2943     if (GPRCnt > 8 || FPRCnt > 8)
2944       return false;
2945   }
2946
2947   static const MCPhysReg Registers[6][8] = {
2948     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2949       AArch64::W5, AArch64::W6, AArch64::W7 },
2950     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2951       AArch64::X5, AArch64::X6, AArch64::X7 },
2952     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2953       AArch64::H5, AArch64::H6, AArch64::H7 },
2954     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2955       AArch64::S5, AArch64::S6, AArch64::S7 },
2956     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2957       AArch64::D5, AArch64::D6, AArch64::D7 },
2958     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2959       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2960   };
2961
2962   unsigned GPRIdx = 0;
2963   unsigned FPRIdx = 0;
2964   for (auto const &Arg : F->args()) {
2965     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2966     unsigned SrcReg;
2967     const TargetRegisterClass *RC;
2968     if (VT >= MVT::i1 && VT <= MVT::i32) {
2969       SrcReg = Registers[0][GPRIdx++];
2970       RC = &AArch64::GPR32RegClass;
2971       VT = MVT::i32;
2972     } else if (VT == MVT::i64) {
2973       SrcReg = Registers[1][GPRIdx++];
2974       RC = &AArch64::GPR64RegClass;
2975     } else if (VT == MVT::f16) {
2976       SrcReg = Registers[2][FPRIdx++];
2977       RC = &AArch64::FPR16RegClass;
2978     } else if (VT ==  MVT::f32) {
2979       SrcReg = Registers[3][FPRIdx++];
2980       RC = &AArch64::FPR32RegClass;
2981     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2982       SrcReg = Registers[4][FPRIdx++];
2983       RC = &AArch64::FPR64RegClass;
2984     } else if (VT.is128BitVector()) {
2985       SrcReg = Registers[5][FPRIdx++];
2986       RC = &AArch64::FPR128RegClass;
2987     } else
2988       llvm_unreachable("Unexpected value type.");
2989
2990     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2991     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2992     // Without this, EmitLiveInCopies may eliminate the livein if its only
2993     // use is a bitcast (which isn't turned into an instruction).
2994     unsigned ResultReg = createResultReg(RC);
2995     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2996             TII.get(TargetOpcode::COPY), ResultReg)
2997         .addReg(DstReg, getKillRegState(true));
2998     updateValueMap(&Arg, ResultReg);
2999   }
3000   return true;
3001 }
3002
3003 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3004                                       SmallVectorImpl<MVT> &OutVTs,
3005                                       unsigned &NumBytes) {
3006   CallingConv::ID CC = CLI.CallConv;
3007   SmallVector<CCValAssign, 16> ArgLocs;
3008   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3009   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3010
3011   // Get a count of how many bytes are to be pushed on the stack.
3012   NumBytes = CCInfo.getNextStackOffset();
3013
3014   // Issue CALLSEQ_START
3015   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3016   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3017     .addImm(NumBytes);
3018
3019   // Process the args.
3020   for (CCValAssign &VA : ArgLocs) {
3021     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3022     MVT ArgVT = OutVTs[VA.getValNo()];
3023
3024     unsigned ArgReg = getRegForValue(ArgVal);
3025     if (!ArgReg)
3026       return false;
3027
3028     // Handle arg promotion: SExt, ZExt, AExt.
3029     switch (VA.getLocInfo()) {
3030     case CCValAssign::Full:
3031       break;
3032     case CCValAssign::SExt: {
3033       MVT DestVT = VA.getLocVT();
3034       MVT SrcVT = ArgVT;
3035       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3036       if (!ArgReg)
3037         return false;
3038       break;
3039     }
3040     case CCValAssign::AExt:
3041     // Intentional fall-through.
3042     case CCValAssign::ZExt: {
3043       MVT DestVT = VA.getLocVT();
3044       MVT SrcVT = ArgVT;
3045       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3046       if (!ArgReg)
3047         return false;
3048       break;
3049     }
3050     default:
3051       llvm_unreachable("Unknown arg promotion!");
3052     }
3053
3054     // Now copy/store arg to correct locations.
3055     if (VA.isRegLoc() && !VA.needsCustom()) {
3056       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3057               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3058       CLI.OutRegs.push_back(VA.getLocReg());
3059     } else if (VA.needsCustom()) {
3060       // FIXME: Handle custom args.
3061       return false;
3062     } else {
3063       assert(VA.isMemLoc() && "Assuming store on stack.");
3064
3065       // Don't emit stores for undef values.
3066       if (isa<UndefValue>(ArgVal))
3067         continue;
3068
3069       // Need to store on the stack.
3070       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3071
3072       unsigned BEAlign = 0;
3073       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3074         BEAlign = 8 - ArgSize;
3075
3076       Address Addr;
3077       Addr.setKind(Address::RegBase);
3078       Addr.setReg(AArch64::SP);
3079       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3080
3081       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3082       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3083           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3084           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3085
3086       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3087         return false;
3088     }
3089   }
3090   return true;
3091 }
3092
3093 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3094                                  unsigned NumBytes) {
3095   CallingConv::ID CC = CLI.CallConv;
3096
3097   // Issue CALLSEQ_END
3098   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3099   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3100     .addImm(NumBytes).addImm(0);
3101
3102   // Now the return value.
3103   if (RetVT != MVT::isVoid) {
3104     SmallVector<CCValAssign, 16> RVLocs;
3105     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3106     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3107
3108     // Only handle a single return value.
3109     if (RVLocs.size() != 1)
3110       return false;
3111
3112     // Copy all of the result registers out of their specified physreg.
3113     MVT CopyVT = RVLocs[0].getValVT();
3114
3115     // TODO: Handle big-endian results
3116     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3117       return false;
3118
3119     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3120     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3121             TII.get(TargetOpcode::COPY), ResultReg)
3122         .addReg(RVLocs[0].getLocReg());
3123     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3124
3125     CLI.ResultReg = ResultReg;
3126     CLI.NumResultRegs = 1;
3127   }
3128
3129   return true;
3130 }
3131
3132 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3133   CallingConv::ID CC  = CLI.CallConv;
3134   bool IsTailCall     = CLI.IsTailCall;
3135   bool IsVarArg       = CLI.IsVarArg;
3136   const Value *Callee = CLI.Callee;
3137   MCSymbol *Symbol = CLI.Symbol;
3138
3139   if (!Callee && !Symbol)
3140     return false;
3141
3142   // Allow SelectionDAG isel to handle tail calls.
3143   if (IsTailCall)
3144     return false;
3145
3146   CodeModel::Model CM = TM.getCodeModel();
3147   // Only support the small-addressing and large code models.
3148   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3149     return false;
3150
3151   // FIXME: Add large code model support for ELF.
3152   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3153     return false;
3154
3155   // Let SDISel handle vararg functions.
3156   if (IsVarArg)
3157     return false;
3158
3159   // FIXME: Only handle *simple* calls for now.
3160   MVT RetVT;
3161   if (CLI.RetTy->isVoidTy())
3162     RetVT = MVT::isVoid;
3163   else if (!isTypeLegal(CLI.RetTy, RetVT))
3164     return false;
3165
3166   for (auto Flag : CLI.OutFlags)
3167     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3168         Flag.isSwiftSelf() || Flag.isSwiftError())
3169       return false;
3170
3171   // Set up the argument vectors.
3172   SmallVector<MVT, 16> OutVTs;
3173   OutVTs.reserve(CLI.OutVals.size());
3174
3175   for (auto *Val : CLI.OutVals) {
3176     MVT VT;
3177     if (!isTypeLegal(Val->getType(), VT) &&
3178         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3179       return false;
3180
3181     // We don't handle vector parameters yet.
3182     if (VT.isVector() || VT.getSizeInBits() > 64)
3183       return false;
3184
3185     OutVTs.push_back(VT);
3186   }
3187
3188   Address Addr;
3189   if (Callee && !computeCallAddress(Callee, Addr))
3190     return false;
3191
3192   // Handle the arguments now that we've gotten them.
3193   unsigned NumBytes;
3194   if (!processCallArgs(CLI, OutVTs, NumBytes))
3195     return false;
3196
3197   // Issue the call.
3198   MachineInstrBuilder MIB;
3199   if (Subtarget->useSmallAddressing()) {
3200     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3201     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3202     if (Symbol)
3203       MIB.addSym(Symbol, 0);
3204     else if (Addr.getGlobalValue())
3205       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3206     else if (Addr.getReg()) {
3207       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3208       MIB.addReg(Reg);
3209     } else
3210       return false;
3211   } else {
3212     unsigned CallReg = 0;
3213     if (Symbol) {
3214       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3215       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3216               ADRPReg)
3217           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3218
3219       CallReg = createResultReg(&AArch64::GPR64RegClass);
3220       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3221               TII.get(AArch64::LDRXui), CallReg)
3222           .addReg(ADRPReg)
3223           .addSym(Symbol,
3224                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3225     } else if (Addr.getGlobalValue())
3226       CallReg = materializeGV(Addr.getGlobalValue());
3227     else if (Addr.getReg())
3228       CallReg = Addr.getReg();
3229
3230     if (!CallReg)
3231       return false;
3232
3233     const MCInstrDesc &II = TII.get(AArch64::BLR);
3234     CallReg = constrainOperandRegClass(II, CallReg, 0);
3235     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3236   }
3237
3238   // Add implicit physical register uses to the call.
3239   for (auto Reg : CLI.OutRegs)
3240     MIB.addReg(Reg, RegState::Implicit);
3241
3242   // Add a register mask with the call-preserved registers.
3243   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3244   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3245
3246   CLI.Call = MIB;
3247
3248   // Finish off the call including any return values.
3249   return finishCall(CLI, RetVT, NumBytes);
3250 }
3251
3252 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3253   if (Alignment)
3254     return Len / Alignment <= 4;
3255   else
3256     return Len < 32;
3257 }
3258
3259 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3260                                          uint64_t Len, unsigned Alignment) {
3261   // Make sure we don't bloat code by inlining very large memcpy's.
3262   if (!isMemCpySmall(Len, Alignment))
3263     return false;
3264
3265   int64_t UnscaledOffset = 0;
3266   Address OrigDest = Dest;
3267   Address OrigSrc = Src;
3268
3269   while (Len) {
3270     MVT VT;
3271     if (!Alignment || Alignment >= 8) {
3272       if (Len >= 8)
3273         VT = MVT::i64;
3274       else if (Len >= 4)
3275         VT = MVT::i32;
3276       else if (Len >= 2)
3277         VT = MVT::i16;
3278       else {
3279         VT = MVT::i8;
3280       }
3281     } else {
3282       // Bound based on alignment.
3283       if (Len >= 4 && Alignment == 4)
3284         VT = MVT::i32;
3285       else if (Len >= 2 && Alignment == 2)
3286         VT = MVT::i16;
3287       else {
3288         VT = MVT::i8;
3289       }
3290     }
3291
3292     unsigned ResultReg = emitLoad(VT, VT, Src);
3293     if (!ResultReg)
3294       return false;
3295
3296     if (!emitStore(VT, ResultReg, Dest))
3297       return false;
3298
3299     int64_t Size = VT.getSizeInBits() / 8;
3300     Len -= Size;
3301     UnscaledOffset += Size;
3302
3303     // We need to recompute the unscaled offset for each iteration.
3304     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3305     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3306   }
3307
3308   return true;
3309 }
3310
3311 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3312 /// into the user. The condition code will only be updated on success.
3313 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3314                                         const Instruction *I,
3315                                         const Value *Cond) {
3316   if (!isa<ExtractValueInst>(Cond))
3317     return false;
3318
3319   const auto *EV = cast<ExtractValueInst>(Cond);
3320   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3321     return false;
3322
3323   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3324   MVT RetVT;
3325   const Function *Callee = II->getCalledFunction();
3326   Type *RetTy =
3327   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3328   if (!isTypeLegal(RetTy, RetVT))
3329     return false;
3330
3331   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3332     return false;
3333
3334   const Value *LHS = II->getArgOperand(0);
3335   const Value *RHS = II->getArgOperand(1);
3336
3337   // Canonicalize immediate to the RHS.
3338   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3339       isCommutativeIntrinsic(II))
3340     std::swap(LHS, RHS);
3341
3342   // Simplify multiplies.
3343   Intrinsic::ID IID = II->getIntrinsicID();
3344   switch (IID) {
3345   default:
3346     break;
3347   case Intrinsic::smul_with_overflow:
3348     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3349       if (C->getValue() == 2)
3350         IID = Intrinsic::sadd_with_overflow;
3351     break;
3352   case Intrinsic::umul_with_overflow:
3353     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3354       if (C->getValue() == 2)
3355         IID = Intrinsic::uadd_with_overflow;
3356     break;
3357   }
3358
3359   AArch64CC::CondCode TmpCC;
3360   switch (IID) {
3361   default:
3362     return false;
3363   case Intrinsic::sadd_with_overflow:
3364   case Intrinsic::ssub_with_overflow:
3365     TmpCC = AArch64CC::VS;
3366     break;
3367   case Intrinsic::uadd_with_overflow:
3368     TmpCC = AArch64CC::HS;
3369     break;
3370   case Intrinsic::usub_with_overflow:
3371     TmpCC = AArch64CC::LO;
3372     break;
3373   case Intrinsic::smul_with_overflow:
3374   case Intrinsic::umul_with_overflow:
3375     TmpCC = AArch64CC::NE;
3376     break;
3377   }
3378
3379   // Check if both instructions are in the same basic block.
3380   if (!isValueAvailable(II))
3381     return false;
3382
3383   // Make sure nothing is in the way
3384   BasicBlock::const_iterator Start(I);
3385   BasicBlock::const_iterator End(II);
3386   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3387     // We only expect extractvalue instructions between the intrinsic and the
3388     // instruction to be selected.
3389     if (!isa<ExtractValueInst>(Itr))
3390       return false;
3391
3392     // Check that the extractvalue operand comes from the intrinsic.
3393     const auto *EVI = cast<ExtractValueInst>(Itr);
3394     if (EVI->getAggregateOperand() != II)
3395       return false;
3396   }
3397
3398   CC = TmpCC;
3399   return true;
3400 }
3401
3402 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3403   // FIXME: Handle more intrinsics.
3404   switch (II->getIntrinsicID()) {
3405   default: return false;
3406   case Intrinsic::frameaddress: {
3407     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3408     MFI.setFrameAddressIsTaken(true);
3409
3410     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3411     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3412     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3413     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3414             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3415     // Recursively load frame address
3416     // ldr x0, [fp]
3417     // ldr x0, [x0]
3418     // ldr x0, [x0]
3419     // ...
3420     unsigned DestReg;
3421     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3422     while (Depth--) {
3423       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3424                                 SrcReg, /*IsKill=*/true, 0);
3425       assert(DestReg && "Unexpected LDR instruction emission failure.");
3426       SrcReg = DestReg;
3427     }
3428
3429     updateValueMap(II, SrcReg);
3430     return true;
3431   }
3432   case Intrinsic::memcpy:
3433   case Intrinsic::memmove: {
3434     const auto *MTI = cast<MemTransferInst>(II);
3435     // Don't handle volatile.
3436     if (MTI->isVolatile())
3437       return false;
3438
3439     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3440     // we would emit dead code because we don't currently handle memmoves.
3441     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3442     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3443       // Small memcpy's are common enough that we want to do them without a call
3444       // if possible.
3445       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3446       unsigned Alignment = MTI->getAlignment();
3447       if (isMemCpySmall(Len, Alignment)) {
3448         Address Dest, Src;
3449         if (!computeAddress(MTI->getRawDest(), Dest) ||
3450             !computeAddress(MTI->getRawSource(), Src))
3451           return false;
3452         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3453           return true;
3454       }
3455     }
3456
3457     if (!MTI->getLength()->getType()->isIntegerTy(64))
3458       return false;
3459
3460     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3461       // Fast instruction selection doesn't support the special
3462       // address spaces.
3463       return false;
3464
3465     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3466     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3467   }
3468   case Intrinsic::memset: {
3469     const MemSetInst *MSI = cast<MemSetInst>(II);
3470     // Don't handle volatile.
3471     if (MSI->isVolatile())
3472       return false;
3473
3474     if (!MSI->getLength()->getType()->isIntegerTy(64))
3475       return false;
3476
3477     if (MSI->getDestAddressSpace() > 255)
3478       // Fast instruction selection doesn't support the special
3479       // address spaces.
3480       return false;
3481
3482     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3483   }
3484   case Intrinsic::sin:
3485   case Intrinsic::cos:
3486   case Intrinsic::pow: {
3487     MVT RetVT;
3488     if (!isTypeLegal(II->getType(), RetVT))
3489       return false;
3490
3491     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3492       return false;
3493
3494     static const RTLIB::Libcall LibCallTable[3][2] = {
3495       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3496       { RTLIB::COS_F32, RTLIB::COS_F64 },
3497       { RTLIB::POW_F32, RTLIB::POW_F64 }
3498     };
3499     RTLIB::Libcall LC;
3500     bool Is64Bit = RetVT == MVT::f64;
3501     switch (II->getIntrinsicID()) {
3502     default:
3503       llvm_unreachable("Unexpected intrinsic.");
3504     case Intrinsic::sin:
3505       LC = LibCallTable[0][Is64Bit];
3506       break;
3507     case Intrinsic::cos:
3508       LC = LibCallTable[1][Is64Bit];
3509       break;
3510     case Intrinsic::pow:
3511       LC = LibCallTable[2][Is64Bit];
3512       break;
3513     }
3514
3515     ArgListTy Args;
3516     Args.reserve(II->getNumArgOperands());
3517
3518     // Populate the argument list.
3519     for (auto &Arg : II->arg_operands()) {
3520       ArgListEntry Entry;
3521       Entry.Val = Arg;
3522       Entry.Ty = Arg->getType();
3523       Args.push_back(Entry);
3524     }
3525
3526     CallLoweringInfo CLI;
3527     MCContext &Ctx = MF->getContext();
3528     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3529                   TLI.getLibcallName(LC), std::move(Args));
3530     if (!lowerCallTo(CLI))
3531       return false;
3532     updateValueMap(II, CLI.ResultReg);
3533     return true;
3534   }
3535   case Intrinsic::fabs: {
3536     MVT VT;
3537     if (!isTypeLegal(II->getType(), VT))
3538       return false;
3539
3540     unsigned Opc;
3541     switch (VT.SimpleTy) {
3542     default:
3543       return false;
3544     case MVT::f32:
3545       Opc = AArch64::FABSSr;
3546       break;
3547     case MVT::f64:
3548       Opc = AArch64::FABSDr;
3549       break;
3550     }
3551     unsigned SrcReg = getRegForValue(II->getOperand(0));
3552     if (!SrcReg)
3553       return false;
3554     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3555     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3556     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3557       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3558     updateValueMap(II, ResultReg);
3559     return true;
3560   }
3561   case Intrinsic::trap:
3562     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3563         .addImm(1);
3564     return true;
3565
3566   case Intrinsic::sqrt: {
3567     Type *RetTy = II->getCalledFunction()->getReturnType();
3568
3569     MVT VT;
3570     if (!isTypeLegal(RetTy, VT))
3571       return false;
3572
3573     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3574     if (!Op0Reg)
3575       return false;
3576     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3577
3578     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3579     if (!ResultReg)
3580       return false;
3581
3582     updateValueMap(II, ResultReg);
3583     return true;
3584   }
3585   case Intrinsic::sadd_with_overflow:
3586   case Intrinsic::uadd_with_overflow:
3587   case Intrinsic::ssub_with_overflow:
3588   case Intrinsic::usub_with_overflow:
3589   case Intrinsic::smul_with_overflow:
3590   case Intrinsic::umul_with_overflow: {
3591     // This implements the basic lowering of the xalu with overflow intrinsics.
3592     const Function *Callee = II->getCalledFunction();
3593     auto *Ty = cast<StructType>(Callee->getReturnType());
3594     Type *RetTy = Ty->getTypeAtIndex(0U);
3595
3596     MVT VT;
3597     if (!isTypeLegal(RetTy, VT))
3598       return false;
3599
3600     if (VT != MVT::i32 && VT != MVT::i64)
3601       return false;
3602
3603     const Value *LHS = II->getArgOperand(0);
3604     const Value *RHS = II->getArgOperand(1);
3605     // Canonicalize immediate to the RHS.
3606     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3607         isCommutativeIntrinsic(II))
3608       std::swap(LHS, RHS);
3609
3610     // Simplify multiplies.
3611     Intrinsic::ID IID = II->getIntrinsicID();
3612     switch (IID) {
3613     default:
3614       break;
3615     case Intrinsic::smul_with_overflow:
3616       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3617         if (C->getValue() == 2) {
3618           IID = Intrinsic::sadd_with_overflow;
3619           RHS = LHS;
3620         }
3621       break;
3622     case Intrinsic::umul_with_overflow:
3623       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3624         if (C->getValue() == 2) {
3625           IID = Intrinsic::uadd_with_overflow;
3626           RHS = LHS;
3627         }
3628       break;
3629     }
3630
3631     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3632     AArch64CC::CondCode CC = AArch64CC::Invalid;
3633     switch (IID) {
3634     default: llvm_unreachable("Unexpected intrinsic!");
3635     case Intrinsic::sadd_with_overflow:
3636       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3637       CC = AArch64CC::VS;
3638       break;
3639     case Intrinsic::uadd_with_overflow:
3640       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3641       CC = AArch64CC::HS;
3642       break;
3643     case Intrinsic::ssub_with_overflow:
3644       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3645       CC = AArch64CC::VS;
3646       break;
3647     case Intrinsic::usub_with_overflow:
3648       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3649       CC = AArch64CC::LO;
3650       break;
3651     case Intrinsic::smul_with_overflow: {
3652       CC = AArch64CC::NE;
3653       unsigned LHSReg = getRegForValue(LHS);
3654       if (!LHSReg)
3655         return false;
3656       bool LHSIsKill = hasTrivialKill(LHS);
3657
3658       unsigned RHSReg = getRegForValue(RHS);
3659       if (!RHSReg)
3660         return false;
3661       bool RHSIsKill = hasTrivialKill(RHS);
3662
3663       if (VT == MVT::i32) {
3664         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3665         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3666                                        /*IsKill=*/false, 32);
3667         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3668                                             AArch64::sub_32);
3669         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3670                                               AArch64::sub_32);
3671         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3672                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3673       } else {
3674         assert(VT == MVT::i64 && "Unexpected value type.");
3675         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3676         // reused in the next instruction.
3677         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3678                             /*IsKill=*/false);
3679         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3680                                         RHSReg, RHSIsKill);
3681         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3682                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3683       }
3684       break;
3685     }
3686     case Intrinsic::umul_with_overflow: {
3687       CC = AArch64CC::NE;
3688       unsigned LHSReg = getRegForValue(LHS);
3689       if (!LHSReg)
3690         return false;
3691       bool LHSIsKill = hasTrivialKill(LHS);
3692
3693       unsigned RHSReg = getRegForValue(RHS);
3694       if (!RHSReg)
3695         return false;
3696       bool RHSIsKill = hasTrivialKill(RHS);
3697
3698       if (VT == MVT::i32) {
3699         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3700         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3701                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3702                     /*WantResult=*/false);
3703         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3704                                             AArch64::sub_32);
3705       } else {
3706         assert(VT == MVT::i64 && "Unexpected value type.");
3707         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3708         // reused in the next instruction.
3709         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3710                             /*IsKill=*/false);
3711         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3712                                         RHSReg, RHSIsKill);
3713         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3714                     /*IsKill=*/false, /*WantResult=*/false);
3715       }
3716       break;
3717     }
3718     }
3719
3720     if (MulReg) {
3721       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3722       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3723               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3724     }
3725
3726     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3727                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3728                                   /*IsKill=*/true, getInvertedCondCode(CC));
3729     (void)ResultReg2;
3730     assert((ResultReg1 + 1) == ResultReg2 &&
3731            "Nonconsecutive result registers.");
3732     updateValueMap(II, ResultReg1, 2);
3733     return true;
3734   }
3735   }
3736   return false;
3737 }
3738
3739 bool AArch64FastISel::selectRet(const Instruction *I) {
3740   const ReturnInst *Ret = cast<ReturnInst>(I);
3741   const Function &F = *I->getParent()->getParent();
3742
3743   if (!FuncInfo.CanLowerReturn)
3744     return false;
3745
3746   if (F.isVarArg())
3747     return false;
3748
3749   if (TLI.supportSwiftError() &&
3750       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3751     return false;
3752
3753   if (TLI.supportSplitCSR(FuncInfo.MF))
3754     return false;
3755
3756   // Build a list of return value registers.
3757   SmallVector<unsigned, 4> RetRegs;
3758
3759   if (Ret->getNumOperands() > 0) {
3760     CallingConv::ID CC = F.getCallingConv();
3761     SmallVector<ISD::OutputArg, 4> Outs;
3762     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3763
3764     // Analyze operands of the call, assigning locations to each operand.
3765     SmallVector<CCValAssign, 16> ValLocs;
3766     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3767     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3768                                                      : RetCC_AArch64_AAPCS;
3769     CCInfo.AnalyzeReturn(Outs, RetCC);
3770
3771     // Only handle a single return value for now.
3772     if (ValLocs.size() != 1)
3773       return false;
3774
3775     CCValAssign &VA = ValLocs[0];
3776     const Value *RV = Ret->getOperand(0);
3777
3778     // Don't bother handling odd stuff for now.
3779     if ((VA.getLocInfo() != CCValAssign::Full) &&
3780         (VA.getLocInfo() != CCValAssign::BCvt))
3781       return false;
3782
3783     // Only handle register returns for now.
3784     if (!VA.isRegLoc())
3785       return false;
3786
3787     unsigned Reg = getRegForValue(RV);
3788     if (Reg == 0)
3789       return false;
3790
3791     unsigned SrcReg = Reg + VA.getValNo();
3792     unsigned DestReg = VA.getLocReg();
3793     // Avoid a cross-class copy. This is very unlikely.
3794     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3795       return false;
3796
3797     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3798     if (!RVEVT.isSimple())
3799       return false;
3800
3801     // Vectors (of > 1 lane) in big endian need tricky handling.
3802     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3803         !Subtarget->isLittleEndian())
3804       return false;
3805
3806     MVT RVVT = RVEVT.getSimpleVT();
3807     if (RVVT == MVT::f128)
3808       return false;
3809
3810     MVT DestVT = VA.getValVT();
3811     // Special handling for extended integers.
3812     if (RVVT != DestVT) {
3813       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3814         return false;
3815
3816       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3817         return false;
3818
3819       bool IsZExt = Outs[0].Flags.isZExt();
3820       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3821       if (SrcReg == 0)
3822         return false;
3823     }
3824
3825     // Make the copy.
3826     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3827             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3828
3829     // Add register to return instruction.
3830     RetRegs.push_back(VA.getLocReg());
3831   }
3832
3833   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3834                                     TII.get(AArch64::RET_ReallyLR));
3835   for (unsigned RetReg : RetRegs)
3836     MIB.addReg(RetReg, RegState::Implicit);
3837   return true;
3838 }
3839
3840 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3841   Type *DestTy = I->getType();
3842   Value *Op = I->getOperand(0);
3843   Type *SrcTy = Op->getType();
3844
3845   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3846   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3847   if (!SrcEVT.isSimple())
3848     return false;
3849   if (!DestEVT.isSimple())
3850     return false;
3851
3852   MVT SrcVT = SrcEVT.getSimpleVT();
3853   MVT DestVT = DestEVT.getSimpleVT();
3854
3855   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3856       SrcVT != MVT::i8)
3857     return false;
3858   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3859       DestVT != MVT::i1)
3860     return false;
3861
3862   unsigned SrcReg = getRegForValue(Op);
3863   if (!SrcReg)
3864     return false;
3865   bool SrcIsKill = hasTrivialKill(Op);
3866
3867   // If we're truncating from i64 to a smaller non-legal type then generate an
3868   // AND. Otherwise, we know the high bits are undefined and a truncate only
3869   // generate a COPY. We cannot mark the source register also as result
3870   // register, because this can incorrectly transfer the kill flag onto the
3871   // source register.
3872   unsigned ResultReg;
3873   if (SrcVT == MVT::i64) {
3874     uint64_t Mask = 0;
3875     switch (DestVT.SimpleTy) {
3876     default:
3877       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3878       return false;
3879     case MVT::i1:
3880       Mask = 0x1;
3881       break;
3882     case MVT::i8:
3883       Mask = 0xff;
3884       break;
3885     case MVT::i16:
3886       Mask = 0xffff;
3887       break;
3888     }
3889     // Issue an extract_subreg to get the lower 32-bits.
3890     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3891                                                 AArch64::sub_32);
3892     // Create the AND instruction which performs the actual truncation.
3893     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3894     assert(ResultReg && "Unexpected AND instruction emission failure.");
3895   } else {
3896     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3897     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3898             TII.get(TargetOpcode::COPY), ResultReg)
3899         .addReg(SrcReg, getKillRegState(SrcIsKill));
3900   }
3901
3902   updateValueMap(I, ResultReg);
3903   return true;
3904 }
3905
3906 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3907   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3908           DestVT == MVT::i64) &&
3909          "Unexpected value type.");
3910   // Handle i8 and i16 as i32.
3911   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3912     DestVT = MVT::i32;
3913
3914   if (IsZExt) {
3915     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3916     assert(ResultReg && "Unexpected AND instruction emission failure.");
3917     if (DestVT == MVT::i64) {
3918       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3919       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3920       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3921       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3922               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3923           .addImm(0)
3924           .addReg(ResultReg)
3925           .addImm(AArch64::sub_32);
3926       ResultReg = Reg64;
3927     }
3928     return ResultReg;
3929   } else {
3930     if (DestVT == MVT::i64) {
3931       // FIXME: We're SExt i1 to i64.
3932       return 0;
3933     }
3934     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3935                             /*TODO:IsKill=*/false, 0, 0);
3936   }
3937 }
3938
3939 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3940                                       unsigned Op1, bool Op1IsKill) {
3941   unsigned Opc, ZReg;
3942   switch (RetVT.SimpleTy) {
3943   default: return 0;
3944   case MVT::i8:
3945   case MVT::i16:
3946   case MVT::i32:
3947     RetVT = MVT::i32;
3948     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3949   case MVT::i64:
3950     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3951   }
3952
3953   const TargetRegisterClass *RC =
3954       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3955   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3956                           /*IsKill=*/ZReg, true);
3957 }
3958
3959 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3960                                         unsigned Op1, bool Op1IsKill) {
3961   if (RetVT != MVT::i64)
3962     return 0;
3963
3964   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3965                           Op0, Op0IsKill, Op1, Op1IsKill,
3966                           AArch64::XZR, /*IsKill=*/true);
3967 }
3968
3969 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3970                                         unsigned Op1, bool Op1IsKill) {
3971   if (RetVT != MVT::i64)
3972     return 0;
3973
3974   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3975                           Op0, Op0IsKill, Op1, Op1IsKill,
3976                           AArch64::XZR, /*IsKill=*/true);
3977 }
3978
3979 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3980                                      unsigned Op1Reg, bool Op1IsKill) {
3981   unsigned Opc = 0;
3982   bool NeedTrunc = false;
3983   uint64_t Mask = 0;
3984   switch (RetVT.SimpleTy) {
3985   default: return 0;
3986   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3987   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3988   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3989   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3990   }
3991
3992   const TargetRegisterClass *RC =
3993       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3994   if (NeedTrunc) {
3995     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3996     Op1IsKill = true;
3997   }
3998   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3999                                        Op1IsKill);
4000   if (NeedTrunc)
4001     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4002   return ResultReg;
4003 }
4004
4005 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4006                                      bool Op0IsKill, uint64_t Shift,
4007                                      bool IsZExt) {
4008   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4009          "Unexpected source/return type pair.");
4010   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4011           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4012          "Unexpected source value type.");
4013   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4014           RetVT == MVT::i64) && "Unexpected return value type.");
4015
4016   bool Is64Bit = (RetVT == MVT::i64);
4017   unsigned RegSize = Is64Bit ? 64 : 32;
4018   unsigned DstBits = RetVT.getSizeInBits();
4019   unsigned SrcBits = SrcVT.getSizeInBits();
4020   const TargetRegisterClass *RC =
4021       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4022
4023   // Just emit a copy for "zero" shifts.
4024   if (Shift == 0) {
4025     if (RetVT == SrcVT) {
4026       unsigned ResultReg = createResultReg(RC);
4027       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4028               TII.get(TargetOpcode::COPY), ResultReg)
4029           .addReg(Op0, getKillRegState(Op0IsKill));
4030       return ResultReg;
4031     } else
4032       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4033   }
4034
4035   // Don't deal with undefined shifts.
4036   if (Shift >= DstBits)
4037     return 0;
4038
4039   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4040   // {S|U}BFM Wd, Wn, #r, #s
4041   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4042
4043   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4044   // %2 = shl i16 %1, 4
4045   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4046   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4047   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4048   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4049
4050   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4051   // %2 = shl i16 %1, 8
4052   // Wd<32+7-24,32-24> = Wn<7:0>
4053   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4054   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4055   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4056
4057   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4058   // %2 = shl i16 %1, 12
4059   // Wd<32+3-20,32-20> = Wn<3:0>
4060   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4061   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4062   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4063
4064   unsigned ImmR = RegSize - Shift;
4065   // Limit the width to the length of the source type.
4066   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4067   static const unsigned OpcTable[2][2] = {
4068     {AArch64::SBFMWri, AArch64::SBFMXri},
4069     {AArch64::UBFMWri, AArch64::UBFMXri}
4070   };
4071   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4072   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4073     unsigned TmpReg = MRI.createVirtualRegister(RC);
4074     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4075             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4076         .addImm(0)
4077         .addReg(Op0, getKillRegState(Op0IsKill))
4078         .addImm(AArch64::sub_32);
4079     Op0 = TmpReg;
4080     Op0IsKill = true;
4081   }
4082   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4083 }
4084
4085 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4086                                      unsigned Op1Reg, bool Op1IsKill) {
4087   unsigned Opc = 0;
4088   bool NeedTrunc = false;
4089   uint64_t Mask = 0;
4090   switch (RetVT.SimpleTy) {
4091   default: return 0;
4092   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4093   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4094   case MVT::i32: Opc = AArch64::LSRVWr; break;
4095   case MVT::i64: Opc = AArch64::LSRVXr; break;
4096   }
4097
4098   const TargetRegisterClass *RC =
4099       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4100   if (NeedTrunc) {
4101     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4102     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4103     Op0IsKill = Op1IsKill = true;
4104   }
4105   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4106                                        Op1IsKill);
4107   if (NeedTrunc)
4108     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4109   return ResultReg;
4110 }
4111
4112 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4113                                      bool Op0IsKill, uint64_t Shift,
4114                                      bool IsZExt) {
4115   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4116          "Unexpected source/return type pair.");
4117   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4118           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4119          "Unexpected source value type.");
4120   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4121           RetVT == MVT::i64) && "Unexpected return value type.");
4122
4123   bool Is64Bit = (RetVT == MVT::i64);
4124   unsigned RegSize = Is64Bit ? 64 : 32;
4125   unsigned DstBits = RetVT.getSizeInBits();
4126   unsigned SrcBits = SrcVT.getSizeInBits();
4127   const TargetRegisterClass *RC =
4128       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4129
4130   // Just emit a copy for "zero" shifts.
4131   if (Shift == 0) {
4132     if (RetVT == SrcVT) {
4133       unsigned ResultReg = createResultReg(RC);
4134       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4135               TII.get(TargetOpcode::COPY), ResultReg)
4136       .addReg(Op0, getKillRegState(Op0IsKill));
4137       return ResultReg;
4138     } else
4139       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4140   }
4141
4142   // Don't deal with undefined shifts.
4143   if (Shift >= DstBits)
4144     return 0;
4145
4146   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4147   // {S|U}BFM Wd, Wn, #r, #s
4148   // Wd<s-r:0> = Wn<s:r> when r <= s
4149
4150   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4151   // %2 = lshr i16 %1, 4
4152   // Wd<7-4:0> = Wn<7:4>
4153   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4154   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4155   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4156
4157   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4158   // %2 = lshr i16 %1, 8
4159   // Wd<7-7,0> = Wn<7:7>
4160   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4161   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4162   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4163
4164   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4165   // %2 = lshr i16 %1, 12
4166   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4167   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4168   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4169   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4170
4171   if (Shift >= SrcBits && IsZExt)
4172     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4173
4174   // It is not possible to fold a sign-extend into the LShr instruction. In this
4175   // case emit a sign-extend.
4176   if (!IsZExt) {
4177     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4178     if (!Op0)
4179       return 0;
4180     Op0IsKill = true;
4181     SrcVT = RetVT;
4182     SrcBits = SrcVT.getSizeInBits();
4183     IsZExt = true;
4184   }
4185
4186   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4187   unsigned ImmS = SrcBits - 1;
4188   static const unsigned OpcTable[2][2] = {
4189     {AArch64::SBFMWri, AArch64::SBFMXri},
4190     {AArch64::UBFMWri, AArch64::UBFMXri}
4191   };
4192   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4193   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4194     unsigned TmpReg = MRI.createVirtualRegister(RC);
4195     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4196             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4197         .addImm(0)
4198         .addReg(Op0, getKillRegState(Op0IsKill))
4199         .addImm(AArch64::sub_32);
4200     Op0 = TmpReg;
4201     Op0IsKill = true;
4202   }
4203   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4204 }
4205
4206 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4207                                      unsigned Op1Reg, bool Op1IsKill) {
4208   unsigned Opc = 0;
4209   bool NeedTrunc = false;
4210   uint64_t Mask = 0;
4211   switch (RetVT.SimpleTy) {
4212   default: return 0;
4213   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4214   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4215   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4216   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4217   }
4218
4219   const TargetRegisterClass *RC =
4220       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4221   if (NeedTrunc) {
4222     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4223     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4224     Op0IsKill = Op1IsKill = true;
4225   }
4226   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4227                                        Op1IsKill);
4228   if (NeedTrunc)
4229     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4230   return ResultReg;
4231 }
4232
4233 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4234                                      bool Op0IsKill, uint64_t Shift,
4235                                      bool IsZExt) {
4236   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4237          "Unexpected source/return type pair.");
4238   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4239           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4240          "Unexpected source value type.");
4241   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4242           RetVT == MVT::i64) && "Unexpected return value type.");
4243
4244   bool Is64Bit = (RetVT == MVT::i64);
4245   unsigned RegSize = Is64Bit ? 64 : 32;
4246   unsigned DstBits = RetVT.getSizeInBits();
4247   unsigned SrcBits = SrcVT.getSizeInBits();
4248   const TargetRegisterClass *RC =
4249       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4250
4251   // Just emit a copy for "zero" shifts.
4252   if (Shift == 0) {
4253     if (RetVT == SrcVT) {
4254       unsigned ResultReg = createResultReg(RC);
4255       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4256               TII.get(TargetOpcode::COPY), ResultReg)
4257       .addReg(Op0, getKillRegState(Op0IsKill));
4258       return ResultReg;
4259     } else
4260       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4261   }
4262
4263   // Don't deal with undefined shifts.
4264   if (Shift >= DstBits)
4265     return 0;
4266
4267   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4268   // {S|U}BFM Wd, Wn, #r, #s
4269   // Wd<s-r:0> = Wn<s:r> when r <= s
4270
4271   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4272   // %2 = ashr i16 %1, 4
4273   // Wd<7-4:0> = Wn<7:4>
4274   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4275   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4276   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4277
4278   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4279   // %2 = ashr i16 %1, 8
4280   // Wd<7-7,0> = Wn<7:7>
4281   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4282   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4283   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4284
4285   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4286   // %2 = ashr i16 %1, 12
4287   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4288   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4289   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4290   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4291
4292   if (Shift >= SrcBits && IsZExt)
4293     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4294
4295   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4296   unsigned ImmS = SrcBits - 1;
4297   static const unsigned OpcTable[2][2] = {
4298     {AArch64::SBFMWri, AArch64::SBFMXri},
4299     {AArch64::UBFMWri, AArch64::UBFMXri}
4300   };
4301   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4302   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4303     unsigned TmpReg = MRI.createVirtualRegister(RC);
4304     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4305             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4306         .addImm(0)
4307         .addReg(Op0, getKillRegState(Op0IsKill))
4308         .addImm(AArch64::sub_32);
4309     Op0 = TmpReg;
4310     Op0IsKill = true;
4311   }
4312   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4313 }
4314
4315 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4316                                      bool IsZExt) {
4317   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4318
4319   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4320   // DestVT are odd things, so test to make sure that they are both types we can
4321   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4322   // bail out to SelectionDAG.
4323   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4324        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4325       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4326        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4327     return 0;
4328
4329   unsigned Opc;
4330   unsigned Imm = 0;
4331
4332   switch (SrcVT.SimpleTy) {
4333   default:
4334     return 0;
4335   case MVT::i1:
4336     return emiti1Ext(SrcReg, DestVT, IsZExt);
4337   case MVT::i8:
4338     if (DestVT == MVT::i64)
4339       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4340     else
4341       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4342     Imm = 7;
4343     break;
4344   case MVT::i16:
4345     if (DestVT == MVT::i64)
4346       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4347     else
4348       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4349     Imm = 15;
4350     break;
4351   case MVT::i32:
4352     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4353     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4354     Imm = 31;
4355     break;
4356   }
4357
4358   // Handle i8 and i16 as i32.
4359   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4360     DestVT = MVT::i32;
4361   else if (DestVT == MVT::i64) {
4362     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4363     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4364             TII.get(AArch64::SUBREG_TO_REG), Src64)
4365         .addImm(0)
4366         .addReg(SrcReg)
4367         .addImm(AArch64::sub_32);
4368     SrcReg = Src64;
4369   }
4370
4371   const TargetRegisterClass *RC =
4372       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4373   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4374 }
4375
4376 static bool isZExtLoad(const MachineInstr *LI) {
4377   switch (LI->getOpcode()) {
4378   default:
4379     return false;
4380   case AArch64::LDURBBi:
4381   case AArch64::LDURHHi:
4382   case AArch64::LDURWi:
4383   case AArch64::LDRBBui:
4384   case AArch64::LDRHHui:
4385   case AArch64::LDRWui:
4386   case AArch64::LDRBBroX:
4387   case AArch64::LDRHHroX:
4388   case AArch64::LDRWroX:
4389   case AArch64::LDRBBroW:
4390   case AArch64::LDRHHroW:
4391   case AArch64::LDRWroW:
4392     return true;
4393   }
4394 }
4395
4396 static bool isSExtLoad(const MachineInstr *LI) {
4397   switch (LI->getOpcode()) {
4398   default:
4399     return false;
4400   case AArch64::LDURSBWi:
4401   case AArch64::LDURSHWi:
4402   case AArch64::LDURSBXi:
4403   case AArch64::LDURSHXi:
4404   case AArch64::LDURSWi:
4405   case AArch64::LDRSBWui:
4406   case AArch64::LDRSHWui:
4407   case AArch64::LDRSBXui:
4408   case AArch64::LDRSHXui:
4409   case AArch64::LDRSWui:
4410   case AArch64::LDRSBWroX:
4411   case AArch64::LDRSHWroX:
4412   case AArch64::LDRSBXroX:
4413   case AArch64::LDRSHXroX:
4414   case AArch64::LDRSWroX:
4415   case AArch64::LDRSBWroW:
4416   case AArch64::LDRSHWroW:
4417   case AArch64::LDRSBXroW:
4418   case AArch64::LDRSHXroW:
4419   case AArch64::LDRSWroW:
4420     return true;
4421   }
4422 }
4423
4424 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4425                                          MVT SrcVT) {
4426   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4427   if (!LI || !LI->hasOneUse())
4428     return false;
4429
4430   // Check if the load instruction has already been selected.
4431   unsigned Reg = lookUpRegForValue(LI);
4432   if (!Reg)
4433     return false;
4434
4435   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4436   if (!MI)
4437     return false;
4438
4439   // Check if the correct load instruction has been emitted - SelectionDAG might
4440   // have emitted a zero-extending load, but we need a sign-extending load.
4441   bool IsZExt = isa<ZExtInst>(I);
4442   const auto *LoadMI = MI;
4443   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4444       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4445     unsigned LoadReg = MI->getOperand(1).getReg();
4446     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4447     assert(LoadMI && "Expected valid instruction");
4448   }
4449   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4450     return false;
4451
4452   // Nothing to be done.
4453   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4454     updateValueMap(I, Reg);
4455     return true;
4456   }
4457
4458   if (IsZExt) {
4459     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4460     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4461             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4462         .addImm(0)
4463         .addReg(Reg, getKillRegState(true))
4464         .addImm(AArch64::sub_32);
4465     Reg = Reg64;
4466   } else {
4467     assert((MI->getOpcode() == TargetOpcode::COPY &&
4468             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4469            "Expected copy instruction");
4470     Reg = MI->getOperand(1).getReg();
4471     MI->eraseFromParent();
4472   }
4473   updateValueMap(I, Reg);
4474   return true;
4475 }
4476
4477 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4478   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4479          "Unexpected integer extend instruction.");
4480   MVT RetVT;
4481   MVT SrcVT;
4482   if (!isTypeSupported(I->getType(), RetVT))
4483     return false;
4484
4485   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4486     return false;
4487
4488   // Try to optimize already sign-/zero-extended values from load instructions.
4489   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4490     return true;
4491
4492   unsigned SrcReg = getRegForValue(I->getOperand(0));
4493   if (!SrcReg)
4494     return false;
4495   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4496
4497   // Try to optimize already sign-/zero-extended values from function arguments.
4498   bool IsZExt = isa<ZExtInst>(I);
4499   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4500     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4501       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4502         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4503         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4504                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4505             .addImm(0)
4506             .addReg(SrcReg, getKillRegState(SrcIsKill))
4507             .addImm(AArch64::sub_32);
4508         SrcReg = ResultReg;
4509       }
4510       // Conservatively clear all kill flags from all uses, because we are
4511       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4512       // level. The result of the instruction at IR level might have been
4513       // trivially dead, which is now not longer true.
4514       unsigned UseReg = lookUpRegForValue(I);
4515       if (UseReg)
4516         MRI.clearKillFlags(UseReg);
4517
4518       updateValueMap(I, SrcReg);
4519       return true;
4520     }
4521   }
4522
4523   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4524   if (!ResultReg)
4525     return false;
4526
4527   updateValueMap(I, ResultReg);
4528   return true;
4529 }
4530
4531 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4532   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4533   if (!DestEVT.isSimple())
4534     return false;
4535
4536   MVT DestVT = DestEVT.getSimpleVT();
4537   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4538     return false;
4539
4540   unsigned DivOpc;
4541   bool Is64bit = (DestVT == MVT::i64);
4542   switch (ISDOpcode) {
4543   default:
4544     return false;
4545   case ISD::SREM:
4546     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4547     break;
4548   case ISD::UREM:
4549     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4550     break;
4551   }
4552   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4553   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4554   if (!Src0Reg)
4555     return false;
4556   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4557
4558   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4559   if (!Src1Reg)
4560     return false;
4561   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4562
4563   const TargetRegisterClass *RC =
4564       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4565   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4566                                      Src1Reg, /*IsKill=*/false);
4567   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4568   // The remainder is computed as numerator - (quotient * denominator) using the
4569   // MSUB instruction.
4570   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4571                                         Src1Reg, Src1IsKill, Src0Reg,
4572                                         Src0IsKill);
4573   updateValueMap(I, ResultReg);
4574   return true;
4575 }
4576
4577 bool AArch64FastISel::selectMul(const Instruction *I) {
4578   MVT VT;
4579   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4580     return false;
4581
4582   if (VT.isVector())
4583     return selectBinaryOp(I, ISD::MUL);
4584
4585   const Value *Src0 = I->getOperand(0);
4586   const Value *Src1 = I->getOperand(1);
4587   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4588     if (C->getValue().isPowerOf2())
4589       std::swap(Src0, Src1);
4590
4591   // Try to simplify to a shift instruction.
4592   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4593     if (C->getValue().isPowerOf2()) {
4594       uint64_t ShiftVal = C->getValue().logBase2();
4595       MVT SrcVT = VT;
4596       bool IsZExt = true;
4597       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4598         if (!isIntExtFree(ZExt)) {
4599           MVT VT;
4600           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4601             SrcVT = VT;
4602             IsZExt = true;
4603             Src0 = ZExt->getOperand(0);
4604           }
4605         }
4606       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4607         if (!isIntExtFree(SExt)) {
4608           MVT VT;
4609           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4610             SrcVT = VT;
4611             IsZExt = false;
4612             Src0 = SExt->getOperand(0);
4613           }
4614         }
4615       }
4616
4617       unsigned Src0Reg = getRegForValue(Src0);
4618       if (!Src0Reg)
4619         return false;
4620       bool Src0IsKill = hasTrivialKill(Src0);
4621
4622       unsigned ResultReg =
4623           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4624
4625       if (ResultReg) {
4626         updateValueMap(I, ResultReg);
4627         return true;
4628       }
4629     }
4630
4631   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4632   if (!Src0Reg)
4633     return false;
4634   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4635
4636   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4637   if (!Src1Reg)
4638     return false;
4639   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4640
4641   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4642
4643   if (!ResultReg)
4644     return false;
4645
4646   updateValueMap(I, ResultReg);
4647   return true;
4648 }
4649
4650 bool AArch64FastISel::selectShift(const Instruction *I) {
4651   MVT RetVT;
4652   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4653     return false;
4654
4655   if (RetVT.isVector())
4656     return selectOperator(I, I->getOpcode());
4657
4658   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4659     unsigned ResultReg = 0;
4660     uint64_t ShiftVal = C->getZExtValue();
4661     MVT SrcVT = RetVT;
4662     bool IsZExt = I->getOpcode() != Instruction::AShr;
4663     const Value *Op0 = I->getOperand(0);
4664     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4665       if (!isIntExtFree(ZExt)) {
4666         MVT TmpVT;
4667         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4668           SrcVT = TmpVT;
4669           IsZExt = true;
4670           Op0 = ZExt->getOperand(0);
4671         }
4672       }
4673     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4674       if (!isIntExtFree(SExt)) {
4675         MVT TmpVT;
4676         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4677           SrcVT = TmpVT;
4678           IsZExt = false;
4679           Op0 = SExt->getOperand(0);
4680         }
4681       }
4682     }
4683
4684     unsigned Op0Reg = getRegForValue(Op0);
4685     if (!Op0Reg)
4686       return false;
4687     bool Op0IsKill = hasTrivialKill(Op0);
4688
4689     switch (I->getOpcode()) {
4690     default: llvm_unreachable("Unexpected instruction.");
4691     case Instruction::Shl:
4692       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4693       break;
4694     case Instruction::AShr:
4695       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4696       break;
4697     case Instruction::LShr:
4698       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4699       break;
4700     }
4701     if (!ResultReg)
4702       return false;
4703
4704     updateValueMap(I, ResultReg);
4705     return true;
4706   }
4707
4708   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4709   if (!Op0Reg)
4710     return false;
4711   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4712
4713   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4714   if (!Op1Reg)
4715     return false;
4716   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4717
4718   unsigned ResultReg = 0;
4719   switch (I->getOpcode()) {
4720   default: llvm_unreachable("Unexpected instruction.");
4721   case Instruction::Shl:
4722     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4723     break;
4724   case Instruction::AShr:
4725     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4726     break;
4727   case Instruction::LShr:
4728     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4729     break;
4730   }
4731
4732   if (!ResultReg)
4733     return false;
4734
4735   updateValueMap(I, ResultReg);
4736   return true;
4737 }
4738
4739 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4740   MVT RetVT, SrcVT;
4741
4742   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4743     return false;
4744   if (!isTypeLegal(I->getType(), RetVT))
4745     return false;
4746
4747   unsigned Opc;
4748   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4749     Opc = AArch64::FMOVWSr;
4750   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4751     Opc = AArch64::FMOVXDr;
4752   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4753     Opc = AArch64::FMOVSWr;
4754   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4755     Opc = AArch64::FMOVDXr;
4756   else
4757     return false;
4758
4759   const TargetRegisterClass *RC = nullptr;
4760   switch (RetVT.SimpleTy) {
4761   default: llvm_unreachable("Unexpected value type.");
4762   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4763   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4764   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4765   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4766   }
4767   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4768   if (!Op0Reg)
4769     return false;
4770   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4771   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4772
4773   if (!ResultReg)
4774     return false;
4775
4776   updateValueMap(I, ResultReg);
4777   return true;
4778 }
4779
4780 bool AArch64FastISel::selectFRem(const Instruction *I) {
4781   MVT RetVT;
4782   if (!isTypeLegal(I->getType(), RetVT))
4783     return false;
4784
4785   RTLIB::Libcall LC;
4786   switch (RetVT.SimpleTy) {
4787   default:
4788     return false;
4789   case MVT::f32:
4790     LC = RTLIB::REM_F32;
4791     break;
4792   case MVT::f64:
4793     LC = RTLIB::REM_F64;
4794     break;
4795   }
4796
4797   ArgListTy Args;
4798   Args.reserve(I->getNumOperands());
4799
4800   // Populate the argument list.
4801   for (auto &Arg : I->operands()) {
4802     ArgListEntry Entry;
4803     Entry.Val = Arg;
4804     Entry.Ty = Arg->getType();
4805     Args.push_back(Entry);
4806   }
4807
4808   CallLoweringInfo CLI;
4809   MCContext &Ctx = MF->getContext();
4810   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4811                 TLI.getLibcallName(LC), std::move(Args));
4812   if (!lowerCallTo(CLI))
4813     return false;
4814   updateValueMap(I, CLI.ResultReg);
4815   return true;
4816 }
4817
4818 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4819   MVT VT;
4820   if (!isTypeLegal(I->getType(), VT))
4821     return false;
4822
4823   if (!isa<ConstantInt>(I->getOperand(1)))
4824     return selectBinaryOp(I, ISD::SDIV);
4825
4826   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4827   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4828       !(C.isPowerOf2() || (-C).isPowerOf2()))
4829     return selectBinaryOp(I, ISD::SDIV);
4830
4831   unsigned Lg2 = C.countTrailingZeros();
4832   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4833   if (!Src0Reg)
4834     return false;
4835   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4836
4837   if (cast<BinaryOperator>(I)->isExact()) {
4838     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4839     if (!ResultReg)
4840       return false;
4841     updateValueMap(I, ResultReg);
4842     return true;
4843   }
4844
4845   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4846   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4847   if (!AddReg)
4848     return false;
4849
4850   // (Src0 < 0) ? Pow2 - 1 : 0;
4851   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4852     return false;
4853
4854   unsigned SelectOpc;
4855   const TargetRegisterClass *RC;
4856   if (VT == MVT::i64) {
4857     SelectOpc = AArch64::CSELXr;
4858     RC = &AArch64::GPR64RegClass;
4859   } else {
4860     SelectOpc = AArch64::CSELWr;
4861     RC = &AArch64::GPR32RegClass;
4862   }
4863   unsigned SelectReg =
4864       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4865                        Src0IsKill, AArch64CC::LT);
4866   if (!SelectReg)
4867     return false;
4868
4869   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4870   // negate the result.
4871   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4872   unsigned ResultReg;
4873   if (C.isNegative())
4874     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4875                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4876   else
4877     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4878
4879   if (!ResultReg)
4880     return false;
4881
4882   updateValueMap(I, ResultReg);
4883   return true;
4884 }
4885
4886 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4887 /// have to duplicate it for AArch64, because otherwise we would fail during the
4888 /// sign-extend emission.
4889 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4890   unsigned IdxN = getRegForValue(Idx);
4891   if (IdxN == 0)
4892     // Unhandled operand. Halt "fast" selection and bail.
4893     return std::pair<unsigned, bool>(0, false);
4894
4895   bool IdxNIsKill = hasTrivialKill(Idx);
4896
4897   // If the index is smaller or larger than intptr_t, truncate or extend it.
4898   MVT PtrVT = TLI.getPointerTy(DL);
4899   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4900   if (IdxVT.bitsLT(PtrVT)) {
4901     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4902     IdxNIsKill = true;
4903   } else if (IdxVT.bitsGT(PtrVT))
4904     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4905   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4906 }
4907
4908 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4909 /// duplicate it for AArch64, because otherwise we would bail out even for
4910 /// simple cases. This is because the standard fastEmit functions don't cover
4911 /// MUL at all and ADD is lowered very inefficientily.
4912 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4913   unsigned N = getRegForValue(I->getOperand(0));
4914   if (!N)
4915     return false;
4916   bool NIsKill = hasTrivialKill(I->getOperand(0));
4917
4918   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4919   // into a single N = N + TotalOffset.
4920   uint64_t TotalOffs = 0;
4921   MVT VT = TLI.getPointerTy(DL);
4922   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4923        GTI != E; ++GTI) {
4924     const Value *Idx = GTI.getOperand();
4925     if (auto *StTy = GTI.getStructTypeOrNull()) {
4926       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4927       // N = N + Offset
4928       if (Field)
4929         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4930     } else {
4931       Type *Ty = GTI.getIndexedType();
4932
4933       // If this is a constant subscript, handle it quickly.
4934       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4935         if (CI->isZero())
4936           continue;
4937         // N = N + Offset
4938         TotalOffs +=
4939             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4940         continue;
4941       }
4942       if (TotalOffs) {
4943         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4944         if (!N)
4945           return false;
4946         NIsKill = true;
4947         TotalOffs = 0;
4948       }
4949
4950       // N = N + Idx * ElementSize;
4951       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4952       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4953       unsigned IdxN = Pair.first;
4954       bool IdxNIsKill = Pair.second;
4955       if (!IdxN)
4956         return false;
4957
4958       if (ElementSize != 1) {
4959         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4960         if (!C)
4961           return false;
4962         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4963         if (!IdxN)
4964           return false;
4965         IdxNIsKill = true;
4966       }
4967       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4968       if (!N)
4969         return false;
4970     }
4971   }
4972   if (TotalOffs) {
4973     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4974     if (!N)
4975       return false;
4976   }
4977   updateValueMap(I, N);
4978   return true;
4979 }
4980
4981 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4982   assert(TM.getOptLevel() == CodeGenOpt::None &&
4983          "cmpxchg survived AtomicExpand at optlevel > -O0");
4984
4985   auto *RetPairTy = cast<StructType>(I->getType());
4986   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4987   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4988          "cmpxchg has a non-i1 status result");
4989
4990   MVT VT;
4991   if (!isTypeLegal(RetTy, VT))
4992     return false;
4993
4994   const TargetRegisterClass *ResRC;
4995   unsigned Opc, CmpOpc;
4996   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4997   // extractvalue selection doesn't support that.
4998   if (VT == MVT::i32) {
4999     Opc = AArch64::CMP_SWAP_32;
5000     CmpOpc = AArch64::SUBSWrs;
5001     ResRC = &AArch64::GPR32RegClass;
5002   } else if (VT == MVT::i64) {
5003     Opc = AArch64::CMP_SWAP_64;
5004     CmpOpc = AArch64::SUBSXrs;
5005     ResRC = &AArch64::GPR64RegClass;
5006   } else {
5007     return false;
5008   }
5009
5010   const MCInstrDesc &II = TII.get(Opc);
5011
5012   const unsigned AddrReg = constrainOperandRegClass(
5013       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5014   const unsigned DesiredReg = constrainOperandRegClass(
5015       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5016   const unsigned NewReg = constrainOperandRegClass(
5017       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5018
5019   const unsigned ResultReg1 = createResultReg(ResRC);
5020   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5021   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5022
5023   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5024   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5025       .addDef(ResultReg1)
5026       .addDef(ScratchReg)
5027       .addUse(AddrReg)
5028       .addUse(DesiredReg)
5029       .addUse(NewReg);
5030
5031   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5032       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5033       .addUse(ResultReg1)
5034       .addUse(DesiredReg)
5035       .addImm(0);
5036
5037   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5038       .addDef(ResultReg2)
5039       .addUse(AArch64::WZR)
5040       .addUse(AArch64::WZR)
5041       .addImm(AArch64CC::NE);
5042
5043   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5044   updateValueMap(I, ResultReg1, 2);
5045   return true;
5046 }
5047
5048 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5049   switch (I->getOpcode()) {
5050   default:
5051     break;
5052   case Instruction::Add:
5053   case Instruction::Sub:
5054     return selectAddSub(I);
5055   case Instruction::Mul:
5056     return selectMul(I);
5057   case Instruction::SDiv:
5058     return selectSDiv(I);
5059   case Instruction::SRem:
5060     if (!selectBinaryOp(I, ISD::SREM))
5061       return selectRem(I, ISD::SREM);
5062     return true;
5063   case Instruction::URem:
5064     if (!selectBinaryOp(I, ISD::UREM))
5065       return selectRem(I, ISD::UREM);
5066     return true;
5067   case Instruction::Shl:
5068   case Instruction::LShr:
5069   case Instruction::AShr:
5070     return selectShift(I);
5071   case Instruction::And:
5072   case Instruction::Or:
5073   case Instruction::Xor:
5074     return selectLogicalOp(I);
5075   case Instruction::Br:
5076     return selectBranch(I);
5077   case Instruction::IndirectBr:
5078     return selectIndirectBr(I);
5079   case Instruction::BitCast:
5080     if (!FastISel::selectBitCast(I))
5081       return selectBitCast(I);
5082     return true;
5083   case Instruction::FPToSI:
5084     if (!selectCast(I, ISD::FP_TO_SINT))
5085       return selectFPToInt(I, /*Signed=*/true);
5086     return true;
5087   case Instruction::FPToUI:
5088     return selectFPToInt(I, /*Signed=*/false);
5089   case Instruction::ZExt:
5090   case Instruction::SExt:
5091     return selectIntExt(I);
5092   case Instruction::Trunc:
5093     if (!selectCast(I, ISD::TRUNCATE))
5094       return selectTrunc(I);
5095     return true;
5096   case Instruction::FPExt:
5097     return selectFPExt(I);
5098   case Instruction::FPTrunc:
5099     return selectFPTrunc(I);
5100   case Instruction::SIToFP:
5101     if (!selectCast(I, ISD::SINT_TO_FP))
5102       return selectIntToFP(I, /*Signed=*/true);
5103     return true;
5104   case Instruction::UIToFP:
5105     return selectIntToFP(I, /*Signed=*/false);
5106   case Instruction::Load:
5107     return selectLoad(I);
5108   case Instruction::Store:
5109     return selectStore(I);
5110   case Instruction::FCmp:
5111   case Instruction::ICmp:
5112     return selectCmp(I);
5113   case Instruction::Select:
5114     return selectSelect(I);
5115   case Instruction::Ret:
5116     return selectRet(I);
5117   case Instruction::FRem:
5118     return selectFRem(I);
5119   case Instruction::GetElementPtr:
5120     return selectGetElementPtr(I);
5121   case Instruction::AtomicCmpXchg:
5122     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5123   }
5124
5125   // fall-back to target-independent instruction selection.
5126   return selectOperator(I, I->getOpcode());
5127   // Silence warnings.
5128   (void)&CC_AArch64_DarwinPCS_VarArg;
5129 }
5130
5131 namespace llvm {
5132
5133 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5134                                         const TargetLibraryInfo *LibInfo) {
5135   return new AArch64FastISel(FuncInfo, LibInfo);
5136 }
5137
5138 } // end namespace llvm