contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64CallingConvention.h"
  18 #include "AArch64RegisterInfo.h"
  19 #include "AArch64Subtarget.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "Utils/AArch64BaseInfo.h"
  22 #include "llvm/ADT/APFloat.h"
  23 #include "llvm/ADT/APInt.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/Analysis/BranchProbabilityInfo.h"
  27 #include "llvm/CodeGen/CallingConvLower.h"
  28 #include "llvm/CodeGen/FastISel.h"
  29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  30 #include "llvm/CodeGen/ISDOpcodes.h"
  31 #include "llvm/CodeGen/MachineBasicBlock.h"
  32 #include "llvm/CodeGen/MachineConstantPool.h"
  33 #include "llvm/CodeGen/MachineFrameInfo.h"
  34 #include "llvm/CodeGen/MachineInstr.h"
  35 #include "llvm/CodeGen/MachineInstrBuilder.h"
  36 #include "llvm/CodeGen/MachineMemOperand.h"
  37 #include "llvm/CodeGen/MachineRegisterInfo.h"
  38 #include "llvm/CodeGen/MachineValueType.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/ValueTypes.h"
  41 #include "llvm/IR/Argument.h"
  42 #include "llvm/IR/Attributes.h"
  43 #include "llvm/IR/BasicBlock.h"
  44 #include "llvm/IR/CallingConv.h"
  45 #include "llvm/IR/Constant.h"
  46 #include "llvm/IR/Constants.h"
  47 #include "llvm/IR/DataLayout.h"
  48 #include "llvm/IR/DerivedTypes.h"
  49 #include "llvm/IR/Function.h"
  50 #include "llvm/IR/GetElementPtrTypeIterator.h"
  51 #include "llvm/IR/GlobalValue.h"
  52 #include "llvm/IR/InstrTypes.h"
  53 #include "llvm/IR/Instruction.h"
  54 #include "llvm/IR/Instructions.h"
  55 #include "llvm/IR/IntrinsicInst.h"
  56 #include "llvm/IR/Operator.h"
  57 #include "llvm/IR/Type.h"
  58 #include "llvm/IR/User.h"
  59 #include "llvm/IR/Value.h"
  60 #include "llvm/MC/MCInstrDesc.h"
  61 #include "llvm/MC/MCRegisterInfo.h"
  62 #include "llvm/MC/MCSymbol.h"
  63 #include "llvm/Support/AtomicOrdering.h"
  64 #include "llvm/Support/Casting.h"
  65 #include "llvm/Support/CodeGen.h"
  66 #include "llvm/Support/ErrorHandling.h"
  67 #include "llvm/Support/MathExtras.h"
  68 #include <algorithm>
  69 #include <cassert>
  70 #include <cstdint>
  71 #include <iterator>
  72 #include <utility>
  73
  74 using namespace llvm;
  75
  76 namespace {
  77
  78 class AArch64FastISel final : public FastISel {
  79   class Address {
  80   public:
  81     typedef enum {
  82       RegBase,
  83       FrameIndexBase
  84     } BaseKind;
  85
  86   private:
  87     BaseKind Kind = RegBase;
  88     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
  89     union {
  90       unsigned Reg;
  91       int FI;
  92     } Base;
  93     unsigned OffsetReg = 0;
  94     unsigned Shift = 0;
  95     int64_t Offset = 0;
  96     const GlobalValue *GV = nullptr;
  97
  98   public:
  99     Address() { Base.Reg = 0; }
 100
 101     void setKind(BaseKind K) { Kind = K; }
 102     BaseKind getKind() const { return Kind; }
 103     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
 104     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
 105     bool isRegBase() const { return Kind == RegBase; }
 106     bool isFIBase() const { return Kind == FrameIndexBase; }
 107
 108     void setReg(unsigned Reg) {
 109       assert(isRegBase() && "Invalid base register access!");
 110       Base.Reg = Reg;
 111     }
 112
 113     unsigned getReg() const {
 114       assert(isRegBase() && "Invalid base register access!");
 115       return Base.Reg;
 116     }
 117
 118     void setOffsetReg(unsigned Reg) {
 119       OffsetReg = Reg;
 120     }
 121
 122     unsigned getOffsetReg() const {
 123       return OffsetReg;
 124     }
 125
 126     void setFI(unsigned FI) {
 127       assert(isFIBase() && "Invalid base frame index  access!");
 128       Base.FI = FI;
 129     }
 130
 131     unsigned getFI() const {
 132       assert(isFIBase() && "Invalid base frame index access!");
 133       return Base.FI;
 134     }
 135
 136     void setOffset(int64_t O) { Offset = O; }
 137     int64_t getOffset() { return Offset; }
 138     void setShift(unsigned S) { Shift = S; }
 139     unsigned getShift() { return Shift; }
 140
 141     void setGlobalValue(const GlobalValue *G) { GV = G; }
 142     const GlobalValue *getGlobalValue() { return GV; }
 143   };
 144
 145   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 146   /// make the right decision when generating code for different targets.
 147   const AArch64Subtarget *Subtarget;
 148   LLVMContext *Context;
 149
 150   bool fastLowerArguments() override;
 151   bool fastLowerCall(CallLoweringInfo &CLI) override;
 152   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 153
 154 private:
 155   // Selection routines.
 156   bool selectAddSub(const Instruction *I);
 157   bool selectLogicalOp(const Instruction *I);
 158   bool selectLoad(const Instruction *I);
 159   bool selectStore(const Instruction *I);
 160   bool selectBranch(const Instruction *I);
 161   bool selectIndirectBr(const Instruction *I);
 162   bool selectCmp(const Instruction *I);
 163   bool selectSelect(const Instruction *I);
 164   bool selectFPExt(const Instruction *I);
 165   bool selectFPTrunc(const Instruction *I);
 166   bool selectFPToInt(const Instruction *I, bool Signed);
 167   bool selectIntToFP(const Instruction *I, bool Signed);
 168   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 169   bool selectRet(const Instruction *I);
 170   bool selectTrunc(const Instruction *I);
 171   bool selectIntExt(const Instruction *I);
 172   bool selectMul(const Instruction *I);
 173   bool selectShift(const Instruction *I);
 174   bool selectBitCast(const Instruction *I);
 175   bool selectFRem(const Instruction *I);
 176   bool selectSDiv(const Instruction *I);
 177   bool selectGetElementPtr(const Instruction *I);
 178   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
 179
 180   // Utility helper routines.
 181   bool isTypeLegal(Type *Ty, MVT &VT);
 182   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 183   bool isValueAvailable(const Value *V) const;
 184   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 185   bool computeCallAddress(const Value *V, Address &Addr);
 186   bool simplifyAddress(Address &Addr, MVT VT);
 187   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 188                             MachineMemOperand::Flags Flags,
 189                             unsigned ScaleFactor, MachineMemOperand *MMO);
 190   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 191   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 192                           unsigned Alignment);
 193   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 194                          const Value *Cond);
 195   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
 196   bool optimizeSelect(const SelectInst *SI);
 197   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
 198
 199   // Emit helper routines.
 200   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 201                       const Value *RHS, bool SetFlags = false,
 202                       bool WantResult = true,  bool IsZExt = false);
 203   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 204                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 205                          bool SetFlags = false, bool WantResult = true);
 206   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 207                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 208                          bool WantResult = true);
 209   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 210                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 211                          AArch64_AM::ShiftExtendType ShiftType,
 212                          uint64_t ShiftImm, bool SetFlags = false,
 213                          bool WantResult = true);
 214   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 215                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 216                           AArch64_AM::ShiftExtendType ExtType,
 217                           uint64_t ShiftImm, bool SetFlags = false,
 218                          bool WantResult = true);
 219
 220   // Emit functions.
 221   bool emitCompareAndBranch(const BranchInst *BI);
 222   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 223   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 224   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 225   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 226   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
 227                     MachineMemOperand *MMO = nullptr);
 228   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 229                  MachineMemOperand *MMO = nullptr);
 230   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
 231                         MachineMemOperand *MMO = nullptr);
 232   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 233   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 234   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 235                    bool SetFlags = false, bool WantResult = true,
 236                    bool IsZExt = false);
 237   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
 238   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 239                    bool SetFlags = false, bool WantResult = true,
 240                    bool IsZExt = false);
 241   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 242                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 243   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 244                        unsigned RHSReg, bool RHSIsKill,
 245                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 246                        bool WantResult = true);
 247   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 248                          const Value *RHS);
 249   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 250                             bool LHSIsKill, uint64_t Imm);
 251   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 252                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 253                             uint64_t ShiftImm);
 254   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 255   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 256                       unsigned Op1, bool Op1IsKill);
 257   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 258                         unsigned Op1, bool Op1IsKill);
 259   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 260                         unsigned Op1, bool Op1IsKill);
 261   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 262                       unsigned Op1Reg, bool Op1IsKill);
 263   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 264                       uint64_t Imm, bool IsZExt = true);
 265   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 266                       unsigned Op1Reg, bool Op1IsKill);
 267   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 268                       uint64_t Imm, bool IsZExt = true);
 269   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 270                       unsigned Op1Reg, bool Op1IsKill);
 271   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 272                       uint64_t Imm, bool IsZExt = false);
 273
 274   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 275   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 276   unsigned materializeGV(const GlobalValue *GV);
 277
 278   // Call handling routines.
 279 private:
 280   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 281   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 282                        unsigned &NumBytes);
 283   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 284
 285 public:
 286   // Backend specific FastISel code.
 287   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 288   unsigned fastMaterializeConstant(const Constant *C) override;
 289   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 290
 291   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 292                            const TargetLibraryInfo *LibInfo)
 293       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 294     Subtarget =
 295         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
 296     Context = &FuncInfo.Fn->getContext();
 297   }
 298
 299   bool fastSelectInstruction(const Instruction *I) override;
 300
 301 #include "AArch64GenFastISel.inc"
 302 };
 303
 304 } // end anonymous namespace
 305
 306 #include "AArch64GenCallingConv.inc"
 307
 308 /// \brief Check if the sign-/zero-extend will be a noop.
 309 static bool isIntExtFree(const Instruction *I) {
 310   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 311          "Unexpected integer extend instruction.");
 312   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 313          "Unexpected value type.");
 314   bool IsZExt = isa<ZExtInst>(I);
 315
 316   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 317     if (LI->hasOneUse())
 318       return true;
 319
 320   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 321     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 322       return true;
 323
 324   return false;
 325 }
 326
 327 /// \brief Determine the implicit scale factor that is applied by a memory
 328 /// operation for a given value type.
 329 static unsigned getImplicitScaleFactor(MVT VT) {
 330   switch (VT.SimpleTy) {
 331   default:
 332     return 0;    // invalid
 333   case MVT::i1:  // fall-through
 334   case MVT::i8:
 335     return 1;
 336   case MVT::i16:
 337     return 2;
 338   case MVT::i32: // fall-through
 339   case MVT::f32:
 340     return 4;
 341   case MVT::i64: // fall-through
 342   case MVT::f64:
 343     return 8;
 344   }
 345 }
 346
 347 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 348   if (CC == CallingConv::WebKit_JS)
 349     return CC_AArch64_WebKit_JS;
 350   if (CC == CallingConv::GHC)
 351     return CC_AArch64_GHC;
 352   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 353 }
 354
 355 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 356   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
 357          "Alloca should always return a pointer.");
 358
 359   // Don't handle dynamic allocas.
 360   if (!FuncInfo.StaticAllocaMap.count(AI))
 361     return 0;
 362
 363   DenseMap<const AllocaInst *, int>::iterator SI =
 364       FuncInfo.StaticAllocaMap.find(AI);
 365
 366   if (SI != FuncInfo.StaticAllocaMap.end()) {
 367     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 368     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 369             ResultReg)
 370         .addFrameIndex(SI->second)
 371         .addImm(0)
 372         .addImm(0);
 373     return ResultReg;
 374   }
 375
 376   return 0;
 377 }
 378
 379 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 380   if (VT > MVT::i64)
 381     return 0;
 382
 383   if (!CI->isZero())
 384     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 385
 386   // Create a copy from the zero register to materialize a "0" value.
 387   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 388                                                    : &AArch64::GPR32RegClass;
 389   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 390   unsigned ResultReg = createResultReg(RC);
 391   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 392           ResultReg).addReg(ZeroReg, getKillRegState(true));
 393   return ResultReg;
 394 }
 395
 396 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 397   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 398   // register, because the immediate version of fmov cannot encode zero.
 399   if (CFP->isNullValue())
 400     return fastMaterializeFloatZero(CFP);
 401
 402   if (VT != MVT::f32 && VT != MVT::f64)
 403     return 0;
 404
 405   const APFloat Val = CFP->getValueAPF();
 406   bool Is64Bit = (VT == MVT::f64);
 407   // This checks to see if we can use FMOV instructions to materialize
 408   // a constant, otherwise we have to materialize via the constant pool.
 409   if (TLI.isFPImmLegal(Val, VT)) {
 410     int Imm =
 411         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 412     assert((Imm != -1) && "Cannot encode floating-point constant.");
 413     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 414     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 415   }
 416
 417   // For the MachO large code model materialize the FP constant in code.
 418   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
 419     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
 420     const TargetRegisterClass *RC = Is64Bit ?
 421         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 422
 423     unsigned TmpReg = createResultReg(RC);
 424     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
 425         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
 426
 427     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 428     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 429             TII.get(TargetOpcode::COPY), ResultReg)
 430         .addReg(TmpReg, getKillRegState(true));
 431
 432     return ResultReg;
 433   }
 434
 435   // Materialize via constant pool.  MachineConstantPool wants an explicit
 436   // alignment.
 437   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 438   if (Align == 0)
 439     Align = DL.getTypeAllocSize(CFP->getType());
 440
 441   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 442   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 443   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 444           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 445
 446   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 447   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 448   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 449       .addReg(ADRPReg)
 450       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 451   return ResultReg;
 452 }
 453
 454 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 455   // We can't handle thread-local variables quickly yet.
 456   if (GV->isThreadLocal())
 457     return 0;
 458
 459   // MachO still uses GOT for large code-model accesses, but ELF requires
 460   // movz/movk sequences, which FastISel doesn't handle yet.
 461   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
 462     return 0;
 463
 464   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 465
 466   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
 467   if (!DestEVT.isSimple())
 468     return 0;
 469
 470   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 471   unsigned ResultReg;
 472
 473   if (OpFlags & AArch64II::MO_GOT) {
 474     // ADRP + LDRX
 475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 476             ADRPReg)
 477       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 478
 479     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 480     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 481             ResultReg)
 482       .addReg(ADRPReg)
 483       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 484                         AArch64II::MO_NC);
 485   } else {
 486     // ADRP + ADDX
 487     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 488             ADRPReg)
 489       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 490
 491     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 492     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 493             ResultReg)
 494       .addReg(ADRPReg)
 495       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 496       .addImm(0);
 497   }
 498   return ResultReg;
 499 }
 500
 501 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 502   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
 503
 504   // Only handle simple types.
 505   if (!CEVT.isSimple())
 506     return 0;
 507   MVT VT = CEVT.getSimpleVT();
 508
 509   if (const auto *CI = dyn_cast<ConstantInt>(C))
 510     return materializeInt(CI, VT);
 511   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 512     return materializeFP(CFP, VT);
 513   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 514     return materializeGV(GV);
 515
 516   return 0;
 517 }
 518
 519 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 520   assert(CFP->isNullValue() &&
 521          "Floating-point constant is not a positive zero.");
 522   MVT VT;
 523   if (!isTypeLegal(CFP->getType(), VT))
 524     return 0;
 525
 526   if (VT != MVT::f32 && VT != MVT::f64)
 527     return 0;
 528
 529   bool Is64Bit = (VT == MVT::f64);
 530   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 531   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 532   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 533 }
 534
 535 /// \brief Check if the multiply is by a power-of-2 constant.
 536 static bool isMulPowOf2(const Value *I) {
 537   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 538     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 539       if (C->getValue().isPowerOf2())
 540         return true;
 541     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 542       if (C->getValue().isPowerOf2())
 543         return true;
 544   }
 545   return false;
 546 }
 547
 548 // Computes the address to get to an object.
 549 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 550 {
 551   const User *U = nullptr;
 552   unsigned Opcode = Instruction::UserOp1;
 553   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 554     // Don't walk into other basic blocks unless the object is an alloca from
 555     // another block, otherwise it may not have a virtual register assigned.
 556     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 557         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 558       Opcode = I->getOpcode();
 559       U = I;
 560     }
 561   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 562     Opcode = C->getOpcode();
 563     U = C;
 564   }
 565
 566   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
 567     if (Ty->getAddressSpace() > 255)
 568       // Fast instruction selection doesn't support the special
 569       // address spaces.
 570       return false;
 571
 572   switch (Opcode) {
 573   default:
 574     break;
 575   case Instruction::BitCast:
 576     // Look through bitcasts.
 577     return computeAddress(U->getOperand(0), Addr, Ty);
 578
 579   case Instruction::IntToPtr:
 580     // Look past no-op inttoptrs.
 581     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 582         TLI.getPointerTy(DL))
 583       return computeAddress(U->getOperand(0), Addr, Ty);
 584     break;
 585
 586   case Instruction::PtrToInt:
 587     // Look past no-op ptrtoints.
 588     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 589       return computeAddress(U->getOperand(0), Addr, Ty);
 590     break;
 591
 592   case Instruction::GetElementPtr: {
 593     Address SavedAddr = Addr;
 594     uint64_t TmpOffset = Addr.getOffset();
 595
 596     // Iterate through the GEP folding the constants into offsets where
 597     // we can.
 598     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
 599          GTI != E; ++GTI) {
 600       const Value *Op = GTI.getOperand();
 601       if (StructType *STy = GTI.getStructTypeOrNull()) {
 602         const StructLayout *SL = DL.getStructLayout(STy);
 603         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 604         TmpOffset += SL->getElementOffset(Idx);
 605       } else {
 606         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 607         while (true) {
 608           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 609             // Constant-offset addressing.
 610             TmpOffset += CI->getSExtValue() * S;
 611             break;
 612           }
 613           if (canFoldAddIntoGEP(U, Op)) {
 614             // A compatible add with a constant operand. Fold the constant.
 615             ConstantInt *CI =
 616                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 617             TmpOffset += CI->getSExtValue() * S;
 618             // Iterate on the other operand.
 619             Op = cast<AddOperator>(Op)->getOperand(0);
 620             continue;
 621           }
 622           // Unsupported
 623           goto unsupported_gep;
 624         }
 625       }
 626     }
 627
 628     // Try to grab the base operand now.
 629     Addr.setOffset(TmpOffset);
 630     if (computeAddress(U->getOperand(0), Addr, Ty))
 631       return true;
 632
 633     // We failed, restore everything and try the other options.
 634     Addr = SavedAddr;
 635
 636   unsupported_gep:
 637     break;
 638   }
 639   case Instruction::Alloca: {
 640     const AllocaInst *AI = cast<AllocaInst>(Obj);
 641     DenseMap<const AllocaInst *, int>::iterator SI =
 642         FuncInfo.StaticAllocaMap.find(AI);
 643     if (SI != FuncInfo.StaticAllocaMap.end()) {
 644       Addr.setKind(Address::FrameIndexBase);
 645       Addr.setFI(SI->second);
 646       return true;
 647     }
 648     break;
 649   }
 650   case Instruction::Add: {
 651     // Adds of constants are common and easy enough.
 652     const Value *LHS = U->getOperand(0);
 653     const Value *RHS = U->getOperand(1);
 654
 655     if (isa<ConstantInt>(LHS))
 656       std::swap(LHS, RHS);
 657
 658     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 659       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 660       return computeAddress(LHS, Addr, Ty);
 661     }
 662
 663     Address Backup = Addr;
 664     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 665       return true;
 666     Addr = Backup;
 667
 668     break;
 669   }
 670   case Instruction::Sub: {
 671     // Subs of constants are common and easy enough.
 672     const Value *LHS = U->getOperand(0);
 673     const Value *RHS = U->getOperand(1);
 674
 675     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 676       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 677       return computeAddress(LHS, Addr, Ty);
 678     }
 679     break;
 680   }
 681   case Instruction::Shl: {
 682     if (Addr.getOffsetReg())
 683       break;
 684
 685     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 686     if (!CI)
 687       break;
 688
 689     unsigned Val = CI->getZExtValue();
 690     if (Val < 1 || Val > 3)
 691       break;
 692
 693     uint64_t NumBytes = 0;
 694     if (Ty && Ty->isSized()) {
 695       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 696       NumBytes = NumBits / 8;
 697       if (!isPowerOf2_64(NumBits))
 698         NumBytes = 0;
 699     }
 700
 701     if (NumBytes != (1ULL << Val))
 702       break;
 703
 704     Addr.setShift(Val);
 705     Addr.setExtendType(AArch64_AM::LSL);
 706
 707     const Value *Src = U->getOperand(0);
 708     if (const auto *I = dyn_cast<Instruction>(Src)) {
 709       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 710         // Fold the zext or sext when it won't become a noop.
 711         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 712           if (!isIntExtFree(ZE) &&
 713               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 714             Addr.setExtendType(AArch64_AM::UXTW);
 715             Src = ZE->getOperand(0);
 716           }
 717         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 718           if (!isIntExtFree(SE) &&
 719               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 720             Addr.setExtendType(AArch64_AM::SXTW);
 721             Src = SE->getOperand(0);
 722           }
 723         }
 724       }
 725     }
 726
 727     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 728       if (AI->getOpcode() == Instruction::And) {
 729         const Value *LHS = AI->getOperand(0);
 730         const Value *RHS = AI->getOperand(1);
 731
 732         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 733           if (C->getValue() == 0xffffffff)
 734             std::swap(LHS, RHS);
 735
 736         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 737           if (C->getValue() == 0xffffffff) {
 738             Addr.setExtendType(AArch64_AM::UXTW);
 739             unsigned Reg = getRegForValue(LHS);
 740             if (!Reg)
 741               return false;
 742             bool RegIsKill = hasTrivialKill(LHS);
 743             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 744                                              AArch64::sub_32);
 745             Addr.setOffsetReg(Reg);
 746             return true;
 747           }
 748       }
 749
 750     unsigned Reg = getRegForValue(Src);
 751     if (!Reg)
 752       return false;
 753     Addr.setOffsetReg(Reg);
 754     return true;
 755   }
 756   case Instruction::Mul: {
 757     if (Addr.getOffsetReg())
 758       break;
 759
 760     if (!isMulPowOf2(U))
 761       break;
 762
 763     const Value *LHS = U->getOperand(0);
 764     const Value *RHS = U->getOperand(1);
 765
 766     // Canonicalize power-of-2 value to the RHS.
 767     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 768       if (C->getValue().isPowerOf2())
 769         std::swap(LHS, RHS);
 770
 771     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 772     const auto *C = cast<ConstantInt>(RHS);
 773     unsigned Val = C->getValue().logBase2();
 774     if (Val < 1 || Val > 3)
 775       break;
 776
 777     uint64_t NumBytes = 0;
 778     if (Ty && Ty->isSized()) {
 779       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 780       NumBytes = NumBits / 8;
 781       if (!isPowerOf2_64(NumBits))
 782         NumBytes = 0;
 783     }
 784
 785     if (NumBytes != (1ULL << Val))
 786       break;
 787
 788     Addr.setShift(Val);
 789     Addr.setExtendType(AArch64_AM::LSL);
 790
 791     const Value *Src = LHS;
 792     if (const auto *I = dyn_cast<Instruction>(Src)) {
 793       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 794         // Fold the zext or sext when it won't become a noop.
 795         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 796           if (!isIntExtFree(ZE) &&
 797               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 798             Addr.setExtendType(AArch64_AM::UXTW);
 799             Src = ZE->getOperand(0);
 800           }
 801         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 802           if (!isIntExtFree(SE) &&
 803               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 804             Addr.setExtendType(AArch64_AM::SXTW);
 805             Src = SE->getOperand(0);
 806           }
 807         }
 808       }
 809     }
 810
 811     unsigned Reg = getRegForValue(Src);
 812     if (!Reg)
 813       return false;
 814     Addr.setOffsetReg(Reg);
 815     return true;
 816   }
 817   case Instruction::And: {
 818     if (Addr.getOffsetReg())
 819       break;
 820
 821     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
 822       break;
 823
 824     const Value *LHS = U->getOperand(0);
 825     const Value *RHS = U->getOperand(1);
 826
 827     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 828       if (C->getValue() == 0xffffffff)
 829         std::swap(LHS, RHS);
 830
 831     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 832       if (C->getValue() == 0xffffffff) {
 833         Addr.setShift(0);
 834         Addr.setExtendType(AArch64_AM::LSL);
 835         Addr.setExtendType(AArch64_AM::UXTW);
 836
 837         unsigned Reg = getRegForValue(LHS);
 838         if (!Reg)
 839           return false;
 840         bool RegIsKill = hasTrivialKill(LHS);
 841         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 842                                          AArch64::sub_32);
 843         Addr.setOffsetReg(Reg);
 844         return true;
 845       }
 846     break;
 847   }
 848   case Instruction::SExt:
 849   case Instruction::ZExt: {
 850     if (!Addr.getReg() || Addr.getOffsetReg())
 851       break;
 852
 853     const Value *Src = nullptr;
 854     // Fold the zext or sext when it won't become a noop.
 855     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 856       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 857         Addr.setExtendType(AArch64_AM::UXTW);
 858         Src = ZE->getOperand(0);
 859       }
 860     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 861       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 862         Addr.setExtendType(AArch64_AM::SXTW);
 863         Src = SE->getOperand(0);
 864       }
 865     }
 866
 867     if (!Src)
 868       break;
 869
 870     Addr.setShift(0);
 871     unsigned Reg = getRegForValue(Src);
 872     if (!Reg)
 873       return false;
 874     Addr.setOffsetReg(Reg);
 875     return true;
 876   }
 877   } // end switch
 878
 879   if (Addr.isRegBase() && !Addr.getReg()) {
 880     unsigned Reg = getRegForValue(Obj);
 881     if (!Reg)
 882       return false;
 883     Addr.setReg(Reg);
 884     return true;
 885   }
 886
 887   if (!Addr.getOffsetReg()) {
 888     unsigned Reg = getRegForValue(Obj);
 889     if (!Reg)
 890       return false;
 891     Addr.setOffsetReg(Reg);
 892     return true;
 893   }
 894
 895   return false;
 896 }
 897
 898 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 899   const User *U = nullptr;
 900   unsigned Opcode = Instruction::UserOp1;
 901   bool InMBB = true;
 902
 903   if (const auto *I = dyn_cast<Instruction>(V)) {
 904     Opcode = I->getOpcode();
 905     U = I;
 906     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 907   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 908     Opcode = C->getOpcode();
 909     U = C;
 910   }
 911
 912   switch (Opcode) {
 913   default: break;
 914   case Instruction::BitCast:
 915     // Look past bitcasts if its operand is in the same BB.
 916     if (InMBB)
 917       return computeCallAddress(U->getOperand(0), Addr);
 918     break;
 919   case Instruction::IntToPtr:
 920     // Look past no-op inttoptrs if its operand is in the same BB.
 921     if (InMBB &&
 922         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 923             TLI.getPointerTy(DL))
 924       return computeCallAddress(U->getOperand(0), Addr);
 925     break;
 926   case Instruction::PtrToInt:
 927     // Look past no-op ptrtoints if its operand is in the same BB.
 928     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 929       return computeCallAddress(U->getOperand(0), Addr);
 930     break;
 931   }
 932
 933   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 934     Addr.setGlobalValue(GV);
 935     return true;
 936   }
 937
 938   // If all else fails, try to materialize the value in a register.
 939   if (!Addr.getGlobalValue()) {
 940     Addr.setReg(getRegForValue(V));
 941     return Addr.getReg() != 0;
 942   }
 943
 944   return false;
 945 }
 946
 947
 948 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 949   EVT evt = TLI.getValueType(DL, Ty, true);
 950
 951   // Only handle simple types.
 952   if (evt == MVT::Other || !evt.isSimple())
 953     return false;
 954   VT = evt.getSimpleVT();
 955
 956   // This is a legal type, but it's not something we handle in fast-isel.
 957   if (VT == MVT::f128)
 958     return false;
 959
 960   // Handle all other legal types, i.e. a register that will directly hold this
 961   // value.
 962   return TLI.isTypeLegal(VT);
 963 }
 964
 965 /// \brief Determine if the value type is supported by FastISel.
 966 ///
 967 /// FastISel for AArch64 can handle more value types than are legal. This adds
 968 /// simple value type such as i1, i8, and i16.
 969 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 970   if (Ty->isVectorTy() && !IsVectorAllowed)
 971     return false;
 972
 973   if (isTypeLegal(Ty, VT))
 974     return true;
 975
 976   // If this is a type than can be sign or zero-extended to a basic operation
 977   // go ahead and accept it now.
 978   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 979     return true;
 980
 981   return false;
 982 }
 983
 984 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 985   if (!isa<Instruction>(V))
 986     return true;
 987
 988   const auto *I = cast<Instruction>(V);
 989   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
 990 }
 991
 992 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 993   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 994   if (!ScaleFactor)
 995     return false;
 996
 997   bool ImmediateOffsetNeedsLowering = false;
 998   bool RegisterOffsetNeedsLowering = false;
 999   int64_t Offset = Addr.getOffset();
1000   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1001     ImmediateOffsetNeedsLowering = true;
1002   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1003            !isUInt<12>(Offset / ScaleFactor))
1004     ImmediateOffsetNeedsLowering = true;
1005
1006   // Cannot encode an offset register and an immediate offset in the same
1007   // instruction. Fold the immediate offset into the load/store instruction and
1008   // emit an additional add to take care of the offset register.
1009   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1010     RegisterOffsetNeedsLowering = true;
1011
1012   // Cannot encode zero register as base.
1013   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1014     RegisterOffsetNeedsLowering = true;
1015
1016   // If this is a stack pointer and the offset needs to be simplified then put
1017   // the alloca address into a register, set the base type back to register and
1018   // continue. This should almost never happen.
1019   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1020   {
1021     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1022     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1023             ResultReg)
1024       .addFrameIndex(Addr.getFI())
1025       .addImm(0)
1026       .addImm(0);
1027     Addr.setKind(Address::RegBase);
1028     Addr.setReg(ResultReg);
1029   }
1030
1031   if (RegisterOffsetNeedsLowering) {
1032     unsigned ResultReg = 0;
1033     if (Addr.getReg()) {
1034       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1035           Addr.getExtendType() == AArch64_AM::UXTW   )
1036         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1037                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1038                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1039                                   Addr.getShift());
1040       else
1041         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1042                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1043                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1044                                   Addr.getShift());
1045     } else {
1046       if (Addr.getExtendType() == AArch64_AM::UXTW)
1047         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1048                                /*Op0IsKill=*/false, Addr.getShift(),
1049                                /*IsZExt=*/true);
1050       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1051         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1052                                /*Op0IsKill=*/false, Addr.getShift(),
1053                                /*IsZExt=*/false);
1054       else
1055         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1056                                /*Op0IsKill=*/false, Addr.getShift());
1057     }
1058     if (!ResultReg)
1059       return false;
1060
1061     Addr.setReg(ResultReg);
1062     Addr.setOffsetReg(0);
1063     Addr.setShift(0);
1064     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1065   }
1066
1067   // Since the offset is too large for the load/store instruction get the
1068   // reg+offset into a register.
1069   if (ImmediateOffsetNeedsLowering) {
1070     unsigned ResultReg;
1071     if (Addr.getReg())
1072       // Try to fold the immediate into the add instruction.
1073       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1074     else
1075       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1076
1077     if (!ResultReg)
1078       return false;
1079     Addr.setReg(ResultReg);
1080     Addr.setOffset(0);
1081   }
1082   return true;
1083 }
1084
1085 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1086                                            const MachineInstrBuilder &MIB,
1087                                            MachineMemOperand::Flags Flags,
1088                                            unsigned ScaleFactor,
1089                                            MachineMemOperand *MMO) {
1090   int64_t Offset = Addr.getOffset() / ScaleFactor;
1091   // Frame base works a bit differently. Handle it separately.
1092   if (Addr.isFIBase()) {
1093     int FI = Addr.getFI();
1094     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1095     // and alignment should be based on the VT.
1096     MMO = FuncInfo.MF->getMachineMemOperand(
1097         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1098         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1099     // Now add the rest of the operands.
1100     MIB.addFrameIndex(FI).addImm(Offset);
1101   } else {
1102     assert(Addr.isRegBase() && "Unexpected address kind.");
1103     const MCInstrDesc &II = MIB->getDesc();
1104     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1105     Addr.setReg(
1106       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1107     Addr.setOffsetReg(
1108       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1109     if (Addr.getOffsetReg()) {
1110       assert(Addr.getOffset() == 0 && "Unexpected offset");
1111       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1112                       Addr.getExtendType() == AArch64_AM::SXTX;
1113       MIB.addReg(Addr.getReg());
1114       MIB.addReg(Addr.getOffsetReg());
1115       MIB.addImm(IsSigned);
1116       MIB.addImm(Addr.getShift() != 0);
1117     } else
1118       MIB.addReg(Addr.getReg()).addImm(Offset);
1119   }
1120
1121   if (MMO)
1122     MIB.addMemOperand(MMO);
1123 }
1124
1125 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1126                                      const Value *RHS, bool SetFlags,
1127                                      bool WantResult,  bool IsZExt) {
1128   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1129   bool NeedExtend = false;
1130   switch (RetVT.SimpleTy) {
1131   default:
1132     return 0;
1133   case MVT::i1:
1134     NeedExtend = true;
1135     break;
1136   case MVT::i8:
1137     NeedExtend = true;
1138     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1139     break;
1140   case MVT::i16:
1141     NeedExtend = true;
1142     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1143     break;
1144   case MVT::i32:  // fall-through
1145   case MVT::i64:
1146     break;
1147   }
1148   MVT SrcVT = RetVT;
1149   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1150
1151   // Canonicalize immediates to the RHS first.
1152   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1153     std::swap(LHS, RHS);
1154
1155   // Canonicalize mul by power of 2 to the RHS.
1156   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1157     if (isMulPowOf2(LHS))
1158       std::swap(LHS, RHS);
1159
1160   // Canonicalize shift immediate to the RHS.
1161   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1162     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1163       if (isa<ConstantInt>(SI->getOperand(1)))
1164         if (SI->getOpcode() == Instruction::Shl  ||
1165             SI->getOpcode() == Instruction::LShr ||
1166             SI->getOpcode() == Instruction::AShr   )
1167           std::swap(LHS, RHS);
1168
1169   unsigned LHSReg = getRegForValue(LHS);
1170   if (!LHSReg)
1171     return 0;
1172   bool LHSIsKill = hasTrivialKill(LHS);
1173
1174   if (NeedExtend)
1175     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1176
1177   unsigned ResultReg = 0;
1178   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1179     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1180     if (C->isNegative())
1181       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1182                                 SetFlags, WantResult);
1183     else
1184       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1185                                 WantResult);
1186   } else if (const auto *C = dyn_cast<Constant>(RHS))
1187     if (C->isNullValue())
1188       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1189                                 WantResult);
1190
1191   if (ResultReg)
1192     return ResultReg;
1193
1194   // Only extend the RHS within the instruction if there is a valid extend type.
1195   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1196       isValueAvailable(RHS)) {
1197     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1198       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1199         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1200           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1201           if (!RHSReg)
1202             return 0;
1203           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1204           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1205                                RHSIsKill, ExtendType, C->getZExtValue(),
1206                                SetFlags, WantResult);
1207         }
1208     unsigned RHSReg = getRegForValue(RHS);
1209     if (!RHSReg)
1210       return 0;
1211     bool RHSIsKill = hasTrivialKill(RHS);
1212     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1213                          ExtendType, 0, SetFlags, WantResult);
1214   }
1215
1216   // Check if the mul can be folded into the instruction.
1217   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1218     if (isMulPowOf2(RHS)) {
1219       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1220       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1221
1222       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1223         if (C->getValue().isPowerOf2())
1224           std::swap(MulLHS, MulRHS);
1225
1226       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1227       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1228       unsigned RHSReg = getRegForValue(MulLHS);
1229       if (!RHSReg)
1230         return 0;
1231       bool RHSIsKill = hasTrivialKill(MulLHS);
1232       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1233                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1234                                 WantResult);
1235       if (ResultReg)
1236         return ResultReg;
1237     }
1238   }
1239
1240   // Check if the shift can be folded into the instruction.
1241   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1242     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1243       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1244         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1245         switch (SI->getOpcode()) {
1246         default: break;
1247         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1248         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1249         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1250         }
1251         uint64_t ShiftVal = C->getZExtValue();
1252         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1253           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1254           if (!RHSReg)
1255             return 0;
1256           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1257           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1258                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1259                                     WantResult);
1260           if (ResultReg)
1261             return ResultReg;
1262         }
1263       }
1264     }
1265   }
1266
1267   unsigned RHSReg = getRegForValue(RHS);
1268   if (!RHSReg)
1269     return 0;
1270   bool RHSIsKill = hasTrivialKill(RHS);
1271
1272   if (NeedExtend)
1273     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1274
1275   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1276                        SetFlags, WantResult);
1277 }
1278
1279 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1280                                         bool LHSIsKill, unsigned RHSReg,
1281                                         bool RHSIsKill, bool SetFlags,
1282                                         bool WantResult) {
1283   assert(LHSReg && RHSReg && "Invalid register number.");
1284
1285   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287     return 0;
1288
1289   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290     return 0;
1291
1292   static const unsigned OpcTable[2][2][2] = {
1293     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1294       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1295     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1297   };
1298   bool Is64Bit = RetVT == MVT::i64;
1299   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300   const TargetRegisterClass *RC =
1301       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302   unsigned ResultReg;
1303   if (WantResult)
1304     ResultReg = createResultReg(RC);
1305   else
1306     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307
1308   const MCInstrDesc &II = TII.get(Opc);
1309   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1312       .addReg(LHSReg, getKillRegState(LHSIsKill))
1313       .addReg(RHSReg, getKillRegState(RHSIsKill));
1314   return ResultReg;
1315 }
1316
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318                                         bool LHSIsKill, uint64_t Imm,
1319                                         bool SetFlags, bool WantResult) {
1320   assert(LHSReg && "Invalid register number.");
1321
1322   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323     return 0;
1324
1325   unsigned ShiftImm;
1326   if (isUInt<12>(Imm))
1327     ShiftImm = 0;
1328   else if ((Imm & 0xfff000) == Imm) {
1329     ShiftImm = 12;
1330     Imm >>= 12;
1331   } else
1332     return 0;
1333
1334   static const unsigned OpcTable[2][2][2] = {
1335     { { AArch64::SUBWri,  AArch64::SUBXri  },
1336       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1337     { { AArch64::SUBSWri, AArch64::SUBSXri },
1338       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1339   };
1340   bool Is64Bit = RetVT == MVT::i64;
1341   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342   const TargetRegisterClass *RC;
1343   if (SetFlags)
1344     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345   else
1346     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347   unsigned ResultReg;
1348   if (WantResult)
1349     ResultReg = createResultReg(RC);
1350   else
1351     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352
1353   const MCInstrDesc &II = TII.get(Opc);
1354   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1356       .addReg(LHSReg, getKillRegState(LHSIsKill))
1357       .addImm(Imm)
1358       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359   return ResultReg;
1360 }
1361
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363                                         bool LHSIsKill, unsigned RHSReg,
1364                                         bool RHSIsKill,
1365                                         AArch64_AM::ShiftExtendType ShiftType,
1366                                         uint64_t ShiftImm, bool SetFlags,
1367                                         bool WantResult) {
1368   assert(LHSReg && RHSReg && "Invalid register number.");
1369   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1370          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1371
1372   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1373     return 0;
1374
1375   // Don't deal with undefined shifts.
1376   if (ShiftImm >= RetVT.getSizeInBits())
1377     return 0;
1378
1379   static const unsigned OpcTable[2][2][2] = {
1380     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1381       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1382     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1383       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1384   };
1385   bool Is64Bit = RetVT == MVT::i64;
1386   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1387   const TargetRegisterClass *RC =
1388       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1389   unsigned ResultReg;
1390   if (WantResult)
1391     ResultReg = createResultReg(RC);
1392   else
1393     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1394
1395   const MCInstrDesc &II = TII.get(Opc);
1396   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1397   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1398   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1399       .addReg(LHSReg, getKillRegState(LHSIsKill))
1400       .addReg(RHSReg, getKillRegState(RHSIsKill))
1401       .addImm(getShifterImm(ShiftType, ShiftImm));
1402   return ResultReg;
1403 }
1404
1405 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1406                                         bool LHSIsKill, unsigned RHSReg,
1407                                         bool RHSIsKill,
1408                                         AArch64_AM::ShiftExtendType ExtType,
1409                                         uint64_t ShiftImm, bool SetFlags,
1410                                         bool WantResult) {
1411   assert(LHSReg && RHSReg && "Invalid register number.");
1412   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1413          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1414
1415   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1416     return 0;
1417
1418   if (ShiftImm >= 4)
1419     return 0;
1420
1421   static const unsigned OpcTable[2][2][2] = {
1422     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1423       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1424     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1425       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1426   };
1427   bool Is64Bit = RetVT == MVT::i64;
1428   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1429   const TargetRegisterClass *RC = nullptr;
1430   if (SetFlags)
1431     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1432   else
1433     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1434   unsigned ResultReg;
1435   if (WantResult)
1436     ResultReg = createResultReg(RC);
1437   else
1438     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1439
1440   const MCInstrDesc &II = TII.get(Opc);
1441   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1442   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1443   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1444       .addReg(LHSReg, getKillRegState(LHSIsKill))
1445       .addReg(RHSReg, getKillRegState(RHSIsKill))
1446       .addImm(getArithExtendImm(ExtType, ShiftImm));
1447   return ResultReg;
1448 }
1449
1450 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1451   Type *Ty = LHS->getType();
1452   EVT EVT = TLI.getValueType(DL, Ty, true);
1453   if (!EVT.isSimple())
1454     return false;
1455   MVT VT = EVT.getSimpleVT();
1456
1457   switch (VT.SimpleTy) {
1458   default:
1459     return false;
1460   case MVT::i1:
1461   case MVT::i8:
1462   case MVT::i16:
1463   case MVT::i32:
1464   case MVT::i64:
1465     return emitICmp(VT, LHS, RHS, IsZExt);
1466   case MVT::f32:
1467   case MVT::f64:
1468     return emitFCmp(VT, LHS, RHS);
1469   }
1470 }
1471
1472 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1473                                bool IsZExt) {
1474   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1475                  IsZExt) != 0;
1476 }
1477
1478 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1479                                   uint64_t Imm) {
1480   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1481                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1482 }
1483
1484 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1485   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1486     return false;
1487
1488   // Check to see if the 2nd operand is a constant that we can encode directly
1489   // in the compare.
1490   bool UseImm = false;
1491   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1492     if (CFP->isZero() && !CFP->isNegative())
1493       UseImm = true;
1494
1495   unsigned LHSReg = getRegForValue(LHS);
1496   if (!LHSReg)
1497     return false;
1498   bool LHSIsKill = hasTrivialKill(LHS);
1499
1500   if (UseImm) {
1501     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1502     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1503         .addReg(LHSReg, getKillRegState(LHSIsKill));
1504     return true;
1505   }
1506
1507   unsigned RHSReg = getRegForValue(RHS);
1508   if (!RHSReg)
1509     return false;
1510   bool RHSIsKill = hasTrivialKill(RHS);
1511
1512   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1513   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1514       .addReg(LHSReg, getKillRegState(LHSIsKill))
1515       .addReg(RHSReg, getKillRegState(RHSIsKill));
1516   return true;
1517 }
1518
1519 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1520                                   bool SetFlags, bool WantResult, bool IsZExt) {
1521   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1522                     IsZExt);
1523 }
1524
1525 /// \brief This method is a wrapper to simplify add emission.
1526 ///
1527 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1528 /// that fails, then try to materialize the immediate into a register and use
1529 /// emitAddSub_rr instead.
1530 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1531                                       int64_t Imm) {
1532   unsigned ResultReg;
1533   if (Imm < 0)
1534     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1535   else
1536     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1537
1538   if (ResultReg)
1539     return ResultReg;
1540
1541   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1542   if (!CReg)
1543     return 0;
1544
1545   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1546   return ResultReg;
1547 }
1548
1549 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1550                                   bool SetFlags, bool WantResult, bool IsZExt) {
1551   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1552                     IsZExt);
1553 }
1554
1555 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1556                                       bool LHSIsKill, unsigned RHSReg,
1557                                       bool RHSIsKill, bool WantResult) {
1558   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1559                        RHSIsKill, /*SetFlags=*/true, WantResult);
1560 }
1561
1562 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1563                                       bool LHSIsKill, unsigned RHSReg,
1564                                       bool RHSIsKill,
1565                                       AArch64_AM::ShiftExtendType ShiftType,
1566                                       uint64_t ShiftImm, bool WantResult) {
1567   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1568                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1569                        WantResult);
1570 }
1571
1572 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1573                                         const Value *LHS, const Value *RHS) {
1574   // Canonicalize immediates to the RHS first.
1575   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1576     std::swap(LHS, RHS);
1577
1578   // Canonicalize mul by power-of-2 to the RHS.
1579   if (LHS->hasOneUse() && isValueAvailable(LHS))
1580     if (isMulPowOf2(LHS))
1581       std::swap(LHS, RHS);
1582
1583   // Canonicalize shift immediate to the RHS.
1584   if (LHS->hasOneUse() && isValueAvailable(LHS))
1585     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1586       if (isa<ConstantInt>(SI->getOperand(1)))
1587         std::swap(LHS, RHS);
1588
1589   unsigned LHSReg = getRegForValue(LHS);
1590   if (!LHSReg)
1591     return 0;
1592   bool LHSIsKill = hasTrivialKill(LHS);
1593
1594   unsigned ResultReg = 0;
1595   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1596     uint64_t Imm = C->getZExtValue();
1597     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1598   }
1599   if (ResultReg)
1600     return ResultReg;
1601
1602   // Check if the mul can be folded into the instruction.
1603   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1604     if (isMulPowOf2(RHS)) {
1605       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1606       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1607
1608       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1609         if (C->getValue().isPowerOf2())
1610           std::swap(MulLHS, MulRHS);
1611
1612       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1613       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1614
1615       unsigned RHSReg = getRegForValue(MulLHS);
1616       if (!RHSReg)
1617         return 0;
1618       bool RHSIsKill = hasTrivialKill(MulLHS);
1619       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1620                                    RHSIsKill, ShiftVal);
1621       if (ResultReg)
1622         return ResultReg;
1623     }
1624   }
1625
1626   // Check if the shift can be folded into the instruction.
1627   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1628     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1629       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1630         uint64_t ShiftVal = C->getZExtValue();
1631         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1632         if (!RHSReg)
1633           return 0;
1634         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1635         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1636                                      RHSIsKill, ShiftVal);
1637         if (ResultReg)
1638           return ResultReg;
1639       }
1640   }
1641
1642   unsigned RHSReg = getRegForValue(RHS);
1643   if (!RHSReg)
1644     return 0;
1645   bool RHSIsKill = hasTrivialKill(RHS);
1646
1647   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1648   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1649   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1650     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1651     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1652   }
1653   return ResultReg;
1654 }
1655
1656 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1657                                            unsigned LHSReg, bool LHSIsKill,
1658                                            uint64_t Imm) {
1659   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1660                 "ISD nodes are not consecutive!");
1661   static const unsigned OpcTable[3][2] = {
1662     { AArch64::ANDWri, AArch64::ANDXri },
1663     { AArch64::ORRWri, AArch64::ORRXri },
1664     { AArch64::EORWri, AArch64::EORXri }
1665   };
1666   const TargetRegisterClass *RC;
1667   unsigned Opc;
1668   unsigned RegSize;
1669   switch (RetVT.SimpleTy) {
1670   default:
1671     return 0;
1672   case MVT::i1:
1673   case MVT::i8:
1674   case MVT::i16:
1675   case MVT::i32: {
1676     unsigned Idx = ISDOpc - ISD::AND;
1677     Opc = OpcTable[Idx][0];
1678     RC = &AArch64::GPR32spRegClass;
1679     RegSize = 32;
1680     break;
1681   }
1682   case MVT::i64:
1683     Opc = OpcTable[ISDOpc - ISD::AND][1];
1684     RC = &AArch64::GPR64spRegClass;
1685     RegSize = 64;
1686     break;
1687   }
1688
1689   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1690     return 0;
1691
1692   unsigned ResultReg =
1693       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1694                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1695   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1696     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1697     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1698   }
1699   return ResultReg;
1700 }
1701
1702 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1703                                            unsigned LHSReg, bool LHSIsKill,
1704                                            unsigned RHSReg, bool RHSIsKill,
1705                                            uint64_t ShiftImm) {
1706   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1707                 "ISD nodes are not consecutive!");
1708   static const unsigned OpcTable[3][2] = {
1709     { AArch64::ANDWrs, AArch64::ANDXrs },
1710     { AArch64::ORRWrs, AArch64::ORRXrs },
1711     { AArch64::EORWrs, AArch64::EORXrs }
1712   };
1713
1714   // Don't deal with undefined shifts.
1715   if (ShiftImm >= RetVT.getSizeInBits())
1716     return 0;
1717
1718   const TargetRegisterClass *RC;
1719   unsigned Opc;
1720   switch (RetVT.SimpleTy) {
1721   default:
1722     return 0;
1723   case MVT::i1:
1724   case MVT::i8:
1725   case MVT::i16:
1726   case MVT::i32:
1727     Opc = OpcTable[ISDOpc - ISD::AND][0];
1728     RC = &AArch64::GPR32RegClass;
1729     break;
1730   case MVT::i64:
1731     Opc = OpcTable[ISDOpc - ISD::AND][1];
1732     RC = &AArch64::GPR64RegClass;
1733     break;
1734   }
1735   unsigned ResultReg =
1736       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1737                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1738   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1739     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1740     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1741   }
1742   return ResultReg;
1743 }
1744
1745 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1746                                      uint64_t Imm) {
1747   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1748 }
1749
1750 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1751                                    bool WantZExt, MachineMemOperand *MMO) {
1752   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1753     return 0;
1754
1755   // Simplify this down to something we can handle.
1756   if (!simplifyAddress(Addr, VT))
1757     return 0;
1758
1759   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1760   if (!ScaleFactor)
1761     llvm_unreachable("Unexpected value type.");
1762
1763   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1764   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1765   bool UseScaled = true;
1766   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1767     UseScaled = false;
1768     ScaleFactor = 1;
1769   }
1770
1771   static const unsigned GPOpcTable[2][8][4] = {
1772     // Sign-extend.
1773     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1774         AArch64::LDURXi  },
1775       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1776         AArch64::LDURXi  },
1777       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1778         AArch64::LDRXui  },
1779       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1780         AArch64::LDRXui  },
1781       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1782         AArch64::LDRXroX },
1783       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1784         AArch64::LDRXroX },
1785       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1786         AArch64::LDRXroW },
1787       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1788         AArch64::LDRXroW }
1789     },
1790     // Zero-extend.
1791     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1792         AArch64::LDURXi  },
1793       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1794         AArch64::LDURXi  },
1795       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1796         AArch64::LDRXui  },
1797       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1798         AArch64::LDRXui  },
1799       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1800         AArch64::LDRXroX },
1801       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1802         AArch64::LDRXroX },
1803       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1804         AArch64::LDRXroW },
1805       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1806         AArch64::LDRXroW }
1807     }
1808   };
1809
1810   static const unsigned FPOpcTable[4][2] = {
1811     { AArch64::LDURSi,  AArch64::LDURDi  },
1812     { AArch64::LDRSui,  AArch64::LDRDui  },
1813     { AArch64::LDRSroX, AArch64::LDRDroX },
1814     { AArch64::LDRSroW, AArch64::LDRDroW }
1815   };
1816
1817   unsigned Opc;
1818   const TargetRegisterClass *RC;
1819   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1820                       Addr.getOffsetReg();
1821   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1822   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1823       Addr.getExtendType() == AArch64_AM::SXTW)
1824     Idx++;
1825
1826   bool IsRet64Bit = RetVT == MVT::i64;
1827   switch (VT.SimpleTy) {
1828   default:
1829     llvm_unreachable("Unexpected value type.");
1830   case MVT::i1: // Intentional fall-through.
1831   case MVT::i8:
1832     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1833     RC = (IsRet64Bit && !WantZExt) ?
1834              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1835     break;
1836   case MVT::i16:
1837     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1838     RC = (IsRet64Bit && !WantZExt) ?
1839              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1840     break;
1841   case MVT::i32:
1842     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1843     RC = (IsRet64Bit && !WantZExt) ?
1844              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1845     break;
1846   case MVT::i64:
1847     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1848     RC = &AArch64::GPR64RegClass;
1849     break;
1850   case MVT::f32:
1851     Opc = FPOpcTable[Idx][0];
1852     RC = &AArch64::FPR32RegClass;
1853     break;
1854   case MVT::f64:
1855     Opc = FPOpcTable[Idx][1];
1856     RC = &AArch64::FPR64RegClass;
1857     break;
1858   }
1859
1860   // Create the base instruction, then add the operands.
1861   unsigned ResultReg = createResultReg(RC);
1862   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1863                                     TII.get(Opc), ResultReg);
1864   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1865
1866   // Loading an i1 requires special handling.
1867   if (VT == MVT::i1) {
1868     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1869     assert(ANDReg && "Unexpected AND instruction emission failure.");
1870     ResultReg = ANDReg;
1871   }
1872
1873   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1874   // the 32bit reg to a 64bit reg.
1875   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1876     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1877     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1878             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1879         .addImm(0)
1880         .addReg(ResultReg, getKillRegState(true))
1881         .addImm(AArch64::sub_32);
1882     ResultReg = Reg64;
1883   }
1884   return ResultReg;
1885 }
1886
1887 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1888   MVT VT;
1889   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1890     return false;
1891
1892   if (VT.isVector())
1893     return selectOperator(I, I->getOpcode());
1894
1895   unsigned ResultReg;
1896   switch (I->getOpcode()) {
1897   default:
1898     llvm_unreachable("Unexpected instruction.");
1899   case Instruction::Add:
1900     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1901     break;
1902   case Instruction::Sub:
1903     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1904     break;
1905   }
1906   if (!ResultReg)
1907     return false;
1908
1909   updateValueMap(I, ResultReg);
1910   return true;
1911 }
1912
1913 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1914   MVT VT;
1915   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1916     return false;
1917
1918   if (VT.isVector())
1919     return selectOperator(I, I->getOpcode());
1920
1921   unsigned ResultReg;
1922   switch (I->getOpcode()) {
1923   default:
1924     llvm_unreachable("Unexpected instruction.");
1925   case Instruction::And:
1926     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1927     break;
1928   case Instruction::Or:
1929     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1930     break;
1931   case Instruction::Xor:
1932     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1933     break;
1934   }
1935   if (!ResultReg)
1936     return false;
1937
1938   updateValueMap(I, ResultReg);
1939   return true;
1940 }
1941
1942 bool AArch64FastISel::selectLoad(const Instruction *I) {
1943   MVT VT;
1944   // Verify we have a legal type before going any further.  Currently, we handle
1945   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1946   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1947   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1948       cast<LoadInst>(I)->isAtomic())
1949     return false;
1950
1951   const Value *SV = I->getOperand(0);
1952   if (TLI.supportSwiftError()) {
1953     // Swifterror values can come from either a function parameter with
1954     // swifterror attribute or an alloca with swifterror attribute.
1955     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1956       if (Arg->hasSwiftErrorAttr())
1957         return false;
1958     }
1959
1960     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1961       if (Alloca->isSwiftError())
1962         return false;
1963     }
1964   }
1965
1966   // See if we can handle this address.
1967   Address Addr;
1968   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1969     return false;
1970
1971   // Fold the following sign-/zero-extend into the load instruction.
1972   bool WantZExt = true;
1973   MVT RetVT = VT;
1974   const Value *IntExtVal = nullptr;
1975   if (I->hasOneUse()) {
1976     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1977       if (isTypeSupported(ZE->getType(), RetVT))
1978         IntExtVal = ZE;
1979       else
1980         RetVT = VT;
1981     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1982       if (isTypeSupported(SE->getType(), RetVT))
1983         IntExtVal = SE;
1984       else
1985         RetVT = VT;
1986       WantZExt = false;
1987     }
1988   }
1989
1990   unsigned ResultReg =
1991       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1992   if (!ResultReg)
1993     return false;
1994
1995   // There are a few different cases we have to handle, because the load or the
1996   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1997   // SelectionDAG. There is also an ordering issue when both instructions are in
1998   // different basic blocks.
1999   // 1.) The load instruction is selected by FastISel, but the integer extend
2000   //     not. This usually happens when the integer extend is in a different
2001   //     basic block and SelectionDAG took over for that basic block.
2002   // 2.) The load instruction is selected before the integer extend. This only
2003   //     happens when the integer extend is in a different basic block.
2004   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2005   //     by FastISel. This happens if there are instructions between the load
2006   //     and the integer extend that couldn't be selected by FastISel.
2007   if (IntExtVal) {
2008     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2009     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2010     // it when it selects the integer extend.
2011     unsigned Reg = lookUpRegForValue(IntExtVal);
2012     auto *MI = MRI.getUniqueVRegDef(Reg);
2013     if (!MI) {
2014       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2015         if (WantZExt) {
2016           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2017           std::prev(FuncInfo.InsertPt)->eraseFromParent();
2018           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2019         } else
2020           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2021                                                  /*IsKill=*/true,
2022                                                  AArch64::sub_32);
2023       }
2024       updateValueMap(I, ResultReg);
2025       return true;
2026     }
2027
2028     // The integer extend has already been emitted - delete all the instructions
2029     // that have been emitted by the integer extend lowering code and use the
2030     // result from the load instruction directly.
2031     while (MI) {
2032       Reg = 0;
2033       for (auto &Opnd : MI->uses()) {
2034         if (Opnd.isReg()) {
2035           Reg = Opnd.getReg();
2036           break;
2037         }
2038       }
2039       MI->eraseFromParent();
2040       MI = nullptr;
2041       if (Reg)
2042         MI = MRI.getUniqueVRegDef(Reg);
2043     }
2044     updateValueMap(IntExtVal, ResultReg);
2045     return true;
2046   }
2047
2048   updateValueMap(I, ResultReg);
2049   return true;
2050 }
2051
2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053                                        unsigned AddrReg,
2054                                        MachineMemOperand *MMO) {
2055   unsigned Opc;
2056   switch (VT.SimpleTy) {
2057   default: return false;
2058   case MVT::i8:  Opc = AArch64::STLRB; break;
2059   case MVT::i16: Opc = AArch64::STLRH; break;
2060   case MVT::i32: Opc = AArch64::STLRW; break;
2061   case MVT::i64: Opc = AArch64::STLRX; break;
2062   }
2063
2064   const MCInstrDesc &II = TII.get(Opc);
2065   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068       .addReg(SrcReg)
2069       .addReg(AddrReg)
2070       .addMemOperand(MMO);
2071   return true;
2072 }
2073
2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075                                 MachineMemOperand *MMO) {
2076   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077     return false;
2078
2079   // Simplify this down to something we can handle.
2080   if (!simplifyAddress(Addr, VT))
2081     return false;
2082
2083   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084   if (!ScaleFactor)
2085     llvm_unreachable("Unexpected value type.");
2086
2087   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089   bool UseScaled = true;
2090   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091     UseScaled = false;
2092     ScaleFactor = 1;
2093   }
2094
2095   static const unsigned OpcTable[4][6] = {
2096     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2097       AArch64::STURSi,   AArch64::STURDi },
2098     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2099       AArch64::STRSui,   AArch64::STRDui },
2100     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101       AArch64::STRSroX,  AArch64::STRDroX },
2102     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103       AArch64::STRSroW,  AArch64::STRDroW }
2104   };
2105
2106   unsigned Opc;
2107   bool VTIsi1 = false;
2108   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109                       Addr.getOffsetReg();
2110   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112       Addr.getExtendType() == AArch64_AM::SXTW)
2113     Idx++;
2114
2115   switch (VT.SimpleTy) {
2116   default: llvm_unreachable("Unexpected value type.");
2117   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2118   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2119   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2124   }
2125
2126   // Storing an i1 requires special handling.
2127   if (VTIsi1 && SrcReg != AArch64::WZR) {
2128     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129     assert(ANDReg && "Unexpected AND instruction emission failure.");
2130     SrcReg = ANDReg;
2131   }
2132   // Create the base instruction, then add the operands.
2133   const MCInstrDesc &II = TII.get(Opc);
2134   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135   MachineInstrBuilder MIB =
2136       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2138
2139   return true;
2140 }
2141
2142 bool AArch64FastISel::selectStore(const Instruction *I) {
2143   MVT VT;
2144   const Value *Op0 = I->getOperand(0);
2145   // Verify we have a legal type before going any further.  Currently, we handle
2146   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149     return false;
2150
2151   const Value *PtrV = I->getOperand(1);
2152   if (TLI.supportSwiftError()) {
2153     // Swifterror values can come from either a function parameter with
2154     // swifterror attribute or an alloca with swifterror attribute.
2155     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156       if (Arg->hasSwiftErrorAttr())
2157         return false;
2158     }
2159
2160     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161       if (Alloca->isSwiftError())
2162         return false;
2163     }
2164   }
2165
2166   // Get the value to be stored into a register. Use the zero register directly
2167   // when possible to avoid an unnecessary copy and a wasted register.
2168   unsigned SrcReg = 0;
2169   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170     if (CI->isZero())
2171       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173     if (CF->isZero() && !CF->isNegative()) {
2174       VT = MVT::getIntegerVT(VT.getSizeInBits());
2175       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176     }
2177   }
2178
2179   if (!SrcReg)
2180     SrcReg = getRegForValue(Op0);
2181
2182   if (!SrcReg)
2183     return false;
2184
2185   auto *SI = cast<StoreInst>(I);
2186
2187   // Try to emit a STLR for seq_cst/release.
2188   if (SI->isAtomic()) {
2189     AtomicOrdering Ord = SI->getOrdering();
2190     // The non-atomic instructions are sufficient for relaxed stores.
2191     if (isReleaseOrStronger(Ord)) {
2192       // The STLR addressing mode only supports a base reg; pass that directly.
2193       unsigned AddrReg = getRegForValue(PtrV);
2194       return emitStoreRelease(VT, SrcReg, AddrReg,
2195                               createMachineMemOperandFor(I));
2196     }
2197   }
2198
2199   // See if we can handle this address.
2200   Address Addr;
2201   if (!computeAddress(PtrV, Addr, Op0->getType()))
2202     return false;
2203
2204   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205     return false;
2206   return true;
2207 }
2208
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210   switch (Pred) {
2211   case CmpInst::FCMP_ONE:
2212   case CmpInst::FCMP_UEQ:
2213   default:
2214     // AL is our "false" for now. The other two need more compares.
2215     return AArch64CC::AL;
2216   case CmpInst::ICMP_EQ:
2217   case CmpInst::FCMP_OEQ:
2218     return AArch64CC::EQ;
2219   case CmpInst::ICMP_SGT:
2220   case CmpInst::FCMP_OGT:
2221     return AArch64CC::GT;
2222   case CmpInst::ICMP_SGE:
2223   case CmpInst::FCMP_OGE:
2224     return AArch64CC::GE;
2225   case CmpInst::ICMP_UGT:
2226   case CmpInst::FCMP_UGT:
2227     return AArch64CC::HI;
2228   case CmpInst::FCMP_OLT:
2229     return AArch64CC::MI;
2230   case CmpInst::ICMP_ULE:
2231   case CmpInst::FCMP_OLE:
2232     return AArch64CC::LS;
2233   case CmpInst::FCMP_ORD:
2234     return AArch64CC::VC;
2235   case CmpInst::FCMP_UNO:
2236     return AArch64CC::VS;
2237   case CmpInst::FCMP_UGE:
2238     return AArch64CC::PL;
2239   case CmpInst::ICMP_SLT:
2240   case CmpInst::FCMP_ULT:
2241     return AArch64CC::LT;
2242   case CmpInst::ICMP_SLE:
2243   case CmpInst::FCMP_ULE:
2244     return AArch64CC::LE;
2245   case CmpInst::FCMP_UNE:
2246   case CmpInst::ICMP_NE:
2247     return AArch64CC::NE;
2248   case CmpInst::ICMP_UGE:
2249     return AArch64CC::HS;
2250   case CmpInst::ICMP_ULT:
2251     return AArch64CC::LO;
2252   }
2253 }
2254
2255 /// \brief Try to emit a combined compare-and-branch instruction.
2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2258   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2259   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2260
2261   const Value *LHS = CI->getOperand(0);
2262   const Value *RHS = CI->getOperand(1);
2263
2264   MVT VT;
2265   if (!isTypeSupported(LHS->getType(), VT))
2266     return false;
2267
2268   unsigned BW = VT.getSizeInBits();
2269   if (BW > 64)
2270     return false;
2271
2272   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2273   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2274
2275   // Try to take advantage of fallthrough opportunities.
2276   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2277     std::swap(TBB, FBB);
2278     Predicate = CmpInst::getInversePredicate(Predicate);
2279   }
2280
2281   int TestBit = -1;
2282   bool IsCmpNE;
2283   switch (Predicate) {
2284   default:
2285     return false;
2286   case CmpInst::ICMP_EQ:
2287   case CmpInst::ICMP_NE:
2288     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2289       std::swap(LHS, RHS);
2290
2291     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2292       return false;
2293
2294     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2295       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2296         const Value *AndLHS = AI->getOperand(0);
2297         const Value *AndRHS = AI->getOperand(1);
2298
2299         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2300           if (C->getValue().isPowerOf2())
2301             std::swap(AndLHS, AndRHS);
2302
2303         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2304           if (C->getValue().isPowerOf2()) {
2305             TestBit = C->getValue().logBase2();
2306             LHS = AndLHS;
2307           }
2308       }
2309
2310     if (VT == MVT::i1)
2311       TestBit = 0;
2312
2313     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2314     break;
2315   case CmpInst::ICMP_SLT:
2316   case CmpInst::ICMP_SGE:
2317     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2318       return false;
2319
2320     TestBit = BW - 1;
2321     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2322     break;
2323   case CmpInst::ICMP_SGT:
2324   case CmpInst::ICMP_SLE:
2325     if (!isa<ConstantInt>(RHS))
2326       return false;
2327
2328     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2329       return false;
2330
2331     TestBit = BW - 1;
2332     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2333     break;
2334   } // end switch
2335
2336   static const unsigned OpcTable[2][2][2] = {
2337     { {AArch64::CBZW,  AArch64::CBZX },
2338       {AArch64::CBNZW, AArch64::CBNZX} },
2339     { {AArch64::TBZW,  AArch64::TBZX },
2340       {AArch64::TBNZW, AArch64::TBNZX} }
2341   };
2342
2343   bool IsBitTest = TestBit != -1;
2344   bool Is64Bit = BW == 64;
2345   if (TestBit < 32 && TestBit >= 0)
2346     Is64Bit = false;
2347
2348   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2349   const MCInstrDesc &II = TII.get(Opc);
2350
2351   unsigned SrcReg = getRegForValue(LHS);
2352   if (!SrcReg)
2353     return false;
2354   bool SrcIsKill = hasTrivialKill(LHS);
2355
2356   if (BW == 64 && !Is64Bit)
2357     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2358                                         AArch64::sub_32);
2359
2360   if ((BW < 32) && !IsBitTest)
2361     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2362
2363   // Emit the combined compare and branch instruction.
2364   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2365   MachineInstrBuilder MIB =
2366       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2367           .addReg(SrcReg, getKillRegState(SrcIsKill));
2368   if (IsBitTest)
2369     MIB.addImm(TestBit);
2370   MIB.addMBB(TBB);
2371
2372   finishCondBranch(BI->getParent(), TBB, FBB);
2373   return true;
2374 }
2375
2376 bool AArch64FastISel::selectBranch(const Instruction *I) {
       // Lowers an IR branch instruction. Unconditional branches are handed to
       // fastEmitBranch. For conditional branches the strategies below are tried
       // in turn: a single-use compare folded into the branch, a constant
       // condition, a condition folded by foldXALUIntrinsic, and finally a test
       // of bit 0 of the condition register. Returns false when a required
       // compare or register cannot be produced.
2377   const BranchInst *BI = cast<BranchInst>(I);
2378   if (BI->isUnconditional()) {
2379     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2380     fastEmitBranch(MSucc, BI->getDebugLoc());
2381     return true;
2382   }
2383
       // TBB is the block taken when the condition is true, FBB when it is false
       // (see the constant-condition case below, which picks FBB for zero).
2384   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2385   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2386
2387   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
         // Only fold the compare when this branch is its single user and
         // isValueAvailable accepts it; otherwise control falls through to the
         // generic bit-test lowering at the bottom of the function.
2388     if (CI->hasOneUse() && isValueAvailable(CI)) {
2389       // Try to optimize or fold the cmp.
2390       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2391       switch (Predicate) {
2392       default:
2393         break;
2394       case CmpInst::FCMP_FALSE:
             // The predicate is always false: branch straight to the false block.
2395         fastEmitBranch(FBB, DbgLoc);
2396         return true;
2397       case CmpInst::FCMP_TRUE:
             // The predicate is always true: branch straight to the true block.
2398         fastEmitBranch(TBB, DbgLoc);
2399         return true;
2400       }
2401
2402       // Try to emit a combined compare-and-branch first.
2403       if (emitCompareAndBranch(BI))
2404         return true;
2405
2406       // Try to take advantage of fallthrough opportunities.
           // If the true block is next in layout, branch to the false block on the
           // inverted predicate instead.
2407       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2408         std::swap(TBB, FBB);
2409         Predicate = CmpInst::getInversePredicate(Predicate);
2410       }
2411
2412       // Emit the cmp.
2413       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2414         return false;
2415
2416       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2417       // instruction.
           // ExtraCC stays AL when no second branch is needed.
2418       AArch64CC::CondCode CC = getCompareCC(Predicate);
2419       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2420       switch (Predicate) {
2421       default:
2422         break;
2423       case CmpInst::FCMP_UEQ:
2424         ExtraCC = AArch64CC::EQ;
2425         CC = AArch64CC::VS;
2426         break;
2427       case CmpInst::FCMP_ONE:
2428         ExtraCC = AArch64CC::MI;
2429         CC = AArch64CC::GT;
2430         break;
2431       }
2432       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2433
2434       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
           // Both conditional branches target TBB, so TBB is reached when either
           // condition code holds.
2435       if (ExtraCC != AArch64CC::AL) {
2436         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2437             .addImm(ExtraCC)
2438             .addMBB(TBB);
2439       }
2440
2441       // Emit the branch.
2442       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2443           .addImm(CC)
2444           .addMBB(TBB);
2445
2446       finishCondBranch(BI->getParent(), TBB, FBB);
2447       return true;
2448     }
2449   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
         // Constant condition: emit an unconditional B to the one successor that
         // is actually taken and register only that block as a successor.
2450     uint64_t Imm = CI->getZExtValue();
2451     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2452     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2453         .addMBB(Target);
2454
2455     // Obtain the branch probability and add the target to the successor list.
2456     if (FuncInfo.BPI) {
2457       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2458           BI->getParent(), Target->getBasicBlock());
2459       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2460     } else
2461       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2462     return true;
2463   } else {
         // CC starts out as NE and is overwritten by foldXALUIntrinsic when the
         // fold succeeds.
2464     AArch64CC::CondCode CC = AArch64CC::NE;
2465     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2466       // Fake request the condition, otherwise the intrinsic might be completely
2467       // optimized away.
2468       unsigned CondReg = getRegForValue(BI->getCondition());
2469       if (!CondReg)
2470         return false;
2471
2472       // Emit the branch.
2473       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2474         .addImm(CC)
2475         .addMBB(TBB);
2476
2477       finishCondBranch(BI->getParent(), TBB, FBB);
2478       return true;
2479     }
2480   }
2481
       // Generic fallback: materialize the condition in a register and branch on
       // its lowest bit.
2482   unsigned CondReg = getRegForValue(BI->getCondition());
2483   if (CondReg == 0)
2484     return false;
2485   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2486
2487   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
       // TBNZ jumps to TBB when the bit is set; when TBB is the layout successor
       // the blocks are swapped and TBZ is used instead.
2488   unsigned Opcode = AArch64::TBNZW;
2489   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2490     std::swap(TBB, FBB);
2491     Opcode = AArch64::TBZW;
2492   }
2493
2494   const MCInstrDesc &II = TII.get(Opcode);
2495   unsigned ConstrainedCondReg
2496     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
       // Operands: register to test, bit number (0), branch target.
2497   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2498       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2499       .addImm(0)
2500       .addMBB(TBB);
2501
2502   finishCondBranch(BI->getParent(), TBB, FBB);
2503   return true;
2504 }
2505
2506 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2507   const auto *IndBr = cast<IndirectBrInst>(I);
2508   // The jump target is operand 0; give up if it cannot be put in a register.
2509   unsigned TargetReg = getRegForValue(IndBr->getOperand(0));
2510   if (!TargetReg)
2511     return false;
2512
2513   // Constrain the target to what BR accepts, then emit the branch itself.
2514   const MCInstrDesc &BrDesc = TII.get(AArch64::BR);
2515   TargetReg = constrainOperandRegClass(BrDesc, TargetReg, BrDesc.getNumDefs());
2516   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, BrDesc).addReg(TargetReg);
2517
2518   // Record every possible destination as a successor of the current block.
2519   for (auto *Dest : IndBr->successors())
2520     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Dest]);
2521   return true;
2522 }
2523
2524 bool AArch64FastISel::selectCmp(const Instruction *I) {
       // Lowers a scalar compare instruction into a value held in a 32-bit GPR.
       // Returns false for vector results or when emitCmp cannot emit the
       // flag-setting compare.
2525   const CmpInst *CI = cast<CmpInst>(I);
2526
2527   // Vectors of i1 are weird: bail out.
2528   if (CI->getType()->isVectorTy())
2529     return false;
2530
2531   // Try to optimize or fold the cmp.
       // ResultReg stays 0 unless the predicate folds to a constant below.
2532   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2533   unsigned ResultReg = 0;
2534   switch (Predicate) {
2535   default:
2536     break;
2537   case CmpInst::FCMP_FALSE:
         // Always false: the result is a copy of the zero register.
2538     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2539     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2540             TII.get(TargetOpcode::COPY), ResultReg)
2541         .addReg(AArch64::WZR, getKillRegState(true));
2542     break;
2543   case CmpInst::FCMP_TRUE:
         // Always true: the result is the constant 1.
2544     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2545     break;
2546   }
2547
       // The compare folded to a constant, so no flag-setting compare is needed.
2548   if (ResultReg) {
2549     updateValueMap(I, ResultReg);
2550     return true;
2551   }
2552
2553   // Emit the cmp.
2554   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2555     return false;
2556
2557   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2558
2559   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2560   // condition codes are inverted, because they are used by CSINC.
       // Row 0 holds the pair used for FCMP_UEQ, row 1 the pair for FCMP_ONE.
2561   static unsigned CondCodeTable[2][2] = {
2562     { AArch64CC::NE, AArch64CC::VC },
2563     { AArch64CC::PL, AArch64CC::LE }
2564   };
2565   unsigned *CondCodes = nullptr;
2566   switch (Predicate) {
2567   default:
2568     break;
2569   case CmpInst::FCMP_UEQ:
2570     CondCodes = &CondCodeTable[0][0];
2571     break;
2572   case CmpInst::FCMP_ONE:
2573     CondCodes = &CondCodeTable[1][0];
2574     break;
2575   }
2576
2577   if (CondCodes) {
         // Two chained CSINCs: the first produces an intermediate value from the
         // first condition code, the second feeds that intermediate through the
         // second condition code to form the final result.
2578     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2579     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2580             TmpReg1)
2581         .addReg(AArch64::WZR, getKillRegState(true))
2582         .addReg(AArch64::WZR, getKillRegState(true))
2583         .addImm(CondCodes[0]);
2584     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2585             ResultReg)
2586         .addReg(TmpReg1, getKillRegState(true))
2587         .addReg(AArch64::WZR, getKillRegState(true))
2588         .addImm(CondCodes[1]);
2589
2590     updateValueMap(I, ResultReg);
2591     return true;
2592   }
2593
2594   // Now set a register based on the comparison.
       // CSINC Wd, WZR, WZR, cc yields 0 when cc holds and 1 otherwise, which is
       // why the inverted condition code is passed.
2595   AArch64CC::CondCode CC = getCompareCC(Predicate);
2596   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2597   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2598   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2599           ResultReg)
2600       .addReg(AArch64::WZR, getKillRegState(true))
2601       .addReg(AArch64::WZR, getKillRegState(true))
2602       .addImm(invertedCC);
2603
2604   updateValueMap(I, ResultReg);
2605   return true;
2606 }
2607
2608 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2609 /// value.
2610 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2611   if (!SI->getType()->isIntegerTy(1))
2612     return false;
2613
2614   const Value *Src1Val, *Src2Val;
2615   unsigned Opc = 0;
2616   bool NeedExtraOp = false;
2617   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2618     if (CI->isOne()) {
2619       Src1Val = SI->getCondition();
2620       Src2Val = SI->getFalseValue();
2621       Opc = AArch64::ORRWrr;
2622     } else {
2623       assert(CI->isZero());
2624       Src1Val = SI->getFalseValue();
2625       Src2Val = SI->getCondition();
2626       Opc = AArch64::BICWrr;
2627     }
2628   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2629     if (CI->isOne()) {
2630       Src1Val = SI->getCondition();
2631       Src2Val = SI->getTrueValue();
2632       Opc = AArch64::ORRWrr;
2633       NeedExtraOp = true;
2634     } else {
2635       assert(CI->isZero());
2636       Src1Val = SI->getCondition();
2637       Src2Val = SI->getTrueValue();
2638       Opc = AArch64::ANDWrr;
2639     }
2640   }
2641
2642   if (!Opc)
2643     return false;
2644
2645   unsigned Src1Reg = getRegForValue(Src1Val);
2646   if (!Src1Reg)
2647     return false;
2648   bool Src1IsKill = hasTrivialKill(Src1Val);
2649
2650   unsigned Src2Reg = getRegForValue(Src2Val);
2651   if (!Src2Reg)
2652     return false;
2653   bool Src2IsKill = hasTrivialKill(Src2Val);
2654
2655   if (NeedExtraOp) {
2656     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2657     Src1IsKill = true;
2658   }
2659   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2660                                        Src1IsKill, Src2Reg, Src2IsKill);
2661   updateValueMap(SI, ResultReg);
2662   return true;
2663 }
2664
2665 bool AArch64FastISel::selectSelect(const Instruction *I) {
2666   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2667   MVT VT;
2668   if (!isTypeSupported(I->getType(), VT))
2669     return false;
2670
2671   unsigned Opc;
2672   const TargetRegisterClass *RC;
2673   switch (VT.SimpleTy) {
2674   default:
2675     return false;
2676   case MVT::i1:
2677   case MVT::i8:
2678   case MVT::i16:
2679   case MVT::i32:
2680     Opc = AArch64::CSELWr;
2681     RC = &AArch64::GPR32RegClass;
2682     break;
2683   case MVT::i64:
2684     Opc = AArch64::CSELXr;
2685     RC = &AArch64::GPR64RegClass;
2686     break;
2687   case MVT::f32:
2688     Opc = AArch64::FCSELSrrr;
2689     RC = &AArch64::FPR32RegClass;
2690     break;
2691   case MVT::f64:
2692     Opc = AArch64::FCSELDrrr;
2693     RC = &AArch64::FPR64RegClass;
2694     break;
2695   }
2696
2697   const SelectInst *SI = cast<SelectInst>(I);
2698   const Value *Cond = SI->getCondition();
2699   AArch64CC::CondCode CC = AArch64CC::NE;
2700   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2701
2702   if (optimizeSelect(SI))
2703     return true;
2704
2705   // Try to pickup the flags, so we don't have to emit another compare.
2706   if (foldXALUIntrinsic(CC, I, Cond)) {
2707     // Fake request the condition to force emission of the XALU intrinsic.
2708     unsigned CondReg = getRegForValue(Cond);
2709     if (!CondReg)
2710       return false;
2711   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2712              isValueAvailable(Cond)) {
2713     const auto *Cmp = cast<CmpInst>(Cond);
2714     // Try to optimize or fold the cmp.
2715     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2716     const Value *FoldSelect = nullptr;
2717     switch (Predicate) {
2718     default:
2719       break;
2720     case CmpInst::FCMP_FALSE:
2721       FoldSelect = SI->getFalseValue();
2722       break;
2723     case CmpInst::FCMP_TRUE:
2724       FoldSelect = SI->getTrueValue();
2725       break;
2726     }
2727
2728     if (FoldSelect) {
2729       unsigned SrcReg = getRegForValue(FoldSelect);
2730       if (!SrcReg)
2731         return false;
2732       unsigned UseReg = lookUpRegForValue(SI);
2733       if (UseReg)
2734         MRI.clearKillFlags(UseReg);
2735
2736       updateValueMap(I, SrcReg);
2737       return true;
2738     }
2739
2740     // Emit the cmp.
2741     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2742       return false;
2743
2744     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2745     CC = getCompareCC(Predicate);
2746     switch (Predicate) {
2747     default:
2748       break;
2749     case CmpInst::FCMP_UEQ:
2750       ExtraCC = AArch64CC::EQ;
2751       CC = AArch64CC::VS;
2752       break;
2753     case CmpInst::FCMP_ONE:
2754       ExtraCC = AArch64CC::MI;
2755       CC = AArch64CC::GT;
2756       break;
2757     }
2758     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2759   } else {
2760     unsigned CondReg = getRegForValue(Cond);
2761     if (!CondReg)
2762       return false;
2763     bool CondIsKill = hasTrivialKill(Cond);
2764
2765     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2766     CondReg = constrainOperandRegClass(II, CondReg, 1);
2767
2768     // Emit a TST instruction (ANDS wzr, reg, #imm).
2769     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2770             AArch64::WZR)
2771         .addReg(CondReg, getKillRegState(CondIsKill))
2772         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2773   }
2774
2775   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2776   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2777
2778   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2779   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2780
2781   if (!Src1Reg || !Src2Reg)
2782     return false;
2783
2784   if (ExtraCC != AArch64CC::AL) {
2785     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2786                                Src2IsKill, ExtraCC);
2787     Src2IsKill = true;
2788   }
2789   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2790                                         Src2IsKill, CC);
2791   updateValueMap(I, ResultReg);
2792   return true;
2793 }
2794
2795 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2796   // Only the float -> double extension is handled here.
2797   Value *Src = I->getOperand(0);
2798   if (!(I->getType()->isDoubleTy() && Src->getType()->isFloatTy()))
2799     return false;
2800   unsigned SrcReg = getRegForValue(Src);
2801   if (!SrcReg)
2802     return false;
2803
2804   unsigned DstReg = createResultReg(&AArch64::FPR64RegClass);
2805   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2806           DstReg).addReg(SrcReg);
2807   updateValueMap(I, DstReg);
2808   return true;
2809 }
2810
2811 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2812   // Only the double -> float truncation is handled here.
2813   Value *Src = I->getOperand(0);
2814   if (!(I->getType()->isFloatTy() && Src->getType()->isDoubleTy()))
2815     return false;
2816   unsigned SrcReg = getRegForValue(Src);
2817   if (!SrcReg)
2818     return false;
2819
2820   unsigned DstReg = createResultReg(&AArch64::FPR32RegClass);
2821   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2822           DstReg).addReg(SrcReg);
2823   updateValueMap(I, DstReg);
2824   return true;
2825 }
2826
2827 // FPToUI and FPToSI
2828 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2829   MVT DestVT;
2830   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2831     return false;
2832
2833   unsigned SrcReg = getRegForValue(I->getOperand(0));
2834   if (SrcReg == 0)
2835     return false;
2836
2837   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2838   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2839     return false;
2840
2841   unsigned Opc;
2842   if (SrcVT == MVT::f64) {
2843     if (Signed)
2844       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2845     else
2846       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2847   } else {
2848     if (Signed)
2849       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2850     else
2851       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2852   }
2853   unsigned ResultReg = createResultReg(
2854       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2855   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2856       .addReg(SrcReg);
2857   updateValueMap(I, ResultReg);
2858   return true;
2859 }
2860
2861 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2862   MVT DestVT;
2863   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2864     return false;
2865   // Let regular ISEL handle FP16
2866   if (DestVT == MVT::f16)
2867     return false;
2868
2869   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2870          "Unexpected value type.");
2871
2872   unsigned SrcReg = getRegForValue(I->getOperand(0));
2873   if (!SrcReg)
2874     return false;
2875   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2876
2877   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2878
2879   // Handle sign-extension.
2880   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2881     SrcReg =
2882         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2883     if (!SrcReg)
2884       return false;
2885     SrcIsKill = true;
2886   }
2887
2888   unsigned Opc;
2889   if (SrcVT == MVT::i64) {
2890     if (Signed)
2891       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2892     else
2893       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2894   } else {
2895     if (Signed)
2896       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2897     else
2898       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2899   }
2900
2901   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2902                                       SrcIsKill);
2903   updateValueMap(I, ResultReg);
2904   return true;
2905 }
2906
2907 bool AArch64FastISel::fastLowerArguments() {
       // Fast path for lowering the incoming arguments of the current function.
       // Only non-vararg functions using the C or Swift calling convention are
       // handled, and only when every argument is a plain scalar or 64/128-bit
       // vector that fits in the first eight GPRs or FPRs. Returns false for
       // anything else.
2908   if (!FuncInfo.CanLowerReturn)
2909     return false;
2910
2911   const Function *F = FuncInfo.Fn;
2912   if (F->isVarArg())
2913     return false;
2914
2915   CallingConv::ID CC = F->getCallingConv();
2916   if (CC != CallingConv::C && CC != CallingConv::Swift)
2917     return false;
2918
2919   // Only handle simple cases of up to 8 GPR and FPR each.
       // First pass: validate every argument and count the registers required.
       // Nothing is emitted until all arguments are known to be supported.
2920   unsigned GPRCnt = 0;
2921   unsigned FPRCnt = 0;
2922   for (auto const &Arg : F->args()) {
         // Arguments carrying any of these attributes are not handled here.
2923     if (Arg.hasAttribute(Attribute::ByVal) ||
2924         Arg.hasAttribute(Attribute::InReg) ||
2925         Arg.hasAttribute(Attribute::StructRet) ||
2926         Arg.hasAttribute(Attribute::SwiftSelf) ||
2927         Arg.hasAttribute(Attribute::SwiftError) ||
2928         Arg.hasAttribute(Attribute::Nest))
2929       return false;
2930
2931     Type *ArgTy = Arg.getType();
2932     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2933       return false;
2934
2935     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2936     if (!ArgVT.isSimple())
2937       return false;
2938
2939     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2940     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2941       return false;
2942
         // Vector arguments require NEON and are only handled on little-endian.
2943     if (VT.isVector() &&
2944         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2945       return false;
2946
         // Integers count against the GPRs; scalar FP and 64/128-bit vectors
         // count against the FPRs. Any other type is rejected.
2947     if (VT >= MVT::i1 && VT <= MVT::i64)
2948       ++GPRCnt;
2949     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2950              VT.is128BitVector())
2951       ++FPRCnt;
2952     else
2953       return false;
2954
2955     if (GPRCnt > 8 || FPRCnt > 8)
2956       return false;
2957   }
2958
       // Argument registers by row: W (32-bit GPR), X (64-bit GPR), H, S, D and
       // Q, each holding registers 0-7. The column is the running GPR or FPR
       // index of the argument.
2959   static const MCPhysReg Registers[6][8] = {
2960     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2961       AArch64::W5, AArch64::W6, AArch64::W7 },
2962     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2963       AArch64::X5, AArch64::X6, AArch64::X7 },
2964     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2965       AArch64::H5, AArch64::H6, AArch64::H7 },
2966     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2967       AArch64::S5, AArch64::S6, AArch64::S7 },
2968     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2969       AArch64::D5, AArch64::D6, AArch64::D7 },
2970     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2971       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2972   };
2973
       // Second pass: hand out registers in argument order. GPR and FPR indices
       // advance independently of each other.
2974   unsigned GPRIdx = 0;
2975   unsigned FPRIdx = 0;
2976   for (auto const &Arg : F->args()) {
2977     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2978     unsigned SrcReg;
2979     const TargetRegisterClass *RC;
2980     if (VT >= MVT::i1 && VT <= MVT::i32) {
           // Integers of 32 bits or fewer all arrive in a W register.
2981       SrcReg = Registers[0][GPRIdx++];
2982       RC = &AArch64::GPR32RegClass;
2983       VT = MVT::i32;
2984     } else if (VT == MVT::i64) {
2985       SrcReg = Registers[1][GPRIdx++];
2986       RC = &AArch64::GPR64RegClass;
2987     } else if (VT == MVT::f16) {
2988       SrcReg = Registers[2][FPRIdx++];
2989       RC = &AArch64::FPR16RegClass;
2990     } else if (VT ==  MVT::f32) {
2991       SrcReg = Registers[3][FPRIdx++];
2992       RC = &AArch64::FPR32RegClass;
2993     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2994       SrcReg = Registers[4][FPRIdx++];
2995       RC = &AArch64::FPR64RegClass;
2996     } else if (VT.is128BitVector()) {
2997       SrcReg = Registers[5][FPRIdx++];
2998       RC = &AArch64::FPR128RegClass;
2999     } else
3000       llvm_unreachable("Unexpected value type.");
3001
         // Register the physical register as a function live-in; DstReg is the
         // register returned for it.
3002     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3003     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3004     // Without this, EmitLiveInCopies may eliminate the livein if its only
3005     // use is a bitcast (which isn't turned into an instruction).
3006     unsigned ResultReg = createResultReg(RC);
3007     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3008             TII.get(TargetOpcode::COPY), ResultReg)
3009         .addReg(DstReg, getKillRegState(true));
3010     updateValueMap(&Arg, ResultReg);
3011   }
3012   return true;
3013 }
3014
3015 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3016                                       SmallVectorImpl<MVT> &OutVTs,
3017                                       unsigned &NumBytes) {
       // Opens the call sequence and moves every outgoing argument to the
       // register or stack slot the calling convention assigned to it.
       //   CLI      - call description; CLI.OutRegs receives the physical
       //              registers that were loaded with arguments.
       //   OutVTs   - value type of each outgoing argument.
       //   NumBytes - set to the stack space the arguments occupy.
       // Returns false when an argument cannot be handled. CALLSEQ_START (and
       // possibly some argument copies) have already been emitted at that point.
3018   CallingConv::ID CC = CLI.CallConv;
3019   SmallVector<CCValAssign, 16> ArgLocs;
3020   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3021   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3022
3023   // Get a count of how many bytes are to be pushed on the stack.
3024   NumBytes = CCInfo.getNextStackOffset();
3025
3026   // Issue CALLSEQ_START
3027   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3028   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3029     .addImm(NumBytes).addImm(0);
3030
3031   // Process the args.
3032   for (CCValAssign &VA : ArgLocs) {
3033     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3034     MVT ArgVT = OutVTs[VA.getValNo()];
3035
3036     unsigned ArgReg = getRegForValue(ArgVal);
3037     if (!ArgReg)
3038       return false;
3039
3040     // Handle arg promotion: SExt, ZExt, AExt.
         // The argument is widened from its own type to the location type.
3041     switch (VA.getLocInfo()) {
3042     case CCValAssign::Full:
3043       break;
3044     case CCValAssign::SExt: {
3045       MVT DestVT = VA.getLocVT();
3046       MVT SrcVT = ArgVT;
3047       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3048       if (!ArgReg)
3049         return false;
3050       break;
3051     }
         // An any-extend is implemented as a zero-extend.
3052     case CCValAssign::AExt:
3053     // Intentional fall-through.
3054     case CCValAssign::ZExt: {
3055       MVT DestVT = VA.getLocVT();
3056       MVT SrcVT = ArgVT;
3057       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3058       if (!ArgReg)
3059         return false;
3060       break;
3061     }
3062     default:
3063       llvm_unreachable("Unknown arg promotion!");
3064     }
3065
3066     // Now copy/store arg to correct locations.
3067     if (VA.isRegLoc() && !VA.needsCustom()) {
           // Register argument: copy into the assigned physical register and
           // remember that register in CLI.OutRegs.
3068       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3069               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3070       CLI.OutRegs.push_back(VA.getLocReg());
3071     } else if (VA.needsCustom()) {
3072       // FIXME: Handle custom args.
3073       return false;
3074     } else {
3075       assert(VA.isMemLoc() && "Assuming store on stack.");
3076
3077       // Don't emit stores for undef values.
3078       if (isa<UndefValue>(ArgVal))
3079         continue;
3080
3081       // Need to store on the stack.
           // ArgSize is the argument size in bytes, rounded up.
3082       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3083
           // On big-endian targets an argument smaller than 8 bytes is stored
           // (8 - ArgSize) bytes past its assigned offset - presumably so that it
           // occupies the high-address end of an 8-byte slot.
3084       unsigned BEAlign = 0;
3085       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3086         BEAlign = 8 - ArgSize;
3087
           // The store address is SP plus the assigned offset (plus BEAlign).
3088       Address Addr;
3089       Addr.setKind(Address::RegBase);
3090       Addr.setReg(AArch64::SP);
3091       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3092
3093       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3094       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3095           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3096           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3097
3098       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3099         return false;
3100     }
3101   }
3102   return true;
3103 }
3104
3105 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3106                                  unsigned NumBytes) {
3107   CallingConv::ID CC = CLI.CallConv;
3108
3109   // Issue CALLSEQ_END
3110   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3111   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3112     .addImm(NumBytes).addImm(0);
3113
3114   // Now the return value.
3115   if (RetVT != MVT::isVoid) {
3116     SmallVector<CCValAssign, 16> RVLocs;
3117     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3118     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3119
3120     // Only handle a single return value.
3121     if (RVLocs.size() != 1)
3122       return false;
3123
3124     // Copy all of the result registers out of their specified physreg.
3125     MVT CopyVT = RVLocs[0].getValVT();
3126
3127     // TODO: Handle big-endian results
3128     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3129       return false;
3130
3131     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3132     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3133             TII.get(TargetOpcode::COPY), ResultReg)
3134         .addReg(RVLocs[0].getLocReg());
3135     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3136
3137     CLI.ResultReg = ResultReg;
3138     CLI.NumResultRegs = 1;
3139   }
3140
3141   return true;
3142 }
3143
3144 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       // Lowers a simple call described by CLI: validates that the call is
       // supported, places the arguments (processCallArgs), emits the BL/BLR and
       // then handles the return value (finishCall). Returns false for any call
       // that is not handled here.
3145   CallingConv::ID CC  = CLI.CallConv;
3146   bool IsTailCall     = CLI.IsTailCall;
3147   bool IsVarArg       = CLI.IsVarArg;
3148   const Value *Callee = CLI.Callee;
3149   MCSymbol *Symbol = CLI.Symbol;
3150
       // The call target must be given either as an IR value or as a symbol.
3151   if (!Callee && !Symbol)
3152     return false;
3153
3154   // Allow SelectionDAG isel to handle tail calls.
3155   if (IsTailCall)
3156     return false;
3157
3158   CodeModel::Model CM = TM.getCodeModel();
3159   // Only support the small-addressing and large code models.
3160   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3161     return false;
3162
3163   // FIXME: Add large code model support for ELF.
3164   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3165     return false;
3166
3167   // Let SDISel handle vararg functions.
3168   if (IsVarArg)
3169     return false;
3170
3171   // FIXME: Only handle *simple* calls for now.
3172   MVT RetVT;
3173   if (CLI.RetTy->isVoidTy())
3174     RetVT = MVT::isVoid;
3175   else if (!isTypeLegal(CLI.RetTy, RetVT))
3176     return false;
3177
       // Arguments carrying any of these flags are not handled here.
3178   for (auto Flag : CLI.OutFlags)
3179     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3180         Flag.isSwiftSelf() || Flag.isSwiftError())
3181       return false;
3182
3183   // Set up the argument vectors.
3184   SmallVector<MVT, 16> OutVTs;
3185   OutVTs.reserve(CLI.OutVals.size());
3186
3187   for (auto *Val : CLI.OutVals) {
         // Besides legal types, the small integer types i1, i8 and i16 are
         // accepted as argument types.
3188     MVT VT;
3189     if (!isTypeLegal(Val->getType(), VT) &&
3190         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3191       return false;
3192
3193     // We don't handle vector parameters yet.
3194     if (VT.isVector() || VT.getSizeInBits() > 64)
3195       return false;
3196
3197     OutVTs.push_back(VT);
3198   }
3199
       // Resolve the callee address up front. This is skipped when the target is
       // given only as a symbol.
3200   Address Addr;
3201   if (Callee && !computeCallAddress(Callee, Addr))
3202     return false;
3203
3204   // Handle the arguments now that we've gotten them.
3205   unsigned NumBytes;
3206   if (!processCallArgs(CLI, OutVTs, NumBytes))
3207     return false;
3208
3209   // Issue the call.
3210   MachineInstrBuilder MIB;
3211   if (Subtarget->useSmallAddressing()) {
         // Small addressing: BLR when the address is held in a register, BL
         // otherwise. The operand is the symbol, the global or the register, in
         // that order of preference.
3212     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3213     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3214     if (Symbol)
3215       MIB.addSym(Symbol, 0);
3216     else if (Addr.getGlobalValue())
3217       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3218     else if (Addr.getReg()) {
3219       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3220       MIB.addReg(Reg);
3221     } else
3222       return false;
3223   } else {
         // Otherwise the target address is first put in a register and the call
         // is always made with BLR.
3224     unsigned CallReg = 0;
3225     if (Symbol) {
           // Load the symbol's address through the GOT: ADRP forms the page
           // address and LDR reads the entry at the page offset.
3226       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3227       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3228               ADRPReg)
3229           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3230
3231       CallReg = createResultReg(&AArch64::GPR64RegClass);
3232       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3233               TII.get(AArch64::LDRXui), CallReg)
3234           .addReg(ADRPReg)
3235           .addSym(Symbol,
3236                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3237     } else if (Addr.getGlobalValue())
3238       CallReg = materializeGV(Addr.getGlobalValue());
3239     else if (Addr.getReg())
3240       CallReg = Addr.getReg();
3241
3242     if (!CallReg)
3243       return false;
3244
3245     const MCInstrDesc &II = TII.get(AArch64::BLR);
3246     CallReg = constrainOperandRegClass(II, CallReg, 0);
3247     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3248   }
3249
3250   // Add implicit physical register uses to the call.
3251   for (auto Reg : CLI.OutRegs)
3252     MIB.addReg(Reg, RegState::Implicit);
3253
3254   // Add a register mask with the call-preserved registers.
3255   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3256   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3257
       // Record the emitted call instruction in CLI.
3258   CLI.Call = MIB;
3259
3260   // Finish off the call including any return values.
3261   return finishCall(CLI, RetVT, NumBytes);
3262 }
3263
3264 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3265   // With a known alignment, allow a length of up to four alignment-sized
3266   // units (the division's remainder is ignored); with no alignment given,
3267   // fall back to a fixed limit of fewer than 32 bytes.
3268   return Alignment ? (Len / Alignment <= 4) : (Len < 32);
3269 }
3270
3271 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3272                                          uint64_t Len, unsigned Alignment) {
3273   // Make sure we don't bloat code by inlining very large memcpy's.
3274   if (!isMemCpySmall(Len, Alignment))
3275     return false;
3276
3277   int64_t UnscaledOffset = 0;
3278   Address OrigDest = Dest;
3279   Address OrigSrc = Src;
3280
3281   while (Len) {
3282     MVT VT;
3283     if (!Alignment || Alignment >= 8) {
3284       if (Len >= 8)
3285         VT = MVT::i64;
3286       else if (Len >= 4)
3287         VT = MVT::i32;
3288       else if (Len >= 2)
3289         VT = MVT::i16;
3290       else {
3291         VT = MVT::i8;
3292       }
3293     } else {
3294       // Bound based on alignment.
3295       if (Len >= 4 && Alignment == 4)
3296         VT = MVT::i32;
3297       else if (Len >= 2 && Alignment == 2)
3298         VT = MVT::i16;
3299       else {
3300         VT = MVT::i8;
3301       }
3302     }
3303
3304     unsigned ResultReg = emitLoad(VT, VT, Src);
3305     if (!ResultReg)
3306       return false;
3307
3308     if (!emitStore(VT, ResultReg, Dest))
3309       return false;
3310
3311     int64_t Size = VT.getSizeInBits() / 8;
3312     Len -= Size;
3313     UnscaledOffset += Size;
3314
3315     // We need to recompute the unscaled offset for each iteration.
3316     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3317     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3318   }
3319
3320   return true;
3321 }
3322
3323 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3324 /// into the user. The condition code will only be updated on success.
3325 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3326                                         const Instruction *I,
3327                                         const Value *Cond) {
3328   if (!isa<ExtractValueInst>(Cond))
3329     return false;
3330
3331   const auto *EV = cast<ExtractValueInst>(Cond);
3332   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3333     return false;
3334
3335   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3336   MVT RetVT;
3337   const Function *Callee = II->getCalledFunction();
3338   Type *RetTy =
3339   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3340   if (!isTypeLegal(RetTy, RetVT))
3341     return false;
3342
3343   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3344     return false;
3345
3346   const Value *LHS = II->getArgOperand(0);
3347   const Value *RHS = II->getArgOperand(1);
3348
3349   // Canonicalize immediate to the RHS.
3350   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3351       isCommutativeIntrinsic(II))
3352     std::swap(LHS, RHS);
3353
3354   // Simplify multiplies.
3355   Intrinsic::ID IID = II->getIntrinsicID();
3356   switch (IID) {
3357   default:
3358     break;
3359   case Intrinsic::smul_with_overflow:
3360     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3361       if (C->getValue() == 2)
3362         IID = Intrinsic::sadd_with_overflow;
3363     break;
3364   case Intrinsic::umul_with_overflow:
3365     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3366       if (C->getValue() == 2)
3367         IID = Intrinsic::uadd_with_overflow;
3368     break;
3369   }
3370
3371   AArch64CC::CondCode TmpCC;
3372   switch (IID) {
3373   default:
3374     return false;
3375   case Intrinsic::sadd_with_overflow:
3376   case Intrinsic::ssub_with_overflow:
3377     TmpCC = AArch64CC::VS;
3378     break;
3379   case Intrinsic::uadd_with_overflow:
3380     TmpCC = AArch64CC::HS;
3381     break;
3382   case Intrinsic::usub_with_overflow:
3383     TmpCC = AArch64CC::LO;
3384     break;
3385   case Intrinsic::smul_with_overflow:
3386   case Intrinsic::umul_with_overflow:
3387     TmpCC = AArch64CC::NE;
3388     break;
3389   }
3390
3391   // Check if both instructions are in the same basic block.
3392   if (!isValueAvailable(II))
3393     return false;
3394
3395   // Make sure nothing is in the way
3396   BasicBlock::const_iterator Start(I);
3397   BasicBlock::const_iterator End(II);
3398   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3399     // We only expect extractvalue instructions between the intrinsic and the
3400     // instruction to be selected.
3401     if (!isa<ExtractValueInst>(Itr))
3402       return false;
3403
3404     // Check that the extractvalue operand comes from the intrinsic.
3405     const auto *EVI = cast<ExtractValueInst>(Itr);
3406     if (EVI->getAggregateOperand() != II)
3407       return false;
3408   }
3409
3410   CC = TmpCC;
3411   return true;
3412 }
3413
/// Lower the intrinsic call \p II, either to machine instructions emitted
/// directly into the current block or to a regular (library) call.
///
/// Handled here: frameaddress, memcpy/memmove, memset, sin/cos/pow, fabs,
/// trap, sqrt and the six *.with.overflow arithmetic intrinsics.
///
/// \returns true if the intrinsic was lowered; false for any other intrinsic
/// and for the operand/result types that the individual cases reject.
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    // Start from a copy of the frame register in a fresh GPR64 virtual
    // register.
    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    // One load is emitted per level of the constant depth operand; a depth of
    // zero yields the copied frame register itself.
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, /*IsKill=*/true, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure.");
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      unsigned Alignment = MTI->getAlignment();
      if (isMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
          return false;
        // If the inline expansion fails, fall through to the call below.
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // The call path is only taken for a 64-bit length operand.
    if (!MTI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    // Lower to a call of the named function, passing all but the last two
    // intrinsic operands.
    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    // Only a 64-bit length operand is handled.
    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    // Lower to a call of "memset", passing all but the last two intrinsic
    // operands.
    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
  }
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    // These are lowered to runtime library calls; only legal f32 and f64
    // result types are handled.
    MVT RetVT;
    if (!isTypeLegal(II->getType(), RetVT))
      return false;

    if (RetVT != MVT::f32 && RetVT != MVT::f64)
      return false;

    // Indexed as [intrinsic][Is64Bit]: row 0 = sin, 1 = cos, 2 = pow;
    // column 0 = f32, 1 = f64.
    static const RTLIB::Libcall LibCallTable[3][2] = {
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }
    };
    RTLIB::Libcall LC;
    bool Is64Bit = RetVT == MVT::f64;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
      break;
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
      break;
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];
      break;
    }

    ArgListTy Args;
    Args.reserve(II->getNumArgOperands());

    // Populate the argument list.
    for (auto &Arg : II->arg_operands()) {
      ArgListEntry Entry;
      Entry.Val = Arg;
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);
    }

    // Call the libcall by name using its calling convention, and map the
    // intrinsic's value to the call's result register.
    CallLoweringInfo CLI;
    MCContext &Ctx = MF->getContext();
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
                  TLI.getLibcallName(LC), std::move(Args));
    if (!lowerCallTo(CLI))
      return false;
    updateValueMap(II, CLI.ResultReg);
    return true;
  }
  case Intrinsic::fabs: {
    MVT VT;
    if (!isTypeLegal(II->getType(), VT))
      return false;

    // Select the FABS opcode for the value type; only f32 and f64 are handled.
    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      return false;
    case MVT::f32:
      Opc = AArch64::FABSSr;
      break;
    case MVT::f64:
      Opc = AArch64::FABSDr;
      break;
    }
    unsigned SrcReg = getRegForValue(II->getOperand(0));
    if (!SrcReg)
      return false;
    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::trap:
    // Emit BRK with an immediate operand of 1.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;

  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));

    // Select via the generic ISD::FSQRT node; give up if nothing was emitted
    // for this type.
    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    // The intrinsic returns a struct; element 0 is the arithmetic result type.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
        isCommutativeIntrinsic(II))
      std::swap(LHS, RHS);

    // Simplify multiplies.
    // A multiply by the constant 2 is rewritten as LHS + LHS so that the
    // cheaper add-with-overflow lowering below can be used.
    Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    default:
      break;
    case Intrinsic::smul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
          RHS = LHS;
        }
      break;
    case Intrinsic::umul_with_overflow:
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;
          RHS = LHS;
        }
      break;
    }

    // ResultReg1 receives the arithmetic result and ResultReg2 the overflow
    // bit. For the multiply cases the product is first produced in MulReg and
    // copied into ResultReg1 afterwards. CC is the condition under which the
    // emitted flag-setting instruction signals overflow.
    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    AArch64CC::CondCode CC = AArch64CC::Invalid;
    switch (IID) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
        // Form the full 64-bit signed product, then compare its upper 32 bits
        // (product >> 32) against the lower 32 bits arithmetically shifted
        // right by 31; the two differ (NE) exactly when the comparison below
        // reports overflow.
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
                                       /*IsKill=*/false, 32);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
                                              AArch64::sub_32);
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
                            /*IsKill=*/false);
        // Compare the signed high half of the product (MULHS) against the low
        // half arithmetically shifted right by 63.
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 63, /*WantResult=*/false);
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
        // Form the full 64-bit unsigned product and compare zero against its
        // upper 32 bits (product LSR 32); a non-zero upper half gives NE.
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
                    /*IsKill=*/false, AArch64_AM::LSR, 32,
                    /*WantResult=*/false);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
                            /*IsKill=*/false);
        // Compare zero against the unsigned high half of the product (MULHU);
        // a non-zero high half gives NE.
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
                    /*IsKill=*/false, /*WantResult=*/false);
      }
      break;
    }
    }

    // For multiplies, copy the product into a freshly created result register
    // so that it is created immediately before ResultReg2 below.
    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    // Materialize the overflow bit: CSINC of WZR, WZR on the inverted
    // condition.
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
                                  /*IsKill=*/true, getInvertedCondCode(CC));
    (void)ResultReg2;
    // The value map entry below covers two registers starting at ResultReg1,
    // so the overflow register must directly follow the result register.
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  }
  return false;
}
3750
3751 bool AArch64FastISel::selectRet(const Instruction *I) {
3752   const ReturnInst *Ret = cast<ReturnInst>(I);
3753   const Function &F = *I->getParent()->getParent();
3754
3755   if (!FuncInfo.CanLowerReturn)
3756     return false;
3757
3758   if (F.isVarArg())
3759     return false;
3760
3761   if (TLI.supportSwiftError() &&
3762       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3763     return false;
3764
3765   if (TLI.supportSplitCSR(FuncInfo.MF))
3766     return false;
3767
3768   // Build a list of return value registers.
3769   SmallVector<unsigned, 4> RetRegs;
3770
3771   if (Ret->getNumOperands() > 0) {
3772     CallingConv::ID CC = F.getCallingConv();
3773     SmallVector<ISD::OutputArg, 4> Outs;
3774     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3775
3776     // Analyze operands of the call, assigning locations to each operand.
3777     SmallVector<CCValAssign, 16> ValLocs;
3778     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3779     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3780                                                      : RetCC_AArch64_AAPCS;
3781     CCInfo.AnalyzeReturn(Outs, RetCC);
3782
3783     // Only handle a single return value for now.
3784     if (ValLocs.size() != 1)
3785       return false;
3786
3787     CCValAssign &VA = ValLocs[0];
3788     const Value *RV = Ret->getOperand(0);
3789
3790     // Don't bother handling odd stuff for now.
3791     if ((VA.getLocInfo() != CCValAssign::Full) &&
3792         (VA.getLocInfo() != CCValAssign::BCvt))
3793       return false;
3794
3795     // Only handle register returns for now.
3796     if (!VA.isRegLoc())
3797       return false;
3798
3799     unsigned Reg = getRegForValue(RV);
3800     if (Reg == 0)
3801       return false;
3802
3803     unsigned SrcReg = Reg + VA.getValNo();
3804     unsigned DestReg = VA.getLocReg();
3805     // Avoid a cross-class copy. This is very unlikely.
3806     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3807       return false;
3808
3809     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3810     if (!RVEVT.isSimple())
3811       return false;
3812
3813     // Vectors (of > 1 lane) in big endian need tricky handling.
3814     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3815         !Subtarget->isLittleEndian())
3816       return false;
3817
3818     MVT RVVT = RVEVT.getSimpleVT();
3819     if (RVVT == MVT::f128)
3820       return false;
3821
3822     MVT DestVT = VA.getValVT();
3823     // Special handling for extended integers.
3824     if (RVVT != DestVT) {
3825       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3826         return false;
3827
3828       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3829         return false;
3830
3831       bool IsZExt = Outs[0].Flags.isZExt();
3832       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3833       if (SrcReg == 0)
3834         return false;
3835     }
3836
3837     // Make the copy.
3838     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3839             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3840
3841     // Add register to return instruction.
3842     RetRegs.push_back(VA.getLocReg());
3843   }
3844
3845   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3846                                     TII.get(AArch64::RET_ReallyLR));
3847   for (unsigned RetReg : RetRegs)
3848     MIB.addReg(RetReg, RegState::Implicit);
3849   return true;
3850 }
3851
3852 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3853   Type *DestTy = I->getType();
3854   Value *Op = I->getOperand(0);
3855   Type *SrcTy = Op->getType();
3856
3857   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3858   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3859   if (!SrcEVT.isSimple())
3860     return false;
3861   if (!DestEVT.isSimple())
3862     return false;
3863
3864   MVT SrcVT = SrcEVT.getSimpleVT();
3865   MVT DestVT = DestEVT.getSimpleVT();
3866
3867   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3868       SrcVT != MVT::i8)
3869     return false;
3870   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3871       DestVT != MVT::i1)
3872     return false;
3873
3874   unsigned SrcReg = getRegForValue(Op);
3875   if (!SrcReg)
3876     return false;
3877   bool SrcIsKill = hasTrivialKill(Op);
3878
3879   // If we're truncating from i64 to a smaller non-legal type then generate an
3880   // AND. Otherwise, we know the high bits are undefined and a truncate only
3881   // generate a COPY. We cannot mark the source register also as result
3882   // register, because this can incorrectly transfer the kill flag onto the
3883   // source register.
3884   unsigned ResultReg;
3885   if (SrcVT == MVT::i64) {
3886     uint64_t Mask = 0;
3887     switch (DestVT.SimpleTy) {
3888     default:
3889       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3890       return false;
3891     case MVT::i1:
3892       Mask = 0x1;
3893       break;
3894     case MVT::i8:
3895       Mask = 0xff;
3896       break;
3897     case MVT::i16:
3898       Mask = 0xffff;
3899       break;
3900     }
3901     // Issue an extract_subreg to get the lower 32-bits.
3902     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3903                                                 AArch64::sub_32);
3904     // Create the AND instruction which performs the actual truncation.
3905     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3906     assert(ResultReg && "Unexpected AND instruction emission failure.");
3907   } else {
3908     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3909     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3910             TII.get(TargetOpcode::COPY), ResultReg)
3911         .addReg(SrcReg, getKillRegState(SrcIsKill));
3912   }
3913
3914   updateValueMap(I, ResultReg);
3915   return true;
3916 }
3917
3918 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3919   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3920           DestVT == MVT::i64) &&
3921          "Unexpected value type.");
3922   // Handle i8 and i16 as i32.
3923   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3924     DestVT = MVT::i32;
3925
3926   if (IsZExt) {
3927     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3928     assert(ResultReg && "Unexpected AND instruction emission failure.");
3929     if (DestVT == MVT::i64) {
3930       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3931       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3932       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3933       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3934               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3935           .addImm(0)
3936           .addReg(ResultReg)
3937           .addImm(AArch64::sub_32);
3938       ResultReg = Reg64;
3939     }
3940     return ResultReg;
3941   } else {
3942     if (DestVT == MVT::i64) {
3943       // FIXME: We're SExt i1 to i64.
3944       return 0;
3945     }
3946     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3947                             /*TODO:IsKill=*/false, 0, 0);
3948   }
3949 }
3950
3951 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3952                                       unsigned Op1, bool Op1IsKill) {
3953   unsigned Opc, ZReg;
3954   switch (RetVT.SimpleTy) {
3955   default: return 0;
3956   case MVT::i8:
3957   case MVT::i16:
3958   case MVT::i32:
3959     RetVT = MVT::i32;
3960     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3961   case MVT::i64:
3962     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3963   }
3964
3965   const TargetRegisterClass *RC =
3966       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3967   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3968                           /*IsKill=*/ZReg, true);
3969 }
3970
3971 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3972                                         unsigned Op1, bool Op1IsKill) {
3973   if (RetVT != MVT::i64)
3974     return 0;
3975
3976   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3977                           Op0, Op0IsKill, Op1, Op1IsKill,
3978                           AArch64::XZR, /*IsKill=*/true);
3979 }
3980
3981 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3982                                         unsigned Op1, bool Op1IsKill) {
3983   if (RetVT != MVT::i64)
3984     return 0;
3985
3986   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3987                           Op0, Op0IsKill, Op1, Op1IsKill,
3988                           AArch64::XZR, /*IsKill=*/true);
3989 }
3990
3991 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3992                                      unsigned Op1Reg, bool Op1IsKill) {
3993   unsigned Opc = 0;
3994   bool NeedTrunc = false;
3995   uint64_t Mask = 0;
3996   switch (RetVT.SimpleTy) {
3997   default: return 0;
3998   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3999   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4000   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4001   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4002   }
4003
4004   const TargetRegisterClass *RC =
4005       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4006   if (NeedTrunc) {
4007     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4008     Op1IsKill = true;
4009   }
4010   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4011                                        Op1IsKill);
4012   if (NeedTrunc)
4013     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4014   return ResultReg;
4015 }
4016
4017 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4018                                      bool Op0IsKill, uint64_t Shift,
4019                                      bool IsZExt) {
4020   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4021          "Unexpected source/return type pair.");
4022   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4023           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4024          "Unexpected source value type.");
4025   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4026           RetVT == MVT::i64) && "Unexpected return value type.");
4027
4028   bool Is64Bit = (RetVT == MVT::i64);
4029   unsigned RegSize = Is64Bit ? 64 : 32;
4030   unsigned DstBits = RetVT.getSizeInBits();
4031   unsigned SrcBits = SrcVT.getSizeInBits();
4032   const TargetRegisterClass *RC =
4033       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4034
4035   // Just emit a copy for "zero" shifts.
4036   if (Shift == 0) {
4037     if (RetVT == SrcVT) {
4038       unsigned ResultReg = createResultReg(RC);
4039       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4040               TII.get(TargetOpcode::COPY), ResultReg)
4041           .addReg(Op0, getKillRegState(Op0IsKill));
4042       return ResultReg;
4043     } else
4044       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4045   }
4046
4047   // Don't deal with undefined shifts.
4048   if (Shift >= DstBits)
4049     return 0;
4050
4051   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4052   // {S|U}BFM Wd, Wn, #r, #s
4053   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4054
4055   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4056   // %2 = shl i16 %1, 4
4057   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4058   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4059   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4060   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4061
4062   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4063   // %2 = shl i16 %1, 8
4064   // Wd<32+7-24,32-24> = Wn<7:0>
4065   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4066   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4067   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4068
4069   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4070   // %2 = shl i16 %1, 12
4071   // Wd<32+3-20,32-20> = Wn<3:0>
4072   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4073   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4074   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4075
4076   unsigned ImmR = RegSize - Shift;
4077   // Limit the width to the length of the source type.
4078   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4079   static const unsigned OpcTable[2][2] = {
4080     {AArch64::SBFMWri, AArch64::SBFMXri},
4081     {AArch64::UBFMWri, AArch64::UBFMXri}
4082   };
4083   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4084   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4085     unsigned TmpReg = MRI.createVirtualRegister(RC);
4086     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4087             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4088         .addImm(0)
4089         .addReg(Op0, getKillRegState(Op0IsKill))
4090         .addImm(AArch64::sub_32);
4091     Op0 = TmpReg;
4092     Op0IsKill = true;
4093   }
4094   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4095 }
4096
4097 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4098                                      unsigned Op1Reg, bool Op1IsKill) {
4099   unsigned Opc = 0;
4100   bool NeedTrunc = false;
4101   uint64_t Mask = 0;
4102   switch (RetVT.SimpleTy) {
4103   default: return 0;
4104   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4105   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4106   case MVT::i32: Opc = AArch64::LSRVWr; break;
4107   case MVT::i64: Opc = AArch64::LSRVXr; break;
4108   }
4109
4110   const TargetRegisterClass *RC =
4111       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4112   if (NeedTrunc) {
4113     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4114     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4115     Op0IsKill = Op1IsKill = true;
4116   }
4117   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4118                                        Op1IsKill);
4119   if (NeedTrunc)
4120     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4121   return ResultReg;
4122 }
4123
4124 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4125                                      bool Op0IsKill, uint64_t Shift,
4126                                      bool IsZExt) {
4127   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4128          "Unexpected source/return type pair.");
4129   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4130           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4131          "Unexpected source value type.");
4132   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4133           RetVT == MVT::i64) && "Unexpected return value type.");
4134
4135   bool Is64Bit = (RetVT == MVT::i64);
4136   unsigned RegSize = Is64Bit ? 64 : 32;
4137   unsigned DstBits = RetVT.getSizeInBits();
4138   unsigned SrcBits = SrcVT.getSizeInBits();
4139   const TargetRegisterClass *RC =
4140       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4141
4142   // Just emit a copy for "zero" shifts.
4143   if (Shift == 0) {
4144     if (RetVT == SrcVT) {
4145       unsigned ResultReg = createResultReg(RC);
4146       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4147               TII.get(TargetOpcode::COPY), ResultReg)
4148       .addReg(Op0, getKillRegState(Op0IsKill));
4149       return ResultReg;
4150     } else
4151       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4152   }
4153
4154   // Don't deal with undefined shifts.
4155   if (Shift >= DstBits)
4156     return 0;
4157
4158   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4159   // {S|U}BFM Wd, Wn, #r, #s
4160   // Wd<s-r:0> = Wn<s:r> when r <= s
4161
4162   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4163   // %2 = lshr i16 %1, 4
4164   // Wd<7-4:0> = Wn<7:4>
4165   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4166   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4167   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4168
4169   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4170   // %2 = lshr i16 %1, 8
4171   // Wd<7-7,0> = Wn<7:7>
4172   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4173   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4174   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4175
4176   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4177   // %2 = lshr i16 %1, 12
4178   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4179   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4180   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4181   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4182
4183   if (Shift >= SrcBits && IsZExt)
4184     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4185
4186   // It is not possible to fold a sign-extend into the LShr instruction. In this
4187   // case emit a sign-extend.
4188   if (!IsZExt) {
4189     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4190     if (!Op0)
4191       return 0;
4192     Op0IsKill = true;
4193     SrcVT = RetVT;
4194     SrcBits = SrcVT.getSizeInBits();
4195     IsZExt = true;
4196   }
4197
4198   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4199   unsigned ImmS = SrcBits - 1;
4200   static const unsigned OpcTable[2][2] = {
4201     {AArch64::SBFMWri, AArch64::SBFMXri},
4202     {AArch64::UBFMWri, AArch64::UBFMXri}
4203   };
4204   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4205   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4206     unsigned TmpReg = MRI.createVirtualRegister(RC);
4207     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4208             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4209         .addImm(0)
4210         .addReg(Op0, getKillRegState(Op0IsKill))
4211         .addImm(AArch64::sub_32);
4212     Op0 = TmpReg;
4213     Op0IsKill = true;
4214   }
4215   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4216 }
4217
4218 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4219                                      unsigned Op1Reg, bool Op1IsKill) {
4220   unsigned Opc = 0;
4221   bool NeedTrunc = false;
4222   uint64_t Mask = 0;
4223   switch (RetVT.SimpleTy) {
4224   default: return 0;
4225   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4226   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4227   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4228   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4229   }
4230
4231   const TargetRegisterClass *RC =
4232       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4233   if (NeedTrunc) {
4234     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4235     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4236     Op0IsKill = Op1IsKill = true;
4237   }
4238   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4239                                        Op1IsKill);
4240   if (NeedTrunc)
4241     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4242   return ResultReg;
4243 }
4244
4245 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4246                                      bool Op0IsKill, uint64_t Shift,
4247                                      bool IsZExt) {
4248   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4249          "Unexpected source/return type pair.");
4250   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4251           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4252          "Unexpected source value type.");
4253   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4254           RetVT == MVT::i64) && "Unexpected return value type.");
4255
4256   bool Is64Bit = (RetVT == MVT::i64);
4257   unsigned RegSize = Is64Bit ? 64 : 32;
4258   unsigned DstBits = RetVT.getSizeInBits();
4259   unsigned SrcBits = SrcVT.getSizeInBits();
4260   const TargetRegisterClass *RC =
4261       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4262
4263   // Just emit a copy for "zero" shifts.
4264   if (Shift == 0) {
4265     if (RetVT == SrcVT) {
4266       unsigned ResultReg = createResultReg(RC);
4267       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4268               TII.get(TargetOpcode::COPY), ResultReg)
4269       .addReg(Op0, getKillRegState(Op0IsKill));
4270       return ResultReg;
4271     } else
4272       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4273   }
4274
4275   // Don't deal with undefined shifts.
4276   if (Shift >= DstBits)
4277     return 0;
4278
4279   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4280   // {S|U}BFM Wd, Wn, #r, #s
4281   // Wd<s-r:0> = Wn<s:r> when r <= s
4282
4283   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4284   // %2 = ashr i16 %1, 4
4285   // Wd<7-4:0> = Wn<7:4>
4286   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4287   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4288   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4289
4290   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4291   // %2 = ashr i16 %1, 8
4292   // Wd<7-7,0> = Wn<7:7>
4293   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4294   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4295   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4296
4297   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4298   // %2 = ashr i16 %1, 12
4299   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4300   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4301   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4302   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4303
4304   if (Shift >= SrcBits && IsZExt)
4305     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4306
4307   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4308   unsigned ImmS = SrcBits - 1;
4309   static const unsigned OpcTable[2][2] = {
4310     {AArch64::SBFMWri, AArch64::SBFMXri},
4311     {AArch64::UBFMWri, AArch64::UBFMXri}
4312   };
4313   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4314   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4315     unsigned TmpReg = MRI.createVirtualRegister(RC);
4316     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4317             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4318         .addImm(0)
4319         .addReg(Op0, getKillRegState(Op0IsKill))
4320         .addImm(AArch64::sub_32);
4321     Op0 = TmpReg;
4322     Op0IsKill = true;
4323   }
4324   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4325 }
4326
4327 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4328                                      bool IsZExt) {
4329   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4330
4331   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4332   // DestVT are odd things, so test to make sure that they are both types we can
4333   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4334   // bail out to SelectionDAG.
4335   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4336        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4337       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4338        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4339     return 0;
4340
4341   unsigned Opc;
4342   unsigned Imm = 0;
4343
4344   switch (SrcVT.SimpleTy) {
4345   default:
4346     return 0;
4347   case MVT::i1:
4348     return emiti1Ext(SrcReg, DestVT, IsZExt);
4349   case MVT::i8:
4350     if (DestVT == MVT::i64)
4351       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4352     else
4353       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4354     Imm = 7;
4355     break;
4356   case MVT::i16:
4357     if (DestVT == MVT::i64)
4358       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4359     else
4360       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4361     Imm = 15;
4362     break;
4363   case MVT::i32:
4364     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4365     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4366     Imm = 31;
4367     break;
4368   }
4369
4370   // Handle i8 and i16 as i32.
4371   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4372     DestVT = MVT::i32;
4373   else if (DestVT == MVT::i64) {
4374     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4375     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4376             TII.get(AArch64::SUBREG_TO_REG), Src64)
4377         .addImm(0)
4378         .addReg(SrcReg)
4379         .addImm(AArch64::sub_32);
4380     SrcReg = Src64;
4381   }
4382
4383   const TargetRegisterClass *RC =
4384       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4385   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4386 }
4387
4388 static bool isZExtLoad(const MachineInstr *LI) {
4389   switch (LI->getOpcode()) {
4390   default:
4391     return false;
4392   case AArch64::LDURBBi:
4393   case AArch64::LDURHHi:
4394   case AArch64::LDURWi:
4395   case AArch64::LDRBBui:
4396   case AArch64::LDRHHui:
4397   case AArch64::LDRWui:
4398   case AArch64::LDRBBroX:
4399   case AArch64::LDRHHroX:
4400   case AArch64::LDRWroX:
4401   case AArch64::LDRBBroW:
4402   case AArch64::LDRHHroW:
4403   case AArch64::LDRWroW:
4404     return true;
4405   }
4406 }
4407
4408 static bool isSExtLoad(const MachineInstr *LI) {
4409   switch (LI->getOpcode()) {
4410   default:
4411     return false;
4412   case AArch64::LDURSBWi:
4413   case AArch64::LDURSHWi:
4414   case AArch64::LDURSBXi:
4415   case AArch64::LDURSHXi:
4416   case AArch64::LDURSWi:
4417   case AArch64::LDRSBWui:
4418   case AArch64::LDRSHWui:
4419   case AArch64::LDRSBXui:
4420   case AArch64::LDRSHXui:
4421   case AArch64::LDRSWui:
4422   case AArch64::LDRSBWroX:
4423   case AArch64::LDRSHWroX:
4424   case AArch64::LDRSBXroX:
4425   case AArch64::LDRSHXroX:
4426   case AArch64::LDRSWroX:
4427   case AArch64::LDRSBWroW:
4428   case AArch64::LDRSHWroW:
4429   case AArch64::LDRSBXroW:
4430   case AArch64::LDRSHXroW:
4431   case AArch64::LDRSWroW:
4432     return true;
4433   }
4434 }
4435
4436 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4437                                          MVT SrcVT) {
4438   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4439   if (!LI || !LI->hasOneUse())
4440     return false;
4441
4442   // Check if the load instruction has already been selected.
4443   unsigned Reg = lookUpRegForValue(LI);
4444   if (!Reg)
4445     return false;
4446
4447   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4448   if (!MI)
4449     return false;
4450
4451   // Check if the correct load instruction has been emitted - SelectionDAG might
4452   // have emitted a zero-extending load, but we need a sign-extending load.
4453   bool IsZExt = isa<ZExtInst>(I);
4454   const auto *LoadMI = MI;
4455   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4456       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4457     unsigned LoadReg = MI->getOperand(1).getReg();
4458     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4459     assert(LoadMI && "Expected valid instruction");
4460   }
4461   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4462     return false;
4463
4464   // Nothing to be done.
4465   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4466     updateValueMap(I, Reg);
4467     return true;
4468   }
4469
4470   if (IsZExt) {
4471     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4472     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4473             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4474         .addImm(0)
4475         .addReg(Reg, getKillRegState(true))
4476         .addImm(AArch64::sub_32);
4477     Reg = Reg64;
4478   } else {
4479     assert((MI->getOpcode() == TargetOpcode::COPY &&
4480             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4481            "Expected copy instruction");
4482     Reg = MI->getOperand(1).getReg();
4483     MI->eraseFromParent();
4484   }
4485   updateValueMap(I, Reg);
4486   return true;
4487 }
4488
4489 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4490   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4491          "Unexpected integer extend instruction.");
4492   MVT RetVT;
4493   MVT SrcVT;
4494   if (!isTypeSupported(I->getType(), RetVT))
4495     return false;
4496
4497   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4498     return false;
4499
4500   // Try to optimize already sign-/zero-extended values from load instructions.
4501   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4502     return true;
4503
4504   unsigned SrcReg = getRegForValue(I->getOperand(0));
4505   if (!SrcReg)
4506     return false;
4507   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4508
4509   // Try to optimize already sign-/zero-extended values from function arguments.
4510   bool IsZExt = isa<ZExtInst>(I);
4511   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4512     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4513       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4514         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4515         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4516                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4517             .addImm(0)
4518             .addReg(SrcReg, getKillRegState(SrcIsKill))
4519             .addImm(AArch64::sub_32);
4520         SrcReg = ResultReg;
4521       }
4522       // Conservatively clear all kill flags from all uses, because we are
4523       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4524       // level. The result of the instruction at IR level might have been
4525       // trivially dead, which is now not longer true.
4526       unsigned UseReg = lookUpRegForValue(I);
4527       if (UseReg)
4528         MRI.clearKillFlags(UseReg);
4529
4530       updateValueMap(I, SrcReg);
4531       return true;
4532     }
4533   }
4534
4535   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4536   if (!ResultReg)
4537     return false;
4538
4539   updateValueMap(I, ResultReg);
4540   return true;
4541 }
4542
4543 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4544   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4545   if (!DestEVT.isSimple())
4546     return false;
4547
4548   MVT DestVT = DestEVT.getSimpleVT();
4549   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4550     return false;
4551
4552   unsigned DivOpc;
4553   bool Is64bit = (DestVT == MVT::i64);
4554   switch (ISDOpcode) {
4555   default:
4556     return false;
4557   case ISD::SREM:
4558     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4559     break;
4560   case ISD::UREM:
4561     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4562     break;
4563   }
4564   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4565   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4566   if (!Src0Reg)
4567     return false;
4568   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4569
4570   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4571   if (!Src1Reg)
4572     return false;
4573   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4574
4575   const TargetRegisterClass *RC =
4576       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4577   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4578                                      Src1Reg, /*IsKill=*/false);
4579   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4580   // The remainder is computed as numerator - (quotient * denominator) using the
4581   // MSUB instruction.
4582   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4583                                         Src1Reg, Src1IsKill, Src0Reg,
4584                                         Src0IsKill);
4585   updateValueMap(I, ResultReg);
4586   return true;
4587 }
4588
4589 bool AArch64FastISel::selectMul(const Instruction *I) {
4590   MVT VT;
4591   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4592     return false;
4593
4594   if (VT.isVector())
4595     return selectBinaryOp(I, ISD::MUL);
4596
4597   const Value *Src0 = I->getOperand(0);
4598   const Value *Src1 = I->getOperand(1);
4599   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4600     if (C->getValue().isPowerOf2())
4601       std::swap(Src0, Src1);
4602
4603   // Try to simplify to a shift instruction.
4604   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4605     if (C->getValue().isPowerOf2()) {
4606       uint64_t ShiftVal = C->getValue().logBase2();
4607       MVT SrcVT = VT;
4608       bool IsZExt = true;
4609       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4610         if (!isIntExtFree(ZExt)) {
4611           MVT VT;
4612           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4613             SrcVT = VT;
4614             IsZExt = true;
4615             Src0 = ZExt->getOperand(0);
4616           }
4617         }
4618       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4619         if (!isIntExtFree(SExt)) {
4620           MVT VT;
4621           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4622             SrcVT = VT;
4623             IsZExt = false;
4624             Src0 = SExt->getOperand(0);
4625           }
4626         }
4627       }
4628
4629       unsigned Src0Reg = getRegForValue(Src0);
4630       if (!Src0Reg)
4631         return false;
4632       bool Src0IsKill = hasTrivialKill(Src0);
4633
4634       unsigned ResultReg =
4635           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4636
4637       if (ResultReg) {
4638         updateValueMap(I, ResultReg);
4639         return true;
4640       }
4641     }
4642
4643   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4644   if (!Src0Reg)
4645     return false;
4646   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4647
4648   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4649   if (!Src1Reg)
4650     return false;
4651   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4652
4653   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4654
4655   if (!ResultReg)
4656     return false;
4657
4658   updateValueMap(I, ResultReg);
4659   return true;
4660 }
4661
4662 bool AArch64FastISel::selectShift(const Instruction *I) {
4663   MVT RetVT;
4664   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4665     return false;
4666
4667   if (RetVT.isVector())
4668     return selectOperator(I, I->getOpcode());
4669
4670   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4671     unsigned ResultReg = 0;
4672     uint64_t ShiftVal = C->getZExtValue();
4673     MVT SrcVT = RetVT;
4674     bool IsZExt = I->getOpcode() != Instruction::AShr;
4675     const Value *Op0 = I->getOperand(0);
4676     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4677       if (!isIntExtFree(ZExt)) {
4678         MVT TmpVT;
4679         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4680           SrcVT = TmpVT;
4681           IsZExt = true;
4682           Op0 = ZExt->getOperand(0);
4683         }
4684       }
4685     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4686       if (!isIntExtFree(SExt)) {
4687         MVT TmpVT;
4688         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4689           SrcVT = TmpVT;
4690           IsZExt = false;
4691           Op0 = SExt->getOperand(0);
4692         }
4693       }
4694     }
4695
4696     unsigned Op0Reg = getRegForValue(Op0);
4697     if (!Op0Reg)
4698       return false;
4699     bool Op0IsKill = hasTrivialKill(Op0);
4700
4701     switch (I->getOpcode()) {
4702     default: llvm_unreachable("Unexpected instruction.");
4703     case Instruction::Shl:
4704       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4705       break;
4706     case Instruction::AShr:
4707       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4708       break;
4709     case Instruction::LShr:
4710       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4711       break;
4712     }
4713     if (!ResultReg)
4714       return false;
4715
4716     updateValueMap(I, ResultReg);
4717     return true;
4718   }
4719
4720   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4721   if (!Op0Reg)
4722     return false;
4723   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4724
4725   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4726   if (!Op1Reg)
4727     return false;
4728   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4729
4730   unsigned ResultReg = 0;
4731   switch (I->getOpcode()) {
4732   default: llvm_unreachable("Unexpected instruction.");
4733   case Instruction::Shl:
4734     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4735     break;
4736   case Instruction::AShr:
4737     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4738     break;
4739   case Instruction::LShr:
4740     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4741     break;
4742   }
4743
4744   if (!ResultReg)
4745     return false;
4746
4747   updateValueMap(I, ResultReg);
4748   return true;
4749 }
4750
4751 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4752   MVT RetVT, SrcVT;
4753
4754   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4755     return false;
4756   if (!isTypeLegal(I->getType(), RetVT))
4757     return false;
4758
4759   unsigned Opc;
4760   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4761     Opc = AArch64::FMOVWSr;
4762   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4763     Opc = AArch64::FMOVXDr;
4764   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4765     Opc = AArch64::FMOVSWr;
4766   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4767     Opc = AArch64::FMOVDXr;
4768   else
4769     return false;
4770
4771   const TargetRegisterClass *RC = nullptr;
4772   switch (RetVT.SimpleTy) {
4773   default: llvm_unreachable("Unexpected value type.");
4774   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4775   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4776   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4777   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4778   }
4779   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4780   if (!Op0Reg)
4781     return false;
4782   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4783   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4784
4785   if (!ResultReg)
4786     return false;
4787
4788   updateValueMap(I, ResultReg);
4789   return true;
4790 }
4791
4792 bool AArch64FastISel::selectFRem(const Instruction *I) {
4793   MVT RetVT;
4794   if (!isTypeLegal(I->getType(), RetVT))
4795     return false;
4796
4797   RTLIB::Libcall LC;
4798   switch (RetVT.SimpleTy) {
4799   default:
4800     return false;
4801   case MVT::f32:
4802     LC = RTLIB::REM_F32;
4803     break;
4804   case MVT::f64:
4805     LC = RTLIB::REM_F64;
4806     break;
4807   }
4808
4809   ArgListTy Args;
4810   Args.reserve(I->getNumOperands());
4811
4812   // Populate the argument list.
4813   for (auto &Arg : I->operands()) {
4814     ArgListEntry Entry;
4815     Entry.Val = Arg;
4816     Entry.Ty = Arg->getType();
4817     Args.push_back(Entry);
4818   }
4819
4820   CallLoweringInfo CLI;
4821   MCContext &Ctx = MF->getContext();
4822   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4823                 TLI.getLibcallName(LC), std::move(Args));
4824   if (!lowerCallTo(CLI))
4825     return false;
4826   updateValueMap(I, CLI.ResultReg);
4827   return true;
4828 }
4829
4830 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4831   MVT VT;
4832   if (!isTypeLegal(I->getType(), VT))
4833     return false;
4834
4835   if (!isa<ConstantInt>(I->getOperand(1)))
4836     return selectBinaryOp(I, ISD::SDIV);
4837
4838   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4839   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4840       !(C.isPowerOf2() || (-C).isPowerOf2()))
4841     return selectBinaryOp(I, ISD::SDIV);
4842
4843   unsigned Lg2 = C.countTrailingZeros();
4844   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4845   if (!Src0Reg)
4846     return false;
4847   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4848
4849   if (cast<BinaryOperator>(I)->isExact()) {
4850     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4851     if (!ResultReg)
4852       return false;
4853     updateValueMap(I, ResultReg);
4854     return true;
4855   }
4856
4857   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4858   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4859   if (!AddReg)
4860     return false;
4861
4862   // (Src0 < 0) ? Pow2 - 1 : 0;
4863   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4864     return false;
4865
4866   unsigned SelectOpc;
4867   const TargetRegisterClass *RC;
4868   if (VT == MVT::i64) {
4869     SelectOpc = AArch64::CSELXr;
4870     RC = &AArch64::GPR64RegClass;
4871   } else {
4872     SelectOpc = AArch64::CSELWr;
4873     RC = &AArch64::GPR32RegClass;
4874   }
4875   unsigned SelectReg =
4876       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4877                        Src0IsKill, AArch64CC::LT);
4878   if (!SelectReg)
4879     return false;
4880
4881   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4882   // negate the result.
4883   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4884   unsigned ResultReg;
4885   if (C.isNegative())
4886     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4887                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4888   else
4889     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4890
4891   if (!ResultReg)
4892     return false;
4893
4894   updateValueMap(I, ResultReg);
4895   return true;
4896 }
4897
4898 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4899 /// have to duplicate it for AArch64, because otherwise we would fail during the
4900 /// sign-extend emission.
4901 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4902   unsigned IdxN = getRegForValue(Idx);
4903   if (IdxN == 0)
4904     // Unhandled operand. Halt "fast" selection and bail.
4905     return std::pair<unsigned, bool>(0, false);
4906
4907   bool IdxNIsKill = hasTrivialKill(Idx);
4908
4909   // If the index is smaller or larger than intptr_t, truncate or extend it.
4910   MVT PtrVT = TLI.getPointerTy(DL);
4911   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4912   if (IdxVT.bitsLT(PtrVT)) {
4913     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4914     IdxNIsKill = true;
4915   } else if (IdxVT.bitsGT(PtrVT))
4916     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4917   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4918 }
4919
4920 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4921 /// duplicate it for AArch64, because otherwise we would bail out even for
4922 /// simple cases. This is because the standard fastEmit functions don't cover
4923 /// MUL at all and ADD is lowered very inefficientily.
4924 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4925   unsigned N = getRegForValue(I->getOperand(0));
4926   if (!N)
4927     return false;
4928   bool NIsKill = hasTrivialKill(I->getOperand(0));
4929
4930   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4931   // into a single N = N + TotalOffset.
4932   uint64_t TotalOffs = 0;
4933   MVT VT = TLI.getPointerTy(DL);
4934   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4935        GTI != E; ++GTI) {
4936     const Value *Idx = GTI.getOperand();
4937     if (auto *StTy = GTI.getStructTypeOrNull()) {
4938       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4939       // N = N + Offset
4940       if (Field)
4941         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4942     } else {
4943       Type *Ty = GTI.getIndexedType();
4944
4945       // If this is a constant subscript, handle it quickly.
4946       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4947         if (CI->isZero())
4948           continue;
4949         // N = N + Offset
4950         TotalOffs +=
4951             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4952         continue;
4953       }
4954       if (TotalOffs) {
4955         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4956         if (!N)
4957           return false;
4958         NIsKill = true;
4959         TotalOffs = 0;
4960       }
4961
4962       // N = N + Idx * ElementSize;
4963       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4964       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4965       unsigned IdxN = Pair.first;
4966       bool IdxNIsKill = Pair.second;
4967       if (!IdxN)
4968         return false;
4969
4970       if (ElementSize != 1) {
4971         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4972         if (!C)
4973           return false;
4974         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4975         if (!IdxN)
4976           return false;
4977         IdxNIsKill = true;
4978       }
4979       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4980       if (!N)
4981         return false;
4982     }
4983   }
4984   if (TotalOffs) {
4985     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4986     if (!N)
4987       return false;
4988   }
4989   updateValueMap(I, N);
4990   return true;
4991 }
4992
4993 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4994   assert(TM.getOptLevel() == CodeGenOpt::None &&
4995          "cmpxchg survived AtomicExpand at optlevel > -O0");
4996
4997   auto *RetPairTy = cast<StructType>(I->getType());
4998   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4999   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5000          "cmpxchg has a non-i1 status result");
5001
5002   MVT VT;
5003   if (!isTypeLegal(RetTy, VT))
5004     return false;
5005
5006   const TargetRegisterClass *ResRC;
5007   unsigned Opc, CmpOpc;
5008   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5009   // extractvalue selection doesn't support that.
5010   if (VT == MVT::i32) {
5011     Opc = AArch64::CMP_SWAP_32;
5012     CmpOpc = AArch64::SUBSWrs;
5013     ResRC = &AArch64::GPR32RegClass;
5014   } else if (VT == MVT::i64) {
5015     Opc = AArch64::CMP_SWAP_64;
5016     CmpOpc = AArch64::SUBSXrs;
5017     ResRC = &AArch64::GPR64RegClass;
5018   } else {
5019     return false;
5020   }
5021
5022   const MCInstrDesc &II = TII.get(Opc);
5023
5024   const unsigned AddrReg = constrainOperandRegClass(
5025       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5026   const unsigned DesiredReg = constrainOperandRegClass(
5027       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5028   const unsigned NewReg = constrainOperandRegClass(
5029       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5030
5031   const unsigned ResultReg1 = createResultReg(ResRC);
5032   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5033   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5034
5035   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5036   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5037       .addDef(ResultReg1)
5038       .addDef(ScratchReg)
5039       .addUse(AddrReg)
5040       .addUse(DesiredReg)
5041       .addUse(NewReg);
5042
5043   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5044       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5045       .addUse(ResultReg1)
5046       .addUse(DesiredReg)
5047       .addImm(0);
5048
5049   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5050       .addDef(ResultReg2)
5051       .addUse(AArch64::WZR)
5052       .addUse(AArch64::WZR)
5053       .addImm(AArch64CC::NE);
5054
5055   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5056   updateValueMap(I, ResultReg1, 2);
5057   return true;
5058 }
5059
5060 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5061   switch (I->getOpcode()) {
5062   default:
5063     break;
5064   case Instruction::Add:
5065   case Instruction::Sub:
5066     return selectAddSub(I);
5067   case Instruction::Mul:
5068     return selectMul(I);
5069   case Instruction::SDiv:
5070     return selectSDiv(I);
5071   case Instruction::SRem:
5072     if (!selectBinaryOp(I, ISD::SREM))
5073       return selectRem(I, ISD::SREM);
5074     return true;
5075   case Instruction::URem:
5076     if (!selectBinaryOp(I, ISD::UREM))
5077       return selectRem(I, ISD::UREM);
5078     return true;
5079   case Instruction::Shl:
5080   case Instruction::LShr:
5081   case Instruction::AShr:
5082     return selectShift(I);
5083   case Instruction::And:
5084   case Instruction::Or:
5085   case Instruction::Xor:
5086     return selectLogicalOp(I);
5087   case Instruction::Br:
5088     return selectBranch(I);
5089   case Instruction::IndirectBr:
5090     return selectIndirectBr(I);
5091   case Instruction::BitCast:
5092     if (!FastISel::selectBitCast(I))
5093       return selectBitCast(I);
5094     return true;
5095   case Instruction::FPToSI:
5096     if (!selectCast(I, ISD::FP_TO_SINT))
5097       return selectFPToInt(I, /*Signed=*/true);
5098     return true;
5099   case Instruction::FPToUI:
5100     return selectFPToInt(I, /*Signed=*/false);
5101   case Instruction::ZExt:
5102   case Instruction::SExt:
5103     return selectIntExt(I);
5104   case Instruction::Trunc:
5105     if (!selectCast(I, ISD::TRUNCATE))
5106       return selectTrunc(I);
5107     return true;
5108   case Instruction::FPExt:
5109     return selectFPExt(I);
5110   case Instruction::FPTrunc:
5111     return selectFPTrunc(I);
5112   case Instruction::SIToFP:
5113     if (!selectCast(I, ISD::SINT_TO_FP))
5114       return selectIntToFP(I, /*Signed=*/true);
5115     return true;
5116   case Instruction::UIToFP:
5117     return selectIntToFP(I, /*Signed=*/false);
5118   case Instruction::Load:
5119     return selectLoad(I);
5120   case Instruction::Store:
5121     return selectStore(I);
5122   case Instruction::FCmp:
5123   case Instruction::ICmp:
5124     return selectCmp(I);
5125   case Instruction::Select:
5126     return selectSelect(I);
5127   case Instruction::Ret:
5128     return selectRet(I);
5129   case Instruction::FRem:
5130     return selectFRem(I);
5131   case Instruction::GetElementPtr:
5132     return selectGetElementPtr(I);
5133   case Instruction::AtomicCmpXchg:
5134     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5135   }
5136
5137   // fall-back to target-independent instruction selection.
5138   return selectOperator(I, I->getOpcode());
5139   // Silence warnings.
5140   (void)&CC_AArch64_DarwinPCS_VarArg;
5141   (void)&CC_AArch64_Win64_VarArg;
5142 }
5143
5144 namespace llvm {
5145
5146 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5147                                         const TargetLibraryInfo *LibInfo) {
5148   return new AArch64FastISel(FuncInfo, LibInfo);
5149 }
5150
5151 } // end namespace llvm