contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp

   1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64CallingConvention.h"
  18 #include "AArch64RegisterInfo.h"
  19 #include "AArch64Subtarget.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "Utils/AArch64BaseInfo.h"
  22 #include "llvm/ADT/APFloat.h"
  23 #include "llvm/ADT/APInt.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/Analysis/BranchProbabilityInfo.h"
  27 #include "llvm/CodeGen/CallingConvLower.h"
  28 #include "llvm/CodeGen/FastISel.h"
  29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  30 #include "llvm/CodeGen/ISDOpcodes.h"
  31 #include "llvm/CodeGen/MachineBasicBlock.h"
  32 #include "llvm/CodeGen/MachineConstantPool.h"
  33 #include "llvm/CodeGen/MachineFrameInfo.h"
  34 #include "llvm/CodeGen/MachineInstr.h"
  35 #include "llvm/CodeGen/MachineInstrBuilder.h"
  36 #include "llvm/CodeGen/MachineMemOperand.h"
  37 #include "llvm/CodeGen/MachineRegisterInfo.h"
  38 #include "llvm/CodeGen/MachineValueType.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/ValueTypes.h"
  41 #include "llvm/IR/Argument.h"
  42 #include "llvm/IR/Attributes.h"
  43 #include "llvm/IR/BasicBlock.h"
  44 #include "llvm/IR/CallingConv.h"
  45 #include "llvm/IR/Constant.h"
  46 #include "llvm/IR/Constants.h"
  47 #include "llvm/IR/DataLayout.h"
  48 #include "llvm/IR/DerivedTypes.h"
  49 #include "llvm/IR/Function.h"
  50 #include "llvm/IR/GetElementPtrTypeIterator.h"
  51 #include "llvm/IR/GlobalValue.h"
  52 #include "llvm/IR/InstrTypes.h"
  53 #include "llvm/IR/Instruction.h"
  54 #include "llvm/IR/Instructions.h"
  55 #include "llvm/IR/IntrinsicInst.h"
  56 #include "llvm/IR/Intrinsics.h"
  57 #include "llvm/IR/Operator.h"
  58 #include "llvm/IR/Type.h"
  59 #include "llvm/IR/User.h"
  60 #include "llvm/IR/Value.h"
  61 #include "llvm/MC/MCInstrDesc.h"
  62 #include "llvm/MC/MCRegisterInfo.h"
  63 #include "llvm/MC/MCSymbol.h"
  64 #include "llvm/Support/AtomicOrdering.h"
  65 #include "llvm/Support/Casting.h"
  66 #include "llvm/Support/CodeGen.h"
  67 #include "llvm/Support/Compiler.h"
  68 #include "llvm/Support/ErrorHandling.h"
  69 #include "llvm/Support/MathExtras.h"
  70 #include <algorithm>
  71 #include <cassert>
  72 #include <cstdint>
  73 #include <iterator>
  74 #include <utility>
  75
  76 using namespace llvm;
  77
  78 namespace {
  79
  80 class AArch64FastISel final : public FastISel {
  81   class Address {
  82   public:
  83     using BaseKind = enum {
  84       RegBase,
  85       FrameIndexBase
  86     };
  87
  88   private:
  89     BaseKind Kind = RegBase;
  90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
  91     union {
  92       unsigned Reg;
  93       int FI;
  94     } Base;
  95     unsigned OffsetReg = 0;
  96     unsigned Shift = 0;
  97     int64_t Offset = 0;
  98     const GlobalValue *GV = nullptr;
  99
 100   public:
 101     Address() { Base.Reg = 0; }
 102
 103     void setKind(BaseKind K) { Kind = K; }
 104     BaseKind getKind() const { return Kind; }
 105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
 106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
 107     bool isRegBase() const { return Kind == RegBase; }
 108     bool isFIBase() const { return Kind == FrameIndexBase; }
 109
 110     void setReg(unsigned Reg) {
 111       assert(isRegBase() && "Invalid base register access!");
 112       Base.Reg = Reg;
 113     }
 114
 115     unsigned getReg() const {
 116       assert(isRegBase() && "Invalid base register access!");
 117       return Base.Reg;
 118     }
 119
 120     void setOffsetReg(unsigned Reg) {
 121       OffsetReg = Reg;
 122     }
 123
 124     unsigned getOffsetReg() const {
 125       return OffsetReg;
 126     }
 127
 128     void setFI(unsigned FI) {
 129       assert(isFIBase() && "Invalid base frame index  access!");
 130       Base.FI = FI;
 131     }
 132
 133     unsigned getFI() const {
 134       assert(isFIBase() && "Invalid base frame index access!");
 135       return Base.FI;
 136     }
 137
 138     void setOffset(int64_t O) { Offset = O; }
 139     int64_t getOffset() { return Offset; }
 140     void setShift(unsigned S) { Shift = S; }
 141     unsigned getShift() { return Shift; }
 142
 143     void setGlobalValue(const GlobalValue *G) { GV = G; }
 144     const GlobalValue *getGlobalValue() { return GV; }
 145   };
 146
 147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 148   /// make the right decision when generating code for different targets.
 149   const AArch64Subtarget *Subtarget;
 150   LLVMContext *Context;
 151
 152   bool fastLowerArguments() override;
 153   bool fastLowerCall(CallLoweringInfo &CLI) override;
 154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 155
 156 private:
 157   // Selection routines.
 158   bool selectAddSub(const Instruction *I);
 159   bool selectLogicalOp(const Instruction *I);
 160   bool selectLoad(const Instruction *I);
 161   bool selectStore(const Instruction *I);
 162   bool selectBranch(const Instruction *I);
 163   bool selectIndirectBr(const Instruction *I);
 164   bool selectCmp(const Instruction *I);
 165   bool selectSelect(const Instruction *I);
 166   bool selectFPExt(const Instruction *I);
 167   bool selectFPTrunc(const Instruction *I);
 168   bool selectFPToInt(const Instruction *I, bool Signed);
 169   bool selectIntToFP(const Instruction *I, bool Signed);
 170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 171   bool selectRet(const Instruction *I);
 172   bool selectTrunc(const Instruction *I);
 173   bool selectIntExt(const Instruction *I);
 174   bool selectMul(const Instruction *I);
 175   bool selectShift(const Instruction *I);
 176   bool selectBitCast(const Instruction *I);
 177   bool selectFRem(const Instruction *I);
 178   bool selectSDiv(const Instruction *I);
 179   bool selectGetElementPtr(const Instruction *I);
 180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
 181
 182   // Utility helper routines.
 183   bool isTypeLegal(Type *Ty, MVT &VT);
 184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 185   bool isValueAvailable(const Value *V) const;
 186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 187   bool computeCallAddress(const Value *V, Address &Addr);
 188   bool simplifyAddress(Address &Addr, MVT VT);
 189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 190                             MachineMemOperand::Flags Flags,
 191                             unsigned ScaleFactor, MachineMemOperand *MMO);
 192   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 194                           unsigned Alignment);
 195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 196                          const Value *Cond);
 197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
 198   bool optimizeSelect(const SelectInst *SI);
 199   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
 200
 201   // Emit helper routines.
 202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 203                       const Value *RHS, bool SetFlags = false,
 204                       bool WantResult = true,  bool IsZExt = false);
 205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 206                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 207                          bool SetFlags = false, bool WantResult = true);
 208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 209                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 210                          bool WantResult = true);
 211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 212                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 213                          AArch64_AM::ShiftExtendType ShiftType,
 214                          uint64_t ShiftImm, bool SetFlags = false,
 215                          bool WantResult = true);
 216   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 217                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 218                           AArch64_AM::ShiftExtendType ExtType,
 219                           uint64_t ShiftImm, bool SetFlags = false,
 220                          bool WantResult = true);
 221
 222   // Emit functions.
 223   bool emitCompareAndBranch(const BranchInst *BI);
 224   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 225   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 226   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 227   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 228   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
 229                     MachineMemOperand *MMO = nullptr);
 230   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 231                  MachineMemOperand *MMO = nullptr);
 232   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
 233                         MachineMemOperand *MMO = nullptr);
 234   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 235   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 236   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 237                    bool SetFlags = false, bool WantResult = true,
 238                    bool IsZExt = false);
 239   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
 240   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 241                    bool SetFlags = false, bool WantResult = true,
 242                    bool IsZExt = false);
 243   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 244                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 245   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 246                        unsigned RHSReg, bool RHSIsKill,
 247                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 248                        bool WantResult = true);
 249   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 250                          const Value *RHS);
 251   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 252                             bool LHSIsKill, uint64_t Imm);
 253   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 254                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 255                             uint64_t ShiftImm);
 256   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 257   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 258                       unsigned Op1, bool Op1IsKill);
 259   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 260                         unsigned Op1, bool Op1IsKill);
 261   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 262                         unsigned Op1, bool Op1IsKill);
 263   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 264                       unsigned Op1Reg, bool Op1IsKill);
 265   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 266                       uint64_t Imm, bool IsZExt = true);
 267   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 268                       unsigned Op1Reg, bool Op1IsKill);
 269   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 270                       uint64_t Imm, bool IsZExt = true);
 271   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 272                       unsigned Op1Reg, bool Op1IsKill);
 273   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 274                       uint64_t Imm, bool IsZExt = false);
 275
 276   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 277   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 278   unsigned materializeGV(const GlobalValue *GV);
 279
 280   // Call handling routines.
 281 private:
 282   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 283   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 284                        unsigned &NumBytes);
 285   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 286
 287 public:
 288   // Backend specific FastISel code.
 289   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 290   unsigned fastMaterializeConstant(const Constant *C) override;
 291   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 292
 293   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 294                            const TargetLibraryInfo *LibInfo)
 295       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 296     Subtarget =
 297         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
 298     Context = &FuncInfo.Fn->getContext();
 299   }
 300
 301   bool fastSelectInstruction(const Instruction *I) override;
 302
 303 #include "AArch64GenFastISel.inc"
 304 };
 305
 306 } // end anonymous namespace
 307
 308 #include "AArch64GenCallingConv.inc"
 309
 310 /// \brief Check if the sign-/zero-extend will be a noop.
 311 static bool isIntExtFree(const Instruction *I) {
 312   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 313          "Unexpected integer extend instruction.");
 314   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 315          "Unexpected value type.");
 316   bool IsZExt = isa<ZExtInst>(I);
 317
 318   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 319     if (LI->hasOneUse())
 320       return true;
 321
 322   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 323     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 324       return true;
 325
 326   return false;
 327 }
 328
 329 /// \brief Determine the implicit scale factor that is applied by a memory
 330 /// operation for a given value type.
 331 static unsigned getImplicitScaleFactor(MVT VT) {
 332   switch (VT.SimpleTy) {
 333   default:
 334     return 0;    // invalid
 335   case MVT::i1:  // fall-through
 336   case MVT::i8:
 337     return 1;
 338   case MVT::i16:
 339     return 2;
 340   case MVT::i32: // fall-through
 341   case MVT::f32:
 342     return 4;
 343   case MVT::i64: // fall-through
 344   case MVT::f64:
 345     return 8;
 346   }
 347 }
 348
 349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 350   if (CC == CallingConv::WebKit_JS)
 351     return CC_AArch64_WebKit_JS;
 352   if (CC == CallingConv::GHC)
 353     return CC_AArch64_GHC;
 354   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 355 }
 356
 357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 358   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
 359          "Alloca should always return a pointer.");
 360
 361   // Don't handle dynamic allocas.
 362   if (!FuncInfo.StaticAllocaMap.count(AI))
 363     return 0;
 364
 365   DenseMap<const AllocaInst *, int>::iterator SI =
 366       FuncInfo.StaticAllocaMap.find(AI);
 367
 368   if (SI != FuncInfo.StaticAllocaMap.end()) {
 369     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 370     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 371             ResultReg)
 372         .addFrameIndex(SI->second)
 373         .addImm(0)
 374         .addImm(0);
 375     return ResultReg;
 376   }
 377
 378   return 0;
 379 }
 380
 381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 382   if (VT > MVT::i64)
 383     return 0;
 384
 385   if (!CI->isZero())
 386     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 387
 388   // Create a copy from the zero register to materialize a "0" value.
 389   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 390                                                    : &AArch64::GPR32RegClass;
 391   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 392   unsigned ResultReg = createResultReg(RC);
 393   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 394           ResultReg).addReg(ZeroReg, getKillRegState(true));
 395   return ResultReg;
 396 }
 397
 398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 399   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 400   // register, because the immediate version of fmov cannot encode zero.
 401   if (CFP->isNullValue())
 402     return fastMaterializeFloatZero(CFP);
 403
 404   if (VT != MVT::f32 && VT != MVT::f64)
 405     return 0;
 406
 407   const APFloat Val = CFP->getValueAPF();
 408   bool Is64Bit = (VT == MVT::f64);
 409   // This checks to see if we can use FMOV instructions to materialize
 410   // a constant, otherwise we have to materialize via the constant pool.
 411   if (TLI.isFPImmLegal(Val, VT)) {
 412     int Imm =
 413         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 414     assert((Imm != -1) && "Cannot encode floating-point constant.");
 415     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 416     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 417   }
 418
 419   // For the MachO large code model materialize the FP constant in code.
 420   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
 421     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
 422     const TargetRegisterClass *RC = Is64Bit ?
 423         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 424
 425     unsigned TmpReg = createResultReg(RC);
 426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
 427         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
 428
 429     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 430     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 431             TII.get(TargetOpcode::COPY), ResultReg)
 432         .addReg(TmpReg, getKillRegState(true));
 433
 434     return ResultReg;
 435   }
 436
 437   // Materialize via constant pool.  MachineConstantPool wants an explicit
 438   // alignment.
 439   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 440   if (Align == 0)
 441     Align = DL.getTypeAllocSize(CFP->getType());
 442
 443   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 444   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 446           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 447
 448   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 449   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 450   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 451       .addReg(ADRPReg)
 452       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 453   return ResultReg;
 454 }
 455
 456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 457   // We can't handle thread-local variables quickly yet.
 458   if (GV->isThreadLocal())
 459     return 0;
 460
 461   // MachO still uses GOT for large code-model accesses, but ELF requires
 462   // movz/movk sequences, which FastISel doesn't handle yet.
 463   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
 464     return 0;
 465
 466   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 467
 468   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
 469   if (!DestEVT.isSimple())
 470     return 0;
 471
 472   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 473   unsigned ResultReg;
 474
 475   if (OpFlags & AArch64II::MO_GOT) {
 476     // ADRP + LDRX
 477     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 478             ADRPReg)
 479         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
 480
 481     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 482     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 483             ResultReg)
 484         .addReg(ADRPReg)
 485         .addGlobalAddress(GV, 0,
 486                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
 487   } else {
 488     // ADRP + ADDX
 489     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 490             ADRPReg)
 491         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
 492
 493     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 494     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 495             ResultReg)
 496         .addReg(ADRPReg)
 497         .addGlobalAddress(GV, 0,
 498                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
 499         .addImm(0);
 500   }
 501   return ResultReg;
 502 }
 503
 504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 505   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
 506
 507   // Only handle simple types.
 508   if (!CEVT.isSimple())
 509     return 0;
 510   MVT VT = CEVT.getSimpleVT();
 511
 512   if (const auto *CI = dyn_cast<ConstantInt>(C))
 513     return materializeInt(CI, VT);
 514   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 515     return materializeFP(CFP, VT);
 516   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 517     return materializeGV(GV);
 518
 519   return 0;
 520 }
 521
 522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 523   assert(CFP->isNullValue() &&
 524          "Floating-point constant is not a positive zero.");
 525   MVT VT;
 526   if (!isTypeLegal(CFP->getType(), VT))
 527     return 0;
 528
 529   if (VT != MVT::f32 && VT != MVT::f64)
 530     return 0;
 531
 532   bool Is64Bit = (VT == MVT::f64);
 533   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 534   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 535   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 536 }
 537
 538 /// \brief Check if the multiply is by a power-of-2 constant.
 539 static bool isMulPowOf2(const Value *I) {
 540   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 541     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 542       if (C->getValue().isPowerOf2())
 543         return true;
 544     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 545       if (C->getValue().isPowerOf2())
 546         return true;
 547   }
 548   return false;
 549 }
 550
 551 // Computes the address to get to an object.
 552 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 553 {
 554   const User *U = nullptr;
 555   unsigned Opcode = Instruction::UserOp1;
 556   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 557     // Don't walk into other basic blocks unless the object is an alloca from
 558     // another block, otherwise it may not have a virtual register assigned.
 559     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 560         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 561       Opcode = I->getOpcode();
 562       U = I;
 563     }
 564   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 565     Opcode = C->getOpcode();
 566     U = C;
 567   }
 568
 569   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
 570     if (Ty->getAddressSpace() > 255)
 571       // Fast instruction selection doesn't support the special
 572       // address spaces.
 573       return false;
 574
 575   switch (Opcode) {
 576   default:
 577     break;
 578   case Instruction::BitCast:
 579     // Look through bitcasts.
 580     return computeAddress(U->getOperand(0), Addr, Ty);
 581
 582   case Instruction::IntToPtr:
 583     // Look past no-op inttoptrs.
 584     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 585         TLI.getPointerTy(DL))
 586       return computeAddress(U->getOperand(0), Addr, Ty);
 587     break;
 588
 589   case Instruction::PtrToInt:
 590     // Look past no-op ptrtoints.
 591     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 592       return computeAddress(U->getOperand(0), Addr, Ty);
 593     break;
 594
 595   case Instruction::GetElementPtr: {
 596     Address SavedAddr = Addr;
 597     uint64_t TmpOffset = Addr.getOffset();
 598
 599     // Iterate through the GEP folding the constants into offsets where
 600     // we can.
 601     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
 602          GTI != E; ++GTI) {
 603       const Value *Op = GTI.getOperand();
 604       if (StructType *STy = GTI.getStructTypeOrNull()) {
 605         const StructLayout *SL = DL.getStructLayout(STy);
 606         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 607         TmpOffset += SL->getElementOffset(Idx);
 608       } else {
 609         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 610         while (true) {
 611           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 612             // Constant-offset addressing.
 613             TmpOffset += CI->getSExtValue() * S;
 614             break;
 615           }
 616           if (canFoldAddIntoGEP(U, Op)) {
 617             // A compatible add with a constant operand. Fold the constant.
 618             ConstantInt *CI =
 619                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 620             TmpOffset += CI->getSExtValue() * S;
 621             // Iterate on the other operand.
 622             Op = cast<AddOperator>(Op)->getOperand(0);
 623             continue;
 624           }
 625           // Unsupported
 626           goto unsupported_gep;
 627         }
 628       }
 629     }
 630
 631     // Try to grab the base operand now.
 632     Addr.setOffset(TmpOffset);
 633     if (computeAddress(U->getOperand(0), Addr, Ty))
 634       return true;
 635
 636     // We failed, restore everything and try the other options.
 637     Addr = SavedAddr;
 638
 639   unsupported_gep:
 640     break;
 641   }
 642   case Instruction::Alloca: {
 643     const AllocaInst *AI = cast<AllocaInst>(Obj);
 644     DenseMap<const AllocaInst *, int>::iterator SI =
 645         FuncInfo.StaticAllocaMap.find(AI);
 646     if (SI != FuncInfo.StaticAllocaMap.end()) {
 647       Addr.setKind(Address::FrameIndexBase);
 648       Addr.setFI(SI->second);
 649       return true;
 650     }
 651     break;
 652   }
 653   case Instruction::Add: {
 654     // Adds of constants are common and easy enough.
 655     const Value *LHS = U->getOperand(0);
 656     const Value *RHS = U->getOperand(1);
 657
 658     if (isa<ConstantInt>(LHS))
 659       std::swap(LHS, RHS);
 660
 661     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 662       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 663       return computeAddress(LHS, Addr, Ty);
 664     }
 665
 666     Address Backup = Addr;
 667     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 668       return true;
 669     Addr = Backup;
 670
 671     break;
 672   }
 673   case Instruction::Sub: {
 674     // Subs of constants are common and easy enough.
 675     const Value *LHS = U->getOperand(0);
 676     const Value *RHS = U->getOperand(1);
 677
 678     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 679       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 680       return computeAddress(LHS, Addr, Ty);
 681     }
 682     break;
 683   }
 684   case Instruction::Shl: {
 685     if (Addr.getOffsetReg())
 686       break;
 687
 688     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 689     if (!CI)
 690       break;
 691
 692     unsigned Val = CI->getZExtValue();
 693     if (Val < 1 || Val > 3)
 694       break;
 695
 696     uint64_t NumBytes = 0;
 697     if (Ty && Ty->isSized()) {
 698       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 699       NumBytes = NumBits / 8;
 700       if (!isPowerOf2_64(NumBits))
 701         NumBytes = 0;
 702     }
 703
 704     if (NumBytes != (1ULL << Val))
 705       break;
 706
 707     Addr.setShift(Val);
 708     Addr.setExtendType(AArch64_AM::LSL);
 709
 710     const Value *Src = U->getOperand(0);
 711     if (const auto *I = dyn_cast<Instruction>(Src)) {
 712       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 713         // Fold the zext or sext when it won't become a noop.
 714         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 715           if (!isIntExtFree(ZE) &&
 716               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 717             Addr.setExtendType(AArch64_AM::UXTW);
 718             Src = ZE->getOperand(0);
 719           }
 720         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 721           if (!isIntExtFree(SE) &&
 722               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 723             Addr.setExtendType(AArch64_AM::SXTW);
 724             Src = SE->getOperand(0);
 725           }
 726         }
 727       }
 728     }
 729
 730     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 731       if (AI->getOpcode() == Instruction::And) {
 732         const Value *LHS = AI->getOperand(0);
 733         const Value *RHS = AI->getOperand(1);
 734
 735         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 736           if (C->getValue() == 0xffffffff)
 737             std::swap(LHS, RHS);
 738
 739         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 740           if (C->getValue() == 0xffffffff) {
 741             Addr.setExtendType(AArch64_AM::UXTW);
 742             unsigned Reg = getRegForValue(LHS);
 743             if (!Reg)
 744               return false;
 745             bool RegIsKill = hasTrivialKill(LHS);
 746             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 747                                              AArch64::sub_32);
 748             Addr.setOffsetReg(Reg);
 749             return true;
 750           }
 751       }
 752
 753     unsigned Reg = getRegForValue(Src);
 754     if (!Reg)
 755       return false;
 756     Addr.setOffsetReg(Reg);
 757     return true;
 758   }
 759   case Instruction::Mul: {
 760     if (Addr.getOffsetReg())
 761       break;
 762
 763     if (!isMulPowOf2(U))
 764       break;
 765
 766     const Value *LHS = U->getOperand(0);
 767     const Value *RHS = U->getOperand(1);
 768
 769     // Canonicalize power-of-2 value to the RHS.
 770     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 771       if (C->getValue().isPowerOf2())
 772         std::swap(LHS, RHS);
 773
 774     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 775     const auto *C = cast<ConstantInt>(RHS);
 776     unsigned Val = C->getValue().logBase2();
 777     if (Val < 1 || Val > 3)
 778       break;
 779
 780     uint64_t NumBytes = 0;
 781     if (Ty && Ty->isSized()) {
 782       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 783       NumBytes = NumBits / 8;
 784       if (!isPowerOf2_64(NumBits))
 785         NumBytes = 0;
 786     }
 787
 788     if (NumBytes != (1ULL << Val))
 789       break;
 790
 791     Addr.setShift(Val);
 792     Addr.setExtendType(AArch64_AM::LSL);
 793
 794     const Value *Src = LHS;
 795     if (const auto *I = dyn_cast<Instruction>(Src)) {
 796       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 797         // Fold the zext or sext when it won't become a noop.
 798         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 799           if (!isIntExtFree(ZE) &&
 800               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 801             Addr.setExtendType(AArch64_AM::UXTW);
 802             Src = ZE->getOperand(0);
 803           }
 804         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 805           if (!isIntExtFree(SE) &&
 806               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 807             Addr.setExtendType(AArch64_AM::SXTW);
 808             Src = SE->getOperand(0);
 809           }
 810         }
 811       }
 812     }
 813
 814     unsigned Reg = getRegForValue(Src);
 815     if (!Reg)
 816       return false;
 817     Addr.setOffsetReg(Reg);
 818     return true;
 819   }
 820   case Instruction::And: {
 821     if (Addr.getOffsetReg())
 822       break;
 823
 824     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
 825       break;
 826
 827     const Value *LHS = U->getOperand(0);
 828     const Value *RHS = U->getOperand(1);
 829
 830     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 831       if (C->getValue() == 0xffffffff)
 832         std::swap(LHS, RHS);
 833
 834     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 835       if (C->getValue() == 0xffffffff) {
 836         Addr.setShift(0);
 837         Addr.setExtendType(AArch64_AM::LSL);
 838         Addr.setExtendType(AArch64_AM::UXTW);
 839
 840         unsigned Reg = getRegForValue(LHS);
 841         if (!Reg)
 842           return false;
 843         bool RegIsKill = hasTrivialKill(LHS);
 844         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 845                                          AArch64::sub_32);
 846         Addr.setOffsetReg(Reg);
 847         return true;
 848       }
 849     break;
 850   }
 851   case Instruction::SExt:
 852   case Instruction::ZExt: {
 853     if (!Addr.getReg() || Addr.getOffsetReg())
 854       break;
 855
 856     const Value *Src = nullptr;
 857     // Fold the zext or sext when it won't become a noop.
 858     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 859       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 860         Addr.setExtendType(AArch64_AM::UXTW);
 861         Src = ZE->getOperand(0);
 862       }
 863     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 864       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 865         Addr.setExtendType(AArch64_AM::SXTW);
 866         Src = SE->getOperand(0);
 867       }
 868     }
 869
 870     if (!Src)
 871       break;
 872
 873     Addr.setShift(0);
 874     unsigned Reg = getRegForValue(Src);
 875     if (!Reg)
 876       return false;
 877     Addr.setOffsetReg(Reg);
 878     return true;
 879   }
 880   } // end switch
 881
 882   if (Addr.isRegBase() && !Addr.getReg()) {
 883     unsigned Reg = getRegForValue(Obj);
 884     if (!Reg)
 885       return false;
 886     Addr.setReg(Reg);
 887     return true;
 888   }
 889
 890   if (!Addr.getOffsetReg()) {
 891     unsigned Reg = getRegForValue(Obj);
 892     if (!Reg)
 893       return false;
 894     Addr.setOffsetReg(Reg);
 895     return true;
 896   }
 897
 898   return false;
 899 }
 900
 901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 902   const User *U = nullptr;
 903   unsigned Opcode = Instruction::UserOp1;
 904   bool InMBB = true;
 905
 906   if (const auto *I = dyn_cast<Instruction>(V)) {
 907     Opcode = I->getOpcode();
 908     U = I;
 909     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 910   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 911     Opcode = C->getOpcode();
 912     U = C;
 913   }
 914
 915   switch (Opcode) {
 916   default: break;
 917   case Instruction::BitCast:
 918     // Look past bitcasts if its operand is in the same BB.
 919     if (InMBB)
 920       return computeCallAddress(U->getOperand(0), Addr);
 921     break;
 922   case Instruction::IntToPtr:
 923     // Look past no-op inttoptrs if its operand is in the same BB.
 924     if (InMBB &&
 925         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 926             TLI.getPointerTy(DL))
 927       return computeCallAddress(U->getOperand(0), Addr);
 928     break;
 929   case Instruction::PtrToInt:
 930     // Look past no-op ptrtoints if its operand is in the same BB.
 931     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 932       return computeCallAddress(U->getOperand(0), Addr);
 933     break;
 934   }
 935
 936   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 937     Addr.setGlobalValue(GV);
 938     return true;
 939   }
 940
 941   // If all else fails, try to materialize the value in a register.
 942   if (!Addr.getGlobalValue()) {
 943     Addr.setReg(getRegForValue(V));
 944     return Addr.getReg() != 0;
 945   }
 946
 947   return false;
 948 }
 949
 950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 951   EVT evt = TLI.getValueType(DL, Ty, true);
 952
 953   // Only handle simple types.
 954   if (evt == MVT::Other || !evt.isSimple())
 955     return false;
 956   VT = evt.getSimpleVT();
 957
 958   // This is a legal type, but it's not something we handle in fast-isel.
 959   if (VT == MVT::f128)
 960     return false;
 961
 962   // Handle all other legal types, i.e. a register that will directly hold this
 963   // value.
 964   return TLI.isTypeLegal(VT);
 965 }
 966
 967 /// \brief Determine if the value type is supported by FastISel.
 968 ///
 969 /// FastISel for AArch64 can handle more value types than are legal. This adds
 970 /// simple value type such as i1, i8, and i16.
 971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 972   if (Ty->isVectorTy() && !IsVectorAllowed)
 973     return false;
 974
 975   if (isTypeLegal(Ty, VT))
 976     return true;
 977
 978   // If this is a type than can be sign or zero-extended to a basic operation
 979   // go ahead and accept it now.
 980   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 981     return true;
 982
 983   return false;
 984 }
 985
 986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 987   if (!isa<Instruction>(V))
 988     return true;
 989
 990   const auto *I = cast<Instruction>(V);
 991   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
 992 }
 993
 994 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 995   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 996   if (!ScaleFactor)
 997     return false;
 998
 999   bool ImmediateOffsetNeedsLowering = false;
1000   bool RegisterOffsetNeedsLowering = false;
1001   int64_t Offset = Addr.getOffset();
1002   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003     ImmediateOffsetNeedsLowering = true;
1004   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005            !isUInt<12>(Offset / ScaleFactor))
1006     ImmediateOffsetNeedsLowering = true;
1007
1008   // Cannot encode an offset register and an immediate offset in the same
1009   // instruction. Fold the immediate offset into the load/store instruction and
1010   // emit an additional add to take care of the offset register.
1011   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012     RegisterOffsetNeedsLowering = true;
1013
1014   // Cannot encode zero register as base.
1015   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016     RegisterOffsetNeedsLowering = true;
1017
1018   // If this is a stack pointer and the offset needs to be simplified then put
1019   // the alloca address into a register, set the base type back to register and
1020   // continue. This should almost never happen.
1021   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022   {
1023     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025             ResultReg)
1026       .addFrameIndex(Addr.getFI())
1027       .addImm(0)
1028       .addImm(0);
1029     Addr.setKind(Address::RegBase);
1030     Addr.setReg(ResultReg);
1031   }
1032
1033   if (RegisterOffsetNeedsLowering) {
1034     unsigned ResultReg = 0;
1035     if (Addr.getReg()) {
1036       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037           Addr.getExtendType() == AArch64_AM::UXTW   )
1038         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1041                                   Addr.getShift());
1042       else
1043         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046                                   Addr.getShift());
1047     } else {
1048       if (Addr.getExtendType() == AArch64_AM::UXTW)
1049         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050                                /*Op0IsKill=*/false, Addr.getShift(),
1051                                /*IsZExt=*/true);
1052       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054                                /*Op0IsKill=*/false, Addr.getShift(),
1055                                /*IsZExt=*/false);
1056       else
1057         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058                                /*Op0IsKill=*/false, Addr.getShift());
1059     }
1060     if (!ResultReg)
1061       return false;
1062
1063     Addr.setReg(ResultReg);
1064     Addr.setOffsetReg(0);
1065     Addr.setShift(0);
1066     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067   }
1068
1069   // Since the offset is too large for the load/store instruction get the
1070   // reg+offset into a register.
1071   if (ImmediateOffsetNeedsLowering) {
1072     unsigned ResultReg;
1073     if (Addr.getReg())
1074       // Try to fold the immediate into the add instruction.
1075       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076     else
1077       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078
1079     if (!ResultReg)
1080       return false;
1081     Addr.setReg(ResultReg);
1082     Addr.setOffset(0);
1083   }
1084   return true;
1085 }
1086
1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088                                            const MachineInstrBuilder &MIB,
1089                                            MachineMemOperand::Flags Flags,
1090                                            unsigned ScaleFactor,
1091                                            MachineMemOperand *MMO) {
1092   int64_t Offset = Addr.getOffset() / ScaleFactor;
1093   // Frame base works a bit differently. Handle it separately.
1094   if (Addr.isFIBase()) {
1095     int FI = Addr.getFI();
1096     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1097     // and alignment should be based on the VT.
1098     MMO = FuncInfo.MF->getMachineMemOperand(
1099         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101     // Now add the rest of the operands.
1102     MIB.addFrameIndex(FI).addImm(Offset);
1103   } else {
1104     assert(Addr.isRegBase() && "Unexpected address kind.");
1105     const MCInstrDesc &II = MIB->getDesc();
1106     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107     Addr.setReg(
1108       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109     Addr.setOffsetReg(
1110       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111     if (Addr.getOffsetReg()) {
1112       assert(Addr.getOffset() == 0 && "Unexpected offset");
1113       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114                       Addr.getExtendType() == AArch64_AM::SXTX;
1115       MIB.addReg(Addr.getReg());
1116       MIB.addReg(Addr.getOffsetReg());
1117       MIB.addImm(IsSigned);
1118       MIB.addImm(Addr.getShift() != 0);
1119     } else
1120       MIB.addReg(Addr.getReg()).addImm(Offset);
1121   }
1122
1123   if (MMO)
1124     MIB.addMemOperand(MMO);
1125 }
1126
1127 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1128                                      const Value *RHS, bool SetFlags,
1129                                      bool WantResult,  bool IsZExt) {
1130   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1131   bool NeedExtend = false;
1132   switch (RetVT.SimpleTy) {
1133   default:
1134     return 0;
1135   case MVT::i1:
1136     NeedExtend = true;
1137     break;
1138   case MVT::i8:
1139     NeedExtend = true;
1140     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1141     break;
1142   case MVT::i16:
1143     NeedExtend = true;
1144     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1145     break;
1146   case MVT::i32:  // fall-through
1147   case MVT::i64:
1148     break;
1149   }
1150   MVT SrcVT = RetVT;
1151   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1152
1153   // Canonicalize immediates to the RHS first.
1154   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1155     std::swap(LHS, RHS);
1156
1157   // Canonicalize mul by power of 2 to the RHS.
1158   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1159     if (isMulPowOf2(LHS))
1160       std::swap(LHS, RHS);
1161
1162   // Canonicalize shift immediate to the RHS.
1163   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1164     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1165       if (isa<ConstantInt>(SI->getOperand(1)))
1166         if (SI->getOpcode() == Instruction::Shl  ||
1167             SI->getOpcode() == Instruction::LShr ||
1168             SI->getOpcode() == Instruction::AShr   )
1169           std::swap(LHS, RHS);
1170
1171   unsigned LHSReg = getRegForValue(LHS);
1172   if (!LHSReg)
1173     return 0;
1174   bool LHSIsKill = hasTrivialKill(LHS);
1175
1176   if (NeedExtend)
1177     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1178
1179   unsigned ResultReg = 0;
1180   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1181     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1182     if (C->isNegative())
1183       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1184                                 SetFlags, WantResult);
1185     else
1186       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1187                                 WantResult);
1188   } else if (const auto *C = dyn_cast<Constant>(RHS))
1189     if (C->isNullValue())
1190       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1191                                 WantResult);
1192
1193   if (ResultReg)
1194     return ResultReg;
1195
1196   // Only extend the RHS within the instruction if there is a valid extend type.
1197   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1198       isValueAvailable(RHS)) {
1199     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1200       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1201         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1202           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1203           if (!RHSReg)
1204             return 0;
1205           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1206           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1207                                RHSIsKill, ExtendType, C->getZExtValue(),
1208                                SetFlags, WantResult);
1209         }
1210     unsigned RHSReg = getRegForValue(RHS);
1211     if (!RHSReg)
1212       return 0;
1213     bool RHSIsKill = hasTrivialKill(RHS);
1214     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1215                          ExtendType, 0, SetFlags, WantResult);
1216   }
1217
1218   // Check if the mul can be folded into the instruction.
1219   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1220     if (isMulPowOf2(RHS)) {
1221       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1222       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1223
1224       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1225         if (C->getValue().isPowerOf2())
1226           std::swap(MulLHS, MulRHS);
1227
1228       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1229       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1230       unsigned RHSReg = getRegForValue(MulLHS);
1231       if (!RHSReg)
1232         return 0;
1233       bool RHSIsKill = hasTrivialKill(MulLHS);
1234       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1235                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1236                                 WantResult);
1237       if (ResultReg)
1238         return ResultReg;
1239     }
1240   }
1241
1242   // Check if the shift can be folded into the instruction.
1243   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1245       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1246         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1247         switch (SI->getOpcode()) {
1248         default: break;
1249         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1250         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1251         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1252         }
1253         uint64_t ShiftVal = C->getZExtValue();
1254         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1255           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1256           if (!RHSReg)
1257             return 0;
1258           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1259           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1260                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1261                                     WantResult);
1262           if (ResultReg)
1263             return ResultReg;
1264         }
1265       }
1266     }
1267   }
1268
1269   unsigned RHSReg = getRegForValue(RHS);
1270   if (!RHSReg)
1271     return 0;
1272   bool RHSIsKill = hasTrivialKill(RHS);
1273
1274   if (NeedExtend)
1275     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276
1277   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1278                        SetFlags, WantResult);
1279 }
1280
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282                                         bool LHSIsKill, unsigned RHSReg,
1283                                         bool RHSIsKill, bool SetFlags,
1284                                         bool WantResult) {
1285   assert(LHSReg && RHSReg && "Invalid register number.");
1286
1287   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289     return 0;
1290
1291   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292     return 0;
1293
1294   static const unsigned OpcTable[2][2][2] = {
1295     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1296       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1297     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1299   };
1300   bool Is64Bit = RetVT == MVT::i64;
1301   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302   const TargetRegisterClass *RC =
1303       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304   unsigned ResultReg;
1305   if (WantResult)
1306     ResultReg = createResultReg(RC);
1307   else
1308     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309
1310   const MCInstrDesc &II = TII.get(Opc);
1311   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314       .addReg(LHSReg, getKillRegState(LHSIsKill))
1315       .addReg(RHSReg, getKillRegState(RHSIsKill));
1316   return ResultReg;
1317 }
1318
1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320                                         bool LHSIsKill, uint64_t Imm,
1321                                         bool SetFlags, bool WantResult) {
1322   assert(LHSReg && "Invalid register number.");
1323
1324   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325     return 0;
1326
1327   unsigned ShiftImm;
1328   if (isUInt<12>(Imm))
1329     ShiftImm = 0;
1330   else if ((Imm & 0xfff000) == Imm) {
1331     ShiftImm = 12;
1332     Imm >>= 12;
1333   } else
1334     return 0;
1335
1336   static const unsigned OpcTable[2][2][2] = {
1337     { { AArch64::SUBWri,  AArch64::SUBXri  },
1338       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1339     { { AArch64::SUBSWri, AArch64::SUBSXri },
1340       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1341   };
1342   bool Is64Bit = RetVT == MVT::i64;
1343   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344   const TargetRegisterClass *RC;
1345   if (SetFlags)
1346     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347   else
1348     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349   unsigned ResultReg;
1350   if (WantResult)
1351     ResultReg = createResultReg(RC);
1352   else
1353     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354
1355   const MCInstrDesc &II = TII.get(Opc);
1356   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358       .addReg(LHSReg, getKillRegState(LHSIsKill))
1359       .addImm(Imm)
1360       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361   return ResultReg;
1362 }
1363
1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365                                         bool LHSIsKill, unsigned RHSReg,
1366                                         bool RHSIsKill,
1367                                         AArch64_AM::ShiftExtendType ShiftType,
1368                                         uint64_t ShiftImm, bool SetFlags,
1369                                         bool WantResult) {
1370   assert(LHSReg && RHSReg && "Invalid register number.");
1371   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373
1374   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375     return 0;
1376
1377   // Don't deal with undefined shifts.
1378   if (ShiftImm >= RetVT.getSizeInBits())
1379     return 0;
1380
1381   static const unsigned OpcTable[2][2][2] = {
1382     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1383       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1384     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1386   };
1387   bool Is64Bit = RetVT == MVT::i64;
1388   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389   const TargetRegisterClass *RC =
1390       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391   unsigned ResultReg;
1392   if (WantResult)
1393     ResultReg = createResultReg(RC);
1394   else
1395     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396
1397   const MCInstrDesc &II = TII.get(Opc);
1398   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401       .addReg(LHSReg, getKillRegState(LHSIsKill))
1402       .addReg(RHSReg, getKillRegState(RHSIsKill))
1403       .addImm(getShifterImm(ShiftType, ShiftImm));
1404   return ResultReg;
1405 }
1406
1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408                                         bool LHSIsKill, unsigned RHSReg,
1409                                         bool RHSIsKill,
1410                                         AArch64_AM::ShiftExtendType ExtType,
1411                                         uint64_t ShiftImm, bool SetFlags,
1412                                         bool WantResult) {
1413   assert(LHSReg && RHSReg && "Invalid register number.");
1414   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416
1417   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418     return 0;
1419
1420   if (ShiftImm >= 4)
1421     return 0;
1422
1423   static const unsigned OpcTable[2][2][2] = {
1424     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1425       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1426     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1428   };
1429   bool Is64Bit = RetVT == MVT::i64;
1430   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431   const TargetRegisterClass *RC = nullptr;
1432   if (SetFlags)
1433     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434   else
1435     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436   unsigned ResultReg;
1437   if (WantResult)
1438     ResultReg = createResultReg(RC);
1439   else
1440     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441
1442   const MCInstrDesc &II = TII.get(Opc);
1443   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446       .addReg(LHSReg, getKillRegState(LHSIsKill))
1447       .addReg(RHSReg, getKillRegState(RHSIsKill))
1448       .addImm(getArithExtendImm(ExtType, ShiftImm));
1449   return ResultReg;
1450 }
1451
1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453   Type *Ty = LHS->getType();
1454   EVT EVT = TLI.getValueType(DL, Ty, true);
1455   if (!EVT.isSimple())
1456     return false;
1457   MVT VT = EVT.getSimpleVT();
1458
1459   switch (VT.SimpleTy) {
1460   default:
1461     return false;
1462   case MVT::i1:
1463   case MVT::i8:
1464   case MVT::i16:
1465   case MVT::i32:
1466   case MVT::i64:
1467     return emitICmp(VT, LHS, RHS, IsZExt);
1468   case MVT::f32:
1469   case MVT::f64:
1470     return emitFCmp(VT, LHS, RHS);
1471   }
1472 }
1473
1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475                                bool IsZExt) {
1476   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477                  IsZExt) != 0;
1478 }
1479
1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481                                   uint64_t Imm) {
1482   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485
1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488     return false;
1489
1490   // Check to see if the 2nd operand is a constant that we can encode directly
1491   // in the compare.
1492   bool UseImm = false;
1493   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494     if (CFP->isZero() && !CFP->isNegative())
1495       UseImm = true;
1496
1497   unsigned LHSReg = getRegForValue(LHS);
1498   if (!LHSReg)
1499     return false;
1500   bool LHSIsKill = hasTrivialKill(LHS);
1501
1502   if (UseImm) {
1503     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505         .addReg(LHSReg, getKillRegState(LHSIsKill));
1506     return true;
1507   }
1508
1509   unsigned RHSReg = getRegForValue(RHS);
1510   if (!RHSReg)
1511     return false;
1512   bool RHSIsKill = hasTrivialKill(RHS);
1513
1514   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516       .addReg(LHSReg, getKillRegState(LHSIsKill))
1517       .addReg(RHSReg, getKillRegState(RHSIsKill));
1518   return true;
1519 }
1520
1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522                                   bool SetFlags, bool WantResult, bool IsZExt) {
1523   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524                     IsZExt);
1525 }
1526
1527 /// \brief This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533                                       int64_t Imm) {
1534   unsigned ResultReg;
1535   if (Imm < 0)
1536     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537   else
1538     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539
1540   if (ResultReg)
1541     return ResultReg;
1542
1543   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544   if (!CReg)
1545     return 0;
1546
1547   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548   return ResultReg;
1549 }
1550
1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552                                   bool SetFlags, bool WantResult, bool IsZExt) {
1553   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554                     IsZExt);
1555 }
1556
1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558                                       bool LHSIsKill, unsigned RHSReg,
1559                                       bool RHSIsKill, bool WantResult) {
1560   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561                        RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563
1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565                                       bool LHSIsKill, unsigned RHSReg,
1566                                       bool RHSIsKill,
1567                                       AArch64_AM::ShiftExtendType ShiftType,
1568                                       uint64_t ShiftImm, bool WantResult) {
1569   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571                        WantResult);
1572 }
1573
1574 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575                                         const Value *LHS, const Value *RHS) {
1576   // Canonicalize immediates to the RHS first.
1577   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578     std::swap(LHS, RHS);
1579
1580   // Canonicalize mul by power-of-2 to the RHS.
1581   if (LHS->hasOneUse() && isValueAvailable(LHS))
1582     if (isMulPowOf2(LHS))
1583       std::swap(LHS, RHS);
1584
1585   // Canonicalize shift immediate to the RHS.
1586   if (LHS->hasOneUse() && isValueAvailable(LHS))
1587     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588       if (isa<ConstantInt>(SI->getOperand(1)))
1589         std::swap(LHS, RHS);
1590
1591   unsigned LHSReg = getRegForValue(LHS);
1592   if (!LHSReg)
1593     return 0;
1594   bool LHSIsKill = hasTrivialKill(LHS);
1595
1596   unsigned ResultReg = 0;
1597   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598     uint64_t Imm = C->getZExtValue();
1599     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600   }
1601   if (ResultReg)
1602     return ResultReg;
1603
1604   // Check if the mul can be folded into the instruction.
1605   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606     if (isMulPowOf2(RHS)) {
1607       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609
1610       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611         if (C->getValue().isPowerOf2())
1612           std::swap(MulLHS, MulRHS);
1613
1614       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616
1617       unsigned RHSReg = getRegForValue(MulLHS);
1618       if (!RHSReg)
1619         return 0;
1620       bool RHSIsKill = hasTrivialKill(MulLHS);
1621       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622                                    RHSIsKill, ShiftVal);
1623       if (ResultReg)
1624         return ResultReg;
1625     }
1626   }
1627
1628   // Check if the shift can be folded into the instruction.
1629   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632         uint64_t ShiftVal = C->getZExtValue();
1633         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634         if (!RHSReg)
1635           return 0;
1636         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638                                      RHSIsKill, ShiftVal);
1639         if (ResultReg)
1640           return ResultReg;
1641       }
1642   }
1643
1644   unsigned RHSReg = getRegForValue(RHS);
1645   if (!RHSReg)
1646     return 0;
1647   bool RHSIsKill = hasTrivialKill(RHS);
1648
1649   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654   }
1655   return ResultReg;
1656 }
1657
1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659                                            unsigned LHSReg, bool LHSIsKill,
1660                                            uint64_t Imm) {
1661   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662                 "ISD nodes are not consecutive!");
1663   static const unsigned OpcTable[3][2] = {
1664     { AArch64::ANDWri, AArch64::ANDXri },
1665     { AArch64::ORRWri, AArch64::ORRXri },
1666     { AArch64::EORWri, AArch64::EORXri }
1667   };
1668   const TargetRegisterClass *RC;
1669   unsigned Opc;
1670   unsigned RegSize;
1671   switch (RetVT.SimpleTy) {
1672   default:
1673     return 0;
1674   case MVT::i1:
1675   case MVT::i8:
1676   case MVT::i16:
1677   case MVT::i32: {
1678     unsigned Idx = ISDOpc - ISD::AND;
1679     Opc = OpcTable[Idx][0];
1680     RC = &AArch64::GPR32spRegClass;
1681     RegSize = 32;
1682     break;
1683   }
1684   case MVT::i64:
1685     Opc = OpcTable[ISDOpc - ISD::AND][1];
1686     RC = &AArch64::GPR64spRegClass;
1687     RegSize = 64;
1688     break;
1689   }
1690
1691   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692     return 0;
1693
1694   unsigned ResultReg =
1695       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700   }
1701   return ResultReg;
1702 }
1703
1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705                                            unsigned LHSReg, bool LHSIsKill,
1706                                            unsigned RHSReg, bool RHSIsKill,
1707                                            uint64_t ShiftImm) {
1708   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709                 "ISD nodes are not consecutive!");
1710   static const unsigned OpcTable[3][2] = {
1711     { AArch64::ANDWrs, AArch64::ANDXrs },
1712     { AArch64::ORRWrs, AArch64::ORRXrs },
1713     { AArch64::EORWrs, AArch64::EORXrs }
1714   };
1715
1716   // Don't deal with undefined shifts.
1717   if (ShiftImm >= RetVT.getSizeInBits())
1718     return 0;
1719
1720   const TargetRegisterClass *RC;
1721   unsigned Opc;
1722   switch (RetVT.SimpleTy) {
1723   default:
1724     return 0;
1725   case MVT::i1:
1726   case MVT::i8:
1727   case MVT::i16:
1728   case MVT::i32:
1729     Opc = OpcTable[ISDOpc - ISD::AND][0];
1730     RC = &AArch64::GPR32RegClass;
1731     break;
1732   case MVT::i64:
1733     Opc = OpcTable[ISDOpc - ISD::AND][1];
1734     RC = &AArch64::GPR64RegClass;
1735     break;
1736   }
1737   unsigned ResultReg =
1738       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1739                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743   }
1744   return ResultReg;
1745 }
1746
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748                                      uint64_t Imm) {
1749   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753                                    bool WantZExt, MachineMemOperand *MMO) {
1754   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755     return 0;
1756
1757   // Simplify this down to something we can handle.
1758   if (!simplifyAddress(Addr, VT))
1759     return 0;
1760
1761   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762   if (!ScaleFactor)
1763     llvm_unreachable("Unexpected value type.");
1764
1765   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767   bool UseScaled = true;
1768   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769     UseScaled = false;
1770     ScaleFactor = 1;
1771   }
1772
1773   static const unsigned GPOpcTable[2][8][4] = {
1774     // Sign-extend.
1775     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1776         AArch64::LDURXi  },
1777       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1778         AArch64::LDURXi  },
1779       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1780         AArch64::LDRXui  },
1781       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1782         AArch64::LDRXui  },
1783       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784         AArch64::LDRXroX },
1785       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786         AArch64::LDRXroX },
1787       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788         AArch64::LDRXroW },
1789       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790         AArch64::LDRXroW }
1791     },
1792     // Zero-extend.
1793     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1794         AArch64::LDURXi  },
1795       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1796         AArch64::LDURXi  },
1797       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1798         AArch64::LDRXui  },
1799       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1800         AArch64::LDRXui  },
1801       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1802         AArch64::LDRXroX },
1803       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1804         AArch64::LDRXroX },
1805       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1806         AArch64::LDRXroW },
1807       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1808         AArch64::LDRXroW }
1809     }
1810   };
1811
1812   static const unsigned FPOpcTable[4][2] = {
1813     { AArch64::LDURSi,  AArch64::LDURDi  },
1814     { AArch64::LDRSui,  AArch64::LDRDui  },
1815     { AArch64::LDRSroX, AArch64::LDRDroX },
1816     { AArch64::LDRSroW, AArch64::LDRDroW }
1817   };
1818
1819   unsigned Opc;
1820   const TargetRegisterClass *RC;
1821   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822                       Addr.getOffsetReg();
1823   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825       Addr.getExtendType() == AArch64_AM::SXTW)
1826     Idx++;
1827
1828   bool IsRet64Bit = RetVT == MVT::i64;
1829   switch (VT.SimpleTy) {
1830   default:
1831     llvm_unreachable("Unexpected value type.");
1832   case MVT::i1: // Intentional fall-through.
1833   case MVT::i8:
1834     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835     RC = (IsRet64Bit && !WantZExt) ?
1836              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837     break;
1838   case MVT::i16:
1839     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840     RC = (IsRet64Bit && !WantZExt) ?
1841              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842     break;
1843   case MVT::i32:
1844     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845     RC = (IsRet64Bit && !WantZExt) ?
1846              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847     break;
1848   case MVT::i64:
1849     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850     RC = &AArch64::GPR64RegClass;
1851     break;
1852   case MVT::f32:
1853     Opc = FPOpcTable[Idx][0];
1854     RC = &AArch64::FPR32RegClass;
1855     break;
1856   case MVT::f64:
1857     Opc = FPOpcTable[Idx][1];
1858     RC = &AArch64::FPR64RegClass;
1859     break;
1860   }
1861
1862   // Create the base instruction, then add the operands.
1863   unsigned ResultReg = createResultReg(RC);
1864   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865                                     TII.get(Opc), ResultReg);
1866   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867
1868   // Loading an i1 requires special handling.
1869   if (VT == MVT::i1) {
1870     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871     assert(ANDReg && "Unexpected AND instruction emission failure.");
1872     ResultReg = ANDReg;
1873   }
1874
1875   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876   // the 32bit reg to a 64bit reg.
1877   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881         .addImm(0)
1882         .addReg(ResultReg, getKillRegState(true))
1883         .addImm(AArch64::sub_32);
1884     ResultReg = Reg64;
1885   }
1886   return ResultReg;
1887 }
1888
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890   MVT VT;
1891   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892     return false;
1893
1894   if (VT.isVector())
1895     return selectOperator(I, I->getOpcode());
1896
1897   unsigned ResultReg;
1898   switch (I->getOpcode()) {
1899   default:
1900     llvm_unreachable("Unexpected instruction.");
1901   case Instruction::Add:
1902     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903     break;
1904   case Instruction::Sub:
1905     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906     break;
1907   }
1908   if (!ResultReg)
1909     return false;
1910
1911   updateValueMap(I, ResultReg);
1912   return true;
1913 }
1914
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916   MVT VT;
1917   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918     return false;
1919
1920   if (VT.isVector())
1921     return selectOperator(I, I->getOpcode());
1922
1923   unsigned ResultReg;
1924   switch (I->getOpcode()) {
1925   default:
1926     llvm_unreachable("Unexpected instruction.");
1927   case Instruction::And:
1928     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929     break;
1930   case Instruction::Or:
1931     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932     break;
1933   case Instruction::Xor:
1934     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935     break;
1936   }
1937   if (!ResultReg)
1938     return false;
1939
1940   updateValueMap(I, ResultReg);
1941   return true;
1942 }
1943
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945   MVT VT;
1946   // Verify we have a legal type before going any further.  Currently, we handle
1947   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950       cast<LoadInst>(I)->isAtomic())
1951     return false;
1952
1953   const Value *SV = I->getOperand(0);
1954   if (TLI.supportSwiftError()) {
1955     // Swifterror values can come from either a function parameter with
1956     // swifterror attribute or an alloca with swifterror attribute.
1957     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958       if (Arg->hasSwiftErrorAttr())
1959         return false;
1960     }
1961
1962     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963       if (Alloca->isSwiftError())
1964         return false;
1965     }
1966   }
1967
1968   // See if we can handle this address.
1969   Address Addr;
1970   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971     return false;
1972
1973   // Fold the following sign-/zero-extend into the load instruction.
1974   bool WantZExt = true;
1975   MVT RetVT = VT;
1976   const Value *IntExtVal = nullptr;
1977   if (I->hasOneUse()) {
1978     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979       if (isTypeSupported(ZE->getType(), RetVT))
1980         IntExtVal = ZE;
1981       else
1982         RetVT = VT;
1983     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984       if (isTypeSupported(SE->getType(), RetVT))
1985         IntExtVal = SE;
1986       else
1987         RetVT = VT;
1988       WantZExt = false;
1989     }
1990   }
1991
1992   unsigned ResultReg =
1993       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994   if (!ResultReg)
1995     return false;
1996
1997   // There are a few different cases we have to handle, because the load or the
1998   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1999   // SelectionDAG. There is also an ordering issue when both instructions are in
2000   // different basic blocks.
2001   // 1.) The load instruction is selected by FastISel, but the integer extend
2002   //     not. This usually happens when the integer extend is in a different
2003   //     basic block and SelectionDAG took over for that basic block.
2004   // 2.) The load instruction is selected before the integer extend. This only
2005   //     happens when the integer extend is in a different basic block.
2006   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007   //     by FastISel. This happens if there are instructions between the load
2008   //     and the integer extend that couldn't be selected by FastISel.
2009   if (IntExtVal) {
2010     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012     // it when it selects the integer extend.
2013     unsigned Reg = lookUpRegForValue(IntExtVal);
2014     auto *MI = MRI.getUniqueVRegDef(Reg);
2015     if (!MI) {
2016       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017         if (WantZExt) {
2018           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019           std::prev(FuncInfo.InsertPt)->eraseFromParent();
2020           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2021         } else
2022           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023                                                  /*IsKill=*/true,
2024                                                  AArch64::sub_32);
2025       }
2026       updateValueMap(I, ResultReg);
2027       return true;
2028     }
2029
2030     // The integer extend has already been emitted - delete all the instructions
2031     // that have been emitted by the integer extend lowering code and use the
2032     // result from the load instruction directly.
2033     while (MI) {
2034       Reg = 0;
2035       for (auto &Opnd : MI->uses()) {
2036         if (Opnd.isReg()) {
2037           Reg = Opnd.getReg();
2038           break;
2039         }
2040       }
2041       MI->eraseFromParent();
2042       MI = nullptr;
2043       if (Reg)
2044         MI = MRI.getUniqueVRegDef(Reg);
2045     }
2046     updateValueMap(IntExtVal, ResultReg);
2047     return true;
2048   }
2049
2050   updateValueMap(I, ResultReg);
2051   return true;
2052 }
2053
2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055                                        unsigned AddrReg,
2056                                        MachineMemOperand *MMO) {
2057   unsigned Opc;
2058   switch (VT.SimpleTy) {
2059   default: return false;
2060   case MVT::i8:  Opc = AArch64::STLRB; break;
2061   case MVT::i16: Opc = AArch64::STLRH; break;
2062   case MVT::i32: Opc = AArch64::STLRW; break;
2063   case MVT::i64: Opc = AArch64::STLRX; break;
2064   }
2065
2066   const MCInstrDesc &II = TII.get(Opc);
2067   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2070       .addReg(SrcReg)
2071       .addReg(AddrReg)
2072       .addMemOperand(MMO);
2073   return true;
2074 }
2075
2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077                                 MachineMemOperand *MMO) {
2078   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079     return false;
2080
2081   // Simplify this down to something we can handle.
2082   if (!simplifyAddress(Addr, VT))
2083     return false;
2084
2085   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086   if (!ScaleFactor)
2087     llvm_unreachable("Unexpected value type.");
2088
2089   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091   bool UseScaled = true;
2092   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093     UseScaled = false;
2094     ScaleFactor = 1;
2095   }
2096
2097   static const unsigned OpcTable[4][6] = {
2098     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2099       AArch64::STURSi,   AArch64::STURDi },
2100     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2101       AArch64::STRSui,   AArch64::STRDui },
2102     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103       AArch64::STRSroX,  AArch64::STRDroX },
2104     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105       AArch64::STRSroW,  AArch64::STRDroW }
2106   };
2107
2108   unsigned Opc;
2109   bool VTIsi1 = false;
2110   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111                       Addr.getOffsetReg();
2112   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114       Addr.getExtendType() == AArch64_AM::SXTW)
2115     Idx++;
2116
2117   switch (VT.SimpleTy) {
2118   default: llvm_unreachable("Unexpected value type.");
2119   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2120   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2121   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126   }
2127
2128   // Storing an i1 requires special handling.
2129   if (VTIsi1 && SrcReg != AArch64::WZR) {
2130     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2131     assert(ANDReg && "Unexpected AND instruction emission failure.");
2132     SrcReg = ANDReg;
2133   }
2134   // Create the base instruction, then add the operands.
2135   const MCInstrDesc &II = TII.get(Opc);
2136   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137   MachineInstrBuilder MIB =
2138       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2139   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141   return true;
2142 }
2143
2144 bool AArch64FastISel::selectStore(const Instruction *I) {
2145   MVT VT;
2146   const Value *Op0 = I->getOperand(0);
2147   // Verify we have a legal type before going any further.  Currently, we handle
2148   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151     return false;
2152
2153   const Value *PtrV = I->getOperand(1);
2154   if (TLI.supportSwiftError()) {
2155     // Swifterror values can come from either a function parameter with
2156     // swifterror attribute or an alloca with swifterror attribute.
2157     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158       if (Arg->hasSwiftErrorAttr())
2159         return false;
2160     }
2161
2162     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163       if (Alloca->isSwiftError())
2164         return false;
2165     }
2166   }
2167
2168   // Get the value to be stored into a register. Use the zero register directly
2169   // when possible to avoid an unnecessary copy and a wasted register.
2170   unsigned SrcReg = 0;
2171   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172     if (CI->isZero())
2173       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175     if (CF->isZero() && !CF->isNegative()) {
2176       VT = MVT::getIntegerVT(VT.getSizeInBits());
2177       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178     }
2179   }
2180
2181   if (!SrcReg)
2182     SrcReg = getRegForValue(Op0);
2183
2184   if (!SrcReg)
2185     return false;
2186
2187   auto *SI = cast<StoreInst>(I);
2188
2189   // Try to emit a STLR for seq_cst/release.
2190   if (SI->isAtomic()) {
2191     AtomicOrdering Ord = SI->getOrdering();
2192     // The non-atomic instructions are sufficient for relaxed stores.
2193     if (isReleaseOrStronger(Ord)) {
2194       // The STLR addressing mode only supports a base reg; pass that directly.
2195       unsigned AddrReg = getRegForValue(PtrV);
2196       return emitStoreRelease(VT, SrcReg, AddrReg,
2197                               createMachineMemOperandFor(I));
2198     }
2199   }
2200
2201   // See if we can handle this address.
2202   Address Addr;
2203   if (!computeAddress(PtrV, Addr, Op0->getType()))
2204     return false;
2205
2206   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207     return false;
2208   return true;
2209 }
2210
2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212   switch (Pred) {
2213   case CmpInst::FCMP_ONE:
2214   case CmpInst::FCMP_UEQ:
2215   default:
2216     // AL is our "false" for now. The other two need more compares.
2217     return AArch64CC::AL;
2218   case CmpInst::ICMP_EQ:
2219   case CmpInst::FCMP_OEQ:
2220     return AArch64CC::EQ;
2221   case CmpInst::ICMP_SGT:
2222   case CmpInst::FCMP_OGT:
2223     return AArch64CC::GT;
2224   case CmpInst::ICMP_SGE:
2225   case CmpInst::FCMP_OGE:
2226     return AArch64CC::GE;
2227   case CmpInst::ICMP_UGT:
2228   case CmpInst::FCMP_UGT:
2229     return AArch64CC::HI;
2230   case CmpInst::FCMP_OLT:
2231     return AArch64CC::MI;
2232   case CmpInst::ICMP_ULE:
2233   case CmpInst::FCMP_OLE:
2234     return AArch64CC::LS;
2235   case CmpInst::FCMP_ORD:
2236     return AArch64CC::VC;
2237   case CmpInst::FCMP_UNO:
2238     return AArch64CC::VS;
2239   case CmpInst::FCMP_UGE:
2240     return AArch64CC::PL;
2241   case CmpInst::ICMP_SLT:
2242   case CmpInst::FCMP_ULT:
2243     return AArch64CC::LT;
2244   case CmpInst::ICMP_SLE:
2245   case CmpInst::FCMP_ULE:
2246     return AArch64CC::LE;
2247   case CmpInst::FCMP_UNE:
2248   case CmpInst::ICMP_NE:
2249     return AArch64CC::NE;
2250   case CmpInst::ICMP_UGE:
2251     return AArch64CC::HS;
2252   case CmpInst::ICMP_ULT:
2253     return AArch64CC::LO;
2254   }
2255 }
2256
2257 /// \brief Try to emit a combined compare-and-branch instruction.
2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2260   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2261   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2262
2263   const Value *LHS = CI->getOperand(0);
2264   const Value *RHS = CI->getOperand(1);
2265
2266   MVT VT;
2267   if (!isTypeSupported(LHS->getType(), VT))
2268     return false;
2269
2270   unsigned BW = VT.getSizeInBits();
2271   if (BW > 64)
2272     return false;
2273
2274   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2275   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2276
2277   // Try to take advantage of fallthrough opportunities.
2278   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2279     std::swap(TBB, FBB);
2280     Predicate = CmpInst::getInversePredicate(Predicate);
2281   }
2282
2283   int TestBit = -1;
2284   bool IsCmpNE;
2285   switch (Predicate) {
2286   default:
2287     return false;
2288   case CmpInst::ICMP_EQ:
2289   case CmpInst::ICMP_NE:
2290     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2291       std::swap(LHS, RHS);
2292
2293     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2294       return false;
2295
2296     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2297       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2298         const Value *AndLHS = AI->getOperand(0);
2299         const Value *AndRHS = AI->getOperand(1);
2300
2301         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2302           if (C->getValue().isPowerOf2())
2303             std::swap(AndLHS, AndRHS);
2304
2305         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2306           if (C->getValue().isPowerOf2()) {
2307             TestBit = C->getValue().logBase2();
2308             LHS = AndLHS;
2309           }
2310       }
2311
2312     if (VT == MVT::i1)
2313       TestBit = 0;
2314
2315     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2316     break;
2317   case CmpInst::ICMP_SLT:
2318   case CmpInst::ICMP_SGE:
2319     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2320       return false;
2321
2322     TestBit = BW - 1;
2323     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2324     break;
2325   case CmpInst::ICMP_SGT:
2326   case CmpInst::ICMP_SLE:
2327     if (!isa<ConstantInt>(RHS))
2328       return false;
2329
2330     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2331       return false;
2332
2333     TestBit = BW - 1;
2334     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2335     break;
2336   } // end switch
2337
2338   static const unsigned OpcTable[2][2][2] = {
2339     { {AArch64::CBZW,  AArch64::CBZX },
2340       {AArch64::CBNZW, AArch64::CBNZX} },
2341     { {AArch64::TBZW,  AArch64::TBZX },
2342       {AArch64::TBNZW, AArch64::TBNZX} }
2343   };
2344
2345   bool IsBitTest = TestBit != -1;
2346   bool Is64Bit = BW == 64;
2347   if (TestBit < 32 && TestBit >= 0)
2348     Is64Bit = false;
2349
2350   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2351   const MCInstrDesc &II = TII.get(Opc);
2352
2353   unsigned SrcReg = getRegForValue(LHS);
2354   if (!SrcReg)
2355     return false;
2356   bool SrcIsKill = hasTrivialKill(LHS);
2357
2358   if (BW == 64 && !Is64Bit)
2359     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2360                                         AArch64::sub_32);
2361
2362   if ((BW < 32) && !IsBitTest)
2363     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2364
2365   // Emit the combined compare and branch instruction.
2366   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2367   MachineInstrBuilder MIB =
2368       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2369           .addReg(SrcReg, getKillRegState(SrcIsKill));
2370   if (IsBitTest)
2371     MIB.addImm(TestBit);
2372   MIB.addMBB(TBB);
2373
2374   finishCondBranch(BI->getParent(), TBB, FBB);
2375   return true;
2376 }
2377
2378 bool AArch64FastISel::selectBranch(const Instruction *I) {
2379   const BranchInst *BI = cast<BranchInst>(I);
2380   if (BI->isUnconditional()) {
2381     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2382     fastEmitBranch(MSucc, BI->getDebugLoc());
2383     return true;
2384   }
2385
2386   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2388
2389   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2390     if (CI->hasOneUse() && isValueAvailable(CI)) {
2391       // Try to optimize or fold the cmp.
2392       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2393       switch (Predicate) {
2394       default:
2395         break;
2396       case CmpInst::FCMP_FALSE:
2397         fastEmitBranch(FBB, DbgLoc);
2398         return true;
2399       case CmpInst::FCMP_TRUE:
2400         fastEmitBranch(TBB, DbgLoc);
2401         return true;
2402       }
2403
2404       // Try to emit a combined compare-and-branch first.
2405       if (emitCompareAndBranch(BI))
2406         return true;
2407
2408       // Try to take advantage of fallthrough opportunities.
2409       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2410         std::swap(TBB, FBB);
2411         Predicate = CmpInst::getInversePredicate(Predicate);
2412       }
2413
2414       // Emit the cmp.
2415       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2416         return false;
2417
2418       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2419       // instruction.
2420       AArch64CC::CondCode CC = getCompareCC(Predicate);
2421       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2422       switch (Predicate) {
2423       default:
2424         break;
2425       case CmpInst::FCMP_UEQ:
2426         ExtraCC = AArch64CC::EQ;
2427         CC = AArch64CC::VS;
2428         break;
2429       case CmpInst::FCMP_ONE:
2430         ExtraCC = AArch64CC::MI;
2431         CC = AArch64CC::GT;
2432         break;
2433       }
2434       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2435
2436       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2437       if (ExtraCC != AArch64CC::AL) {
2438         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2439             .addImm(ExtraCC)
2440             .addMBB(TBB);
2441       }
2442
2443       // Emit the branch.
2444       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2445           .addImm(CC)
2446           .addMBB(TBB);
2447
2448       finishCondBranch(BI->getParent(), TBB, FBB);
2449       return true;
2450     }
2451   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2452     uint64_t Imm = CI->getZExtValue();
2453     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2454     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2455         .addMBB(Target);
2456
2457     // Obtain the branch probability and add the target to the successor list.
2458     if (FuncInfo.BPI) {
2459       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2460           BI->getParent(), Target->getBasicBlock());
2461       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2462     } else
2463       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2464     return true;
2465   } else {
2466     AArch64CC::CondCode CC = AArch64CC::NE;
2467     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2468       // Fake request the condition, otherwise the intrinsic might be completely
2469       // optimized away.
2470       unsigned CondReg = getRegForValue(BI->getCondition());
2471       if (!CondReg)
2472         return false;
2473
2474       // Emit the branch.
2475       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2476         .addImm(CC)
2477         .addMBB(TBB);
2478
2479       finishCondBranch(BI->getParent(), TBB, FBB);
2480       return true;
2481     }
2482   }
2483
2484   unsigned CondReg = getRegForValue(BI->getCondition());
2485   if (CondReg == 0)
2486     return false;
2487   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2488
2489   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2490   unsigned Opcode = AArch64::TBNZW;
2491   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492     std::swap(TBB, FBB);
2493     Opcode = AArch64::TBZW;
2494   }
2495
2496   const MCInstrDesc &II = TII.get(Opcode);
2497   unsigned ConstrainedCondReg
2498     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2500       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2501       .addImm(0)
2502       .addMBB(TBB);
2503
2504   finishCondBranch(BI->getParent(), TBB, FBB);
2505   return true;
2506 }
2507
2508 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2511   if (AddrReg == 0)
2512     return false;
2513
2514   // Emit the indirect branch.
2515   const MCInstrDesc &II = TII.get(AArch64::BR);
2516   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2517   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2518
2519   // Make sure the CFG is up-to-date.
2520   for (auto *Succ : BI->successors())
2521     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2522
2523   return true;
2524 }
2525
2526 bool AArch64FastISel::selectCmp(const Instruction *I) {
2527   const CmpInst *CI = cast<CmpInst>(I);
2528
2529   // Vectors of i1 are weird: bail out.
2530   if (CI->getType()->isVectorTy())
2531     return false;
2532
2533   // Try to optimize or fold the cmp.
2534   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2535   unsigned ResultReg = 0;
2536   switch (Predicate) {
2537   default:
2538     break;
2539   case CmpInst::FCMP_FALSE:
2540     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2541     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2542             TII.get(TargetOpcode::COPY), ResultReg)
2543         .addReg(AArch64::WZR, getKillRegState(true));
2544     break;
2545   case CmpInst::FCMP_TRUE:
2546     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2547     break;
2548   }
2549
2550   if (ResultReg) {
2551     updateValueMap(I, ResultReg);
2552     return true;
2553   }
2554
2555   // Emit the cmp.
2556   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2557     return false;
2558
2559   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2560
2561   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2562   // condition codes are inverted, because they are used by CSINC.
2563   static unsigned CondCodeTable[2][2] = {
2564     { AArch64CC::NE, AArch64CC::VC },
2565     { AArch64CC::PL, AArch64CC::LE }
2566   };
2567   unsigned *CondCodes = nullptr;
2568   switch (Predicate) {
2569   default:
2570     break;
2571   case CmpInst::FCMP_UEQ:
2572     CondCodes = &CondCodeTable[0][0];
2573     break;
2574   case CmpInst::FCMP_ONE:
2575     CondCodes = &CondCodeTable[1][0];
2576     break;
2577   }
2578
2579   if (CondCodes) {
2580     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2581     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2582             TmpReg1)
2583         .addReg(AArch64::WZR, getKillRegState(true))
2584         .addReg(AArch64::WZR, getKillRegState(true))
2585         .addImm(CondCodes[0]);
2586     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587             ResultReg)
2588         .addReg(TmpReg1, getKillRegState(true))
2589         .addReg(AArch64::WZR, getKillRegState(true))
2590         .addImm(CondCodes[1]);
2591
2592     updateValueMap(I, ResultReg);
2593     return true;
2594   }
2595
2596   // Now set a register based on the comparison.
2597   AArch64CC::CondCode CC = getCompareCC(Predicate);
2598   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2599   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2600   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2601           ResultReg)
2602       .addReg(AArch64::WZR, getKillRegState(true))
2603       .addReg(AArch64::WZR, getKillRegState(true))
2604       .addImm(invertedCC);
2605
2606   updateValueMap(I, ResultReg);
2607   return true;
2608 }
2609
2610 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2611 /// value.
2612 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2613   if (!SI->getType()->isIntegerTy(1))
2614     return false;
2615
2616   const Value *Src1Val, *Src2Val;
2617   unsigned Opc = 0;
2618   bool NeedExtraOp = false;
2619   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2620     if (CI->isOne()) {
2621       Src1Val = SI->getCondition();
2622       Src2Val = SI->getFalseValue();
2623       Opc = AArch64::ORRWrr;
2624     } else {
2625       assert(CI->isZero());
2626       Src1Val = SI->getFalseValue();
2627       Src2Val = SI->getCondition();
2628       Opc = AArch64::BICWrr;
2629     }
2630   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2631     if (CI->isOne()) {
2632       Src1Val = SI->getCondition();
2633       Src2Val = SI->getTrueValue();
2634       Opc = AArch64::ORRWrr;
2635       NeedExtraOp = true;
2636     } else {
2637       assert(CI->isZero());
2638       Src1Val = SI->getCondition();
2639       Src2Val = SI->getTrueValue();
2640       Opc = AArch64::ANDWrr;
2641     }
2642   }
2643
2644   if (!Opc)
2645     return false;
2646
2647   unsigned Src1Reg = getRegForValue(Src1Val);
2648   if (!Src1Reg)
2649     return false;
2650   bool Src1IsKill = hasTrivialKill(Src1Val);
2651
2652   unsigned Src2Reg = getRegForValue(Src2Val);
2653   if (!Src2Reg)
2654     return false;
2655   bool Src2IsKill = hasTrivialKill(Src2Val);
2656
2657   if (NeedExtraOp) {
2658     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2659     Src1IsKill = true;
2660   }
2661   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2662                                        Src1IsKill, Src2Reg, Src2IsKill);
2663   updateValueMap(SI, ResultReg);
2664   return true;
2665 }
2666
2667 bool AArch64FastISel::selectSelect(const Instruction *I) {
2668   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2669   MVT VT;
2670   if (!isTypeSupported(I->getType(), VT))
2671     return false;
2672
2673   unsigned Opc;
2674   const TargetRegisterClass *RC;
2675   switch (VT.SimpleTy) {
2676   default:
2677     return false;
2678   case MVT::i1:
2679   case MVT::i8:
2680   case MVT::i16:
2681   case MVT::i32:
2682     Opc = AArch64::CSELWr;
2683     RC = &AArch64::GPR32RegClass;
2684     break;
2685   case MVT::i64:
2686     Opc = AArch64::CSELXr;
2687     RC = &AArch64::GPR64RegClass;
2688     break;
2689   case MVT::f32:
2690     Opc = AArch64::FCSELSrrr;
2691     RC = &AArch64::FPR32RegClass;
2692     break;
2693   case MVT::f64:
2694     Opc = AArch64::FCSELDrrr;
2695     RC = &AArch64::FPR64RegClass;
2696     break;
2697   }
2698
2699   const SelectInst *SI = cast<SelectInst>(I);
2700   const Value *Cond = SI->getCondition();
2701   AArch64CC::CondCode CC = AArch64CC::NE;
2702   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2703
2704   if (optimizeSelect(SI))
2705     return true;
2706
2707   // Try to pickup the flags, so we don't have to emit another compare.
2708   if (foldXALUIntrinsic(CC, I, Cond)) {
2709     // Fake request the condition to force emission of the XALU intrinsic.
2710     unsigned CondReg = getRegForValue(Cond);
2711     if (!CondReg)
2712       return false;
2713   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2714              isValueAvailable(Cond)) {
2715     const auto *Cmp = cast<CmpInst>(Cond);
2716     // Try to optimize or fold the cmp.
2717     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2718     const Value *FoldSelect = nullptr;
2719     switch (Predicate) {
2720     default:
2721       break;
2722     case CmpInst::FCMP_FALSE:
2723       FoldSelect = SI->getFalseValue();
2724       break;
2725     case CmpInst::FCMP_TRUE:
2726       FoldSelect = SI->getTrueValue();
2727       break;
2728     }
2729
2730     if (FoldSelect) {
2731       unsigned SrcReg = getRegForValue(FoldSelect);
2732       if (!SrcReg)
2733         return false;
2734       unsigned UseReg = lookUpRegForValue(SI);
2735       if (UseReg)
2736         MRI.clearKillFlags(UseReg);
2737
2738       updateValueMap(I, SrcReg);
2739       return true;
2740     }
2741
2742     // Emit the cmp.
2743     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2744       return false;
2745
2746     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747     CC = getCompareCC(Predicate);
2748     switch (Predicate) {
2749     default:
2750       break;
2751     case CmpInst::FCMP_UEQ:
2752       ExtraCC = AArch64CC::EQ;
2753       CC = AArch64CC::VS;
2754       break;
2755     case CmpInst::FCMP_ONE:
2756       ExtraCC = AArch64CC::MI;
2757       CC = AArch64CC::GT;
2758       break;
2759     }
2760     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761   } else {
2762     unsigned CondReg = getRegForValue(Cond);
2763     if (!CondReg)
2764       return false;
2765     bool CondIsKill = hasTrivialKill(Cond);
2766
2767     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768     CondReg = constrainOperandRegClass(II, CondReg, 1);
2769
2770     // Emit a TST instruction (ANDS wzr, reg, #imm).
2771     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2772             AArch64::WZR)
2773         .addReg(CondReg, getKillRegState(CondIsKill))
2774         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2775   }
2776
2777   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2778   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2779
2780   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2781   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2782
2783   if (!Src1Reg || !Src2Reg)
2784     return false;
2785
2786   if (ExtraCC != AArch64CC::AL) {
2787     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2788                                Src2IsKill, ExtraCC);
2789     Src2IsKill = true;
2790   }
2791   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2792                                         Src2IsKill, CC);
2793   updateValueMap(I, ResultReg);
2794   return true;
2795 }
2796
2797 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2798   Value *V = I->getOperand(0);
2799   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2800     return false;
2801
2802   unsigned Op = getRegForValue(V);
2803   if (Op == 0)
2804     return false;
2805
2806   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2807   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2808           ResultReg).addReg(Op);
2809   updateValueMap(I, ResultReg);
2810   return true;
2811 }
2812
2813 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2814   Value *V = I->getOperand(0);
2815   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2816     return false;
2817
2818   unsigned Op = getRegForValue(V);
2819   if (Op == 0)
2820     return false;
2821
2822   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2823   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2824           ResultReg).addReg(Op);
2825   updateValueMap(I, ResultReg);
2826   return true;
2827 }
2828
2829 // FPToUI and FPToSI
2830 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2831   MVT DestVT;
2832   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2833     return false;
2834
2835   unsigned SrcReg = getRegForValue(I->getOperand(0));
2836   if (SrcReg == 0)
2837     return false;
2838
2839   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2840   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2841     return false;
2842
2843   unsigned Opc;
2844   if (SrcVT == MVT::f64) {
2845     if (Signed)
2846       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2847     else
2848       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2849   } else {
2850     if (Signed)
2851       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2852     else
2853       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2854   }
2855   unsigned ResultReg = createResultReg(
2856       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2857   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2858       .addReg(SrcReg);
2859   updateValueMap(I, ResultReg);
2860   return true;
2861 }
2862
2863 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2864   MVT DestVT;
2865   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2866     return false;
2867   // Let regular ISEL handle FP16
2868   if (DestVT == MVT::f16)
2869     return false;
2870
2871   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2872          "Unexpected value type.");
2873
2874   unsigned SrcReg = getRegForValue(I->getOperand(0));
2875   if (!SrcReg)
2876     return false;
2877   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2878
2879   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2880
2881   // Handle sign-extension.
2882   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2883     SrcReg =
2884         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2885     if (!SrcReg)
2886       return false;
2887     SrcIsKill = true;
2888   }
2889
2890   unsigned Opc;
2891   if (SrcVT == MVT::i64) {
2892     if (Signed)
2893       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2894     else
2895       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2896   } else {
2897     if (Signed)
2898       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2899     else
2900       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2901   }
2902
2903   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2904                                       SrcIsKill);
2905   updateValueMap(I, ResultReg);
2906   return true;
2907 }
2908
2909 bool AArch64FastISel::fastLowerArguments() {
2910   if (!FuncInfo.CanLowerReturn)
2911     return false;
2912
2913   const Function *F = FuncInfo.Fn;
2914   if (F->isVarArg())
2915     return false;
2916
2917   CallingConv::ID CC = F->getCallingConv();
2918   if (CC != CallingConv::C && CC != CallingConv::Swift)
2919     return false;
2920
2921   // Only handle simple cases of up to 8 GPR and FPR each.
2922   unsigned GPRCnt = 0;
2923   unsigned FPRCnt = 0;
2924   for (auto const &Arg : F->args()) {
2925     if (Arg.hasAttribute(Attribute::ByVal) ||
2926         Arg.hasAttribute(Attribute::InReg) ||
2927         Arg.hasAttribute(Attribute::StructRet) ||
2928         Arg.hasAttribute(Attribute::SwiftSelf) ||
2929         Arg.hasAttribute(Attribute::SwiftError) ||
2930         Arg.hasAttribute(Attribute::Nest))
2931       return false;
2932
2933     Type *ArgTy = Arg.getType();
2934     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2935       return false;
2936
2937     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2938     if (!ArgVT.isSimple())
2939       return false;
2940
2941     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2942     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2943       return false;
2944
2945     if (VT.isVector() &&
2946         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2947       return false;
2948
2949     if (VT >= MVT::i1 && VT <= MVT::i64)
2950       ++GPRCnt;
2951     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2952              VT.is128BitVector())
2953       ++FPRCnt;
2954     else
2955       return false;
2956
2957     if (GPRCnt > 8 || FPRCnt > 8)
2958       return false;
2959   }
2960
2961   static const MCPhysReg Registers[6][8] = {
2962     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2963       AArch64::W5, AArch64::W6, AArch64::W7 },
2964     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2965       AArch64::X5, AArch64::X6, AArch64::X7 },
2966     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2967       AArch64::H5, AArch64::H6, AArch64::H7 },
2968     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2969       AArch64::S5, AArch64::S6, AArch64::S7 },
2970     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2971       AArch64::D5, AArch64::D6, AArch64::D7 },
2972     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2973       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2974   };
2975
2976   unsigned GPRIdx = 0;
2977   unsigned FPRIdx = 0;
2978   for (auto const &Arg : F->args()) {
2979     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2980     unsigned SrcReg;
2981     const TargetRegisterClass *RC;
2982     if (VT >= MVT::i1 && VT <= MVT::i32) {
2983       SrcReg = Registers[0][GPRIdx++];
2984       RC = &AArch64::GPR32RegClass;
2985       VT = MVT::i32;
2986     } else if (VT == MVT::i64) {
2987       SrcReg = Registers[1][GPRIdx++];
2988       RC = &AArch64::GPR64RegClass;
2989     } else if (VT == MVT::f16) {
2990       SrcReg = Registers[2][FPRIdx++];
2991       RC = &AArch64::FPR16RegClass;
2992     } else if (VT ==  MVT::f32) {
2993       SrcReg = Registers[3][FPRIdx++];
2994       RC = &AArch64::FPR32RegClass;
2995     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2996       SrcReg = Registers[4][FPRIdx++];
2997       RC = &AArch64::FPR64RegClass;
2998     } else if (VT.is128BitVector()) {
2999       SrcReg = Registers[5][FPRIdx++];
3000       RC = &AArch64::FPR128RegClass;
3001     } else
3002       llvm_unreachable("Unexpected value type.");
3003
3004     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3005     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3006     // Without this, EmitLiveInCopies may eliminate the livein if its only
3007     // use is a bitcast (which isn't turned into an instruction).
3008     unsigned ResultReg = createResultReg(RC);
3009     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3010             TII.get(TargetOpcode::COPY), ResultReg)
3011         .addReg(DstReg, getKillRegState(true));
3012     updateValueMap(&Arg, ResultReg);
3013   }
3014   return true;
3015 }
3016
3017 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3018                                       SmallVectorImpl<MVT> &OutVTs,
3019                                       unsigned &NumBytes) {
3020   CallingConv::ID CC = CLI.CallConv;
3021   SmallVector<CCValAssign, 16> ArgLocs;
3022   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3023   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3024
3025   // Get a count of how many bytes are to be pushed on the stack.
3026   NumBytes = CCInfo.getNextStackOffset();
3027
3028   // Issue CALLSEQ_START
3029   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3030   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3031     .addImm(NumBytes).addImm(0);
3032
3033   // Process the args.
3034   for (CCValAssign &VA : ArgLocs) {
3035     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3036     MVT ArgVT = OutVTs[VA.getValNo()];
3037
3038     unsigned ArgReg = getRegForValue(ArgVal);
3039     if (!ArgReg)
3040       return false;
3041
3042     // Handle arg promotion: SExt, ZExt, AExt.
3043     switch (VA.getLocInfo()) {
3044     case CCValAssign::Full:
3045       break;
3046     case CCValAssign::SExt: {
3047       MVT DestVT = VA.getLocVT();
3048       MVT SrcVT = ArgVT;
3049       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3050       if (!ArgReg)
3051         return false;
3052       break;
3053     }
3054     case CCValAssign::AExt:
3055     // Intentional fall-through.
3056     case CCValAssign::ZExt: {
3057       MVT DestVT = VA.getLocVT();
3058       MVT SrcVT = ArgVT;
3059       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3060       if (!ArgReg)
3061         return false;
3062       break;
3063     }
3064     default:
3065       llvm_unreachable("Unknown arg promotion!");
3066     }
3067
3068     // Now copy/store arg to correct locations.
3069     if (VA.isRegLoc() && !VA.needsCustom()) {
3070       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3071               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3072       CLI.OutRegs.push_back(VA.getLocReg());
3073     } else if (VA.needsCustom()) {
3074       // FIXME: Handle custom args.
3075       return false;
3076     } else {
3077       assert(VA.isMemLoc() && "Assuming store on stack.");
3078
3079       // Don't emit stores for undef values.
3080       if (isa<UndefValue>(ArgVal))
3081         continue;
3082
3083       // Need to store on the stack.
3084       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3085
3086       unsigned BEAlign = 0;
3087       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3088         BEAlign = 8 - ArgSize;
3089
3090       Address Addr;
3091       Addr.setKind(Address::RegBase);
3092       Addr.setReg(AArch64::SP);
3093       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3094
3095       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3096       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3097           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3098           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3099
3100       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3101         return false;
3102     }
3103   }
3104   return true;
3105 }
3106
3107 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3108                                  unsigned NumBytes) {
3109   CallingConv::ID CC = CLI.CallConv;
3110
3111   // Issue CALLSEQ_END
3112   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3113   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3114     .addImm(NumBytes).addImm(0);
3115
3116   // Now the return value.
3117   if (RetVT != MVT::isVoid) {
3118     SmallVector<CCValAssign, 16> RVLocs;
3119     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3120     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3121
3122     // Only handle a single return value.
3123     if (RVLocs.size() != 1)
3124       return false;
3125
3126     // Copy all of the result registers out of their specified physreg.
3127     MVT CopyVT = RVLocs[0].getValVT();
3128
3129     // TODO: Handle big-endian results
3130     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3131       return false;
3132
3133     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3134     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3135             TII.get(TargetOpcode::COPY), ResultReg)
3136         .addReg(RVLocs[0].getLocReg());
3137     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3138
3139     CLI.ResultReg = ResultReg;
3140     CLI.NumResultRegs = 1;
3141   }
3142
3143   return true;
3144 }
3145
3146 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3147   CallingConv::ID CC  = CLI.CallConv;
3148   bool IsTailCall     = CLI.IsTailCall;
3149   bool IsVarArg       = CLI.IsVarArg;
3150   const Value *Callee = CLI.Callee;
3151   MCSymbol *Symbol = CLI.Symbol;
3152
3153   if (!Callee && !Symbol)
3154     return false;
3155
3156   // Allow SelectionDAG isel to handle tail calls.
3157   if (IsTailCall)
3158     return false;
3159
3160   CodeModel::Model CM = TM.getCodeModel();
3161   // Only support the small-addressing and large code models.
3162   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3163     return false;
3164
3165   // FIXME: Add large code model support for ELF.
3166   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3167     return false;
3168
3169   // Let SDISel handle vararg functions.
3170   if (IsVarArg)
3171     return false;
3172
3173   // FIXME: Only handle *simple* calls for now.
3174   MVT RetVT;
3175   if (CLI.RetTy->isVoidTy())
3176     RetVT = MVT::isVoid;
3177   else if (!isTypeLegal(CLI.RetTy, RetVT))
3178     return false;
3179
3180   for (auto Flag : CLI.OutFlags)
3181     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3182         Flag.isSwiftSelf() || Flag.isSwiftError())
3183       return false;
3184
3185   // Set up the argument vectors.
3186   SmallVector<MVT, 16> OutVTs;
3187   OutVTs.reserve(CLI.OutVals.size());
3188
3189   for (auto *Val : CLI.OutVals) {
3190     MVT VT;
3191     if (!isTypeLegal(Val->getType(), VT) &&
3192         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3193       return false;
3194
3195     // We don't handle vector parameters yet.
3196     if (VT.isVector() || VT.getSizeInBits() > 64)
3197       return false;
3198
3199     OutVTs.push_back(VT);
3200   }
3201
3202   Address Addr;
3203   if (Callee && !computeCallAddress(Callee, Addr))
3204     return false;
3205
3206   // Handle the arguments now that we've gotten them.
3207   unsigned NumBytes;
3208   if (!processCallArgs(CLI, OutVTs, NumBytes))
3209     return false;
3210
3211   // Issue the call.
3212   MachineInstrBuilder MIB;
3213   if (Subtarget->useSmallAddressing()) {
3214     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3215     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3216     if (Symbol)
3217       MIB.addSym(Symbol, 0);
3218     else if (Addr.getGlobalValue())
3219       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3220     else if (Addr.getReg()) {
3221       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3222       MIB.addReg(Reg);
3223     } else
3224       return false;
3225   } else {
3226     unsigned CallReg = 0;
3227     if (Symbol) {
3228       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3229       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3230               ADRPReg)
3231           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3232
3233       CallReg = createResultReg(&AArch64::GPR64RegClass);
3234       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3235               TII.get(AArch64::LDRXui), CallReg)
3236           .addReg(ADRPReg)
3237           .addSym(Symbol,
3238                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3239     } else if (Addr.getGlobalValue())
3240       CallReg = materializeGV(Addr.getGlobalValue());
3241     else if (Addr.getReg())
3242       CallReg = Addr.getReg();
3243
3244     if (!CallReg)
3245       return false;
3246
3247     const MCInstrDesc &II = TII.get(AArch64::BLR);
3248     CallReg = constrainOperandRegClass(II, CallReg, 0);
3249     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3250   }
3251
3252   // Add implicit physical register uses to the call.
3253   for (auto Reg : CLI.OutRegs)
3254     MIB.addReg(Reg, RegState::Implicit);
3255
3256   // Add a register mask with the call-preserved registers.
3257   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3258   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3259
3260   CLI.Call = MIB;
3261
3262   // Finish off the call including any return values.
3263   return finishCall(CLI, RetVT, NumBytes);
3264 }
3265
3266 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3267   if (Alignment)
3268     return Len / Alignment <= 4;
3269   else
3270     return Len < 32;
3271 }
3272
3273 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3274                                          uint64_t Len, unsigned Alignment) {
3275   // Make sure we don't bloat code by inlining very large memcpy's.
3276   if (!isMemCpySmall(Len, Alignment))
3277     return false;
3278
3279   int64_t UnscaledOffset = 0;
3280   Address OrigDest = Dest;
3281   Address OrigSrc = Src;
3282
3283   while (Len) {
3284     MVT VT;
3285     if (!Alignment || Alignment >= 8) {
3286       if (Len >= 8)
3287         VT = MVT::i64;
3288       else if (Len >= 4)
3289         VT = MVT::i32;
3290       else if (Len >= 2)
3291         VT = MVT::i16;
3292       else {
3293         VT = MVT::i8;
3294       }
3295     } else {
3296       // Bound based on alignment.
3297       if (Len >= 4 && Alignment == 4)
3298         VT = MVT::i32;
3299       else if (Len >= 2 && Alignment == 2)
3300         VT = MVT::i16;
3301       else {
3302         VT = MVT::i8;
3303       }
3304     }
3305
3306     unsigned ResultReg = emitLoad(VT, VT, Src);
3307     if (!ResultReg)
3308       return false;
3309
3310     if (!emitStore(VT, ResultReg, Dest))
3311       return false;
3312
3313     int64_t Size = VT.getSizeInBits() / 8;
3314     Len -= Size;
3315     UnscaledOffset += Size;
3316
3317     // We need to recompute the unscaled offset for each iteration.
3318     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3319     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3320   }
3321
3322   return true;
3323 }
3324
3325 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3326 /// into the user. The condition code will only be updated on success.
3327 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3328                                         const Instruction *I,
3329                                         const Value *Cond) {
3330   if (!isa<ExtractValueInst>(Cond))
3331     return false;
3332
3333   const auto *EV = cast<ExtractValueInst>(Cond);
3334   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3335     return false;
3336
3337   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3338   MVT RetVT;
3339   const Function *Callee = II->getCalledFunction();
3340   Type *RetTy =
3341   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3342   if (!isTypeLegal(RetTy, RetVT))
3343     return false;
3344
3345   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3346     return false;
3347
3348   const Value *LHS = II->getArgOperand(0);
3349   const Value *RHS = II->getArgOperand(1);
3350
3351   // Canonicalize immediate to the RHS.
3352   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3353       isCommutativeIntrinsic(II))
3354     std::swap(LHS, RHS);
3355
3356   // Simplify multiplies.
3357   Intrinsic::ID IID = II->getIntrinsicID();
3358   switch (IID) {
3359   default:
3360     break;
3361   case Intrinsic::smul_with_overflow:
3362     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3363       if (C->getValue() == 2)
3364         IID = Intrinsic::sadd_with_overflow;
3365     break;
3366   case Intrinsic::umul_with_overflow:
3367     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3368       if (C->getValue() == 2)
3369         IID = Intrinsic::uadd_with_overflow;
3370     break;
3371   }
3372
3373   AArch64CC::CondCode TmpCC;
3374   switch (IID) {
3375   default:
3376     return false;
3377   case Intrinsic::sadd_with_overflow:
3378   case Intrinsic::ssub_with_overflow:
3379     TmpCC = AArch64CC::VS;
3380     break;
3381   case Intrinsic::uadd_with_overflow:
3382     TmpCC = AArch64CC::HS;
3383     break;
3384   case Intrinsic::usub_with_overflow:
3385     TmpCC = AArch64CC::LO;
3386     break;
3387   case Intrinsic::smul_with_overflow:
3388   case Intrinsic::umul_with_overflow:
3389     TmpCC = AArch64CC::NE;
3390     break;
3391   }
3392
3393   // Check if both instructions are in the same basic block.
3394   if (!isValueAvailable(II))
3395     return false;
3396
3397   // Make sure nothing is in the way
3398   BasicBlock::const_iterator Start(I);
3399   BasicBlock::const_iterator End(II);
3400   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3401     // We only expect extractvalue instructions between the intrinsic and the
3402     // instruction to be selected.
3403     if (!isa<ExtractValueInst>(Itr))
3404       return false;
3405
3406     // Check that the extractvalue operand comes from the intrinsic.
3407     const auto *EVI = cast<ExtractValueInst>(Itr);
3408     if (EVI->getAggregateOperand() != II)
3409       return false;
3410   }
3411
3412   CC = TmpCC;
3413   return true;
3414 }
3415
3416 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3417   // FIXME: Handle more intrinsics.
3418   switch (II->getIntrinsicID()) {
3419   default: return false;
3420   case Intrinsic::frameaddress: {
3421     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3422     MFI.setFrameAddressIsTaken(true);
3423
3424     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3425     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3426     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3427     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3428             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3429     // Recursively load frame address
3430     // ldr x0, [fp]
3431     // ldr x0, [x0]
3432     // ldr x0, [x0]
3433     // ...
3434     unsigned DestReg;
3435     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3436     while (Depth--) {
3437       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3438                                 SrcReg, /*IsKill=*/true, 0);
3439       assert(DestReg && "Unexpected LDR instruction emission failure.");
3440       SrcReg = DestReg;
3441     }
3442
3443     updateValueMap(II, SrcReg);
3444     return true;
3445   }
3446   case Intrinsic::memcpy:
3447   case Intrinsic::memmove: {
3448     const auto *MTI = cast<MemTransferInst>(II);
3449     // Don't handle volatile.
3450     if (MTI->isVolatile())
3451       return false;
3452
3453     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3454     // we would emit dead code because we don't currently handle memmoves.
3455     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3456     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3457       // Small memcpy's are common enough that we want to do them without a call
3458       // if possible.
3459       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3460       unsigned Alignment = MTI->getAlignment();
3461       if (isMemCpySmall(Len, Alignment)) {
3462         Address Dest, Src;
3463         if (!computeAddress(MTI->getRawDest(), Dest) ||
3464             !computeAddress(MTI->getRawSource(), Src))
3465           return false;
3466         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3467           return true;
3468       }
3469     }
3470
3471     if (!MTI->getLength()->getType()->isIntegerTy(64))
3472       return false;
3473
3474     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3475       // Fast instruction selection doesn't support the special
3476       // address spaces.
3477       return false;
3478
3479     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3480     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3481   }
3482   case Intrinsic::memset: {
3483     const MemSetInst *MSI = cast<MemSetInst>(II);
3484     // Don't handle volatile.
3485     if (MSI->isVolatile())
3486       return false;
3487
3488     if (!MSI->getLength()->getType()->isIntegerTy(64))
3489       return false;
3490
3491     if (MSI->getDestAddressSpace() > 255)
3492       // Fast instruction selection doesn't support the special
3493       // address spaces.
3494       return false;
3495
3496     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3497   }
3498   case Intrinsic::sin:
3499   case Intrinsic::cos:
3500   case Intrinsic::pow: {
3501     MVT RetVT;
3502     if (!isTypeLegal(II->getType(), RetVT))
3503       return false;
3504
3505     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3506       return false;
3507
3508     static const RTLIB::Libcall LibCallTable[3][2] = {
3509       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3510       { RTLIB::COS_F32, RTLIB::COS_F64 },
3511       { RTLIB::POW_F32, RTLIB::POW_F64 }
3512     };
3513     RTLIB::Libcall LC;
3514     bool Is64Bit = RetVT == MVT::f64;
3515     switch (II->getIntrinsicID()) {
3516     default:
3517       llvm_unreachable("Unexpected intrinsic.");
3518     case Intrinsic::sin:
3519       LC = LibCallTable[0][Is64Bit];
3520       break;
3521     case Intrinsic::cos:
3522       LC = LibCallTable[1][Is64Bit];
3523       break;
3524     case Intrinsic::pow:
3525       LC = LibCallTable[2][Is64Bit];
3526       break;
3527     }
3528
3529     ArgListTy Args;
3530     Args.reserve(II->getNumArgOperands());
3531
3532     // Populate the argument list.
3533     for (auto &Arg : II->arg_operands()) {
3534       ArgListEntry Entry;
3535       Entry.Val = Arg;
3536       Entry.Ty = Arg->getType();
3537       Args.push_back(Entry);
3538     }
3539
3540     CallLoweringInfo CLI;
3541     MCContext &Ctx = MF->getContext();
3542     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3543                   TLI.getLibcallName(LC), std::move(Args));
3544     if (!lowerCallTo(CLI))
3545       return false;
3546     updateValueMap(II, CLI.ResultReg);
3547     return true;
3548   }
3549   case Intrinsic::fabs: {
3550     MVT VT;
3551     if (!isTypeLegal(II->getType(), VT))
3552       return false;
3553
3554     unsigned Opc;
3555     switch (VT.SimpleTy) {
3556     default:
3557       return false;
3558     case MVT::f32:
3559       Opc = AArch64::FABSSr;
3560       break;
3561     case MVT::f64:
3562       Opc = AArch64::FABSDr;
3563       break;
3564     }
3565     unsigned SrcReg = getRegForValue(II->getOperand(0));
3566     if (!SrcReg)
3567       return false;
3568     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3569     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3570     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3571       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3572     updateValueMap(II, ResultReg);
3573     return true;
3574   }
3575   case Intrinsic::trap:
3576     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3577         .addImm(1);
3578     return true;
3579
3580   case Intrinsic::sqrt: {
3581     Type *RetTy = II->getCalledFunction()->getReturnType();
3582
3583     MVT VT;
3584     if (!isTypeLegal(RetTy, VT))
3585       return false;
3586
3587     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3588     if (!Op0Reg)
3589       return false;
3590     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3591
3592     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3593     if (!ResultReg)
3594       return false;
3595
3596     updateValueMap(II, ResultReg);
3597     return true;
3598   }
3599   case Intrinsic::sadd_with_overflow:
3600   case Intrinsic::uadd_with_overflow:
3601   case Intrinsic::ssub_with_overflow:
3602   case Intrinsic::usub_with_overflow:
3603   case Intrinsic::smul_with_overflow:
3604   case Intrinsic::umul_with_overflow: {
3605     // This implements the basic lowering of the xalu with overflow intrinsics.
3606     const Function *Callee = II->getCalledFunction();
3607     auto *Ty = cast<StructType>(Callee->getReturnType());
3608     Type *RetTy = Ty->getTypeAtIndex(0U);
3609
3610     MVT VT;
3611     if (!isTypeLegal(RetTy, VT))
3612       return false;
3613
3614     if (VT != MVT::i32 && VT != MVT::i64)
3615       return false;
3616
3617     const Value *LHS = II->getArgOperand(0);
3618     const Value *RHS = II->getArgOperand(1);
3619     // Canonicalize immediate to the RHS.
3620     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3621         isCommutativeIntrinsic(II))
3622       std::swap(LHS, RHS);
3623
3624     // Simplify multiplies.
3625     Intrinsic::ID IID = II->getIntrinsicID();
3626     switch (IID) {
3627     default:
3628       break;
3629     case Intrinsic::smul_with_overflow:
3630       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3631         if (C->getValue() == 2) {
3632           IID = Intrinsic::sadd_with_overflow;
3633           RHS = LHS;
3634         }
3635       break;
3636     case Intrinsic::umul_with_overflow:
3637       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3638         if (C->getValue() == 2) {
3639           IID = Intrinsic::uadd_with_overflow;
3640           RHS = LHS;
3641         }
3642       break;
3643     }
3644
3645     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3646     AArch64CC::CondCode CC = AArch64CC::Invalid;
3647     switch (IID) {
3648     default: llvm_unreachable("Unexpected intrinsic!");
3649     case Intrinsic::sadd_with_overflow:
3650       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3651       CC = AArch64CC::VS;
3652       break;
3653     case Intrinsic::uadd_with_overflow:
3654       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3655       CC = AArch64CC::HS;
3656       break;
3657     case Intrinsic::ssub_with_overflow:
3658       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3659       CC = AArch64CC::VS;
3660       break;
3661     case Intrinsic::usub_with_overflow:
3662       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3663       CC = AArch64CC::LO;
3664       break;
3665     case Intrinsic::smul_with_overflow: {
3666       CC = AArch64CC::NE;
3667       unsigned LHSReg = getRegForValue(LHS);
3668       if (!LHSReg)
3669         return false;
3670       bool LHSIsKill = hasTrivialKill(LHS);
3671
3672       unsigned RHSReg = getRegForValue(RHS);
3673       if (!RHSReg)
3674         return false;
3675       bool RHSIsKill = hasTrivialKill(RHS);
3676
3677       if (VT == MVT::i32) {
3678         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3679         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3680                                        /*IsKill=*/false, 32);
3681         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3682                                             AArch64::sub_32);
3683         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3684                                               AArch64::sub_32);
3685         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3686                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3687       } else {
3688         assert(VT == MVT::i64 && "Unexpected value type.");
3689         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3690         // reused in the next instruction.
3691         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3692                             /*IsKill=*/false);
3693         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3694                                         RHSReg, RHSIsKill);
3695         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3696                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3697       }
3698       break;
3699     }
3700     case Intrinsic::umul_with_overflow: {
3701       CC = AArch64CC::NE;
3702       unsigned LHSReg = getRegForValue(LHS);
3703       if (!LHSReg)
3704         return false;
3705       bool LHSIsKill = hasTrivialKill(LHS);
3706
3707       unsigned RHSReg = getRegForValue(RHS);
3708       if (!RHSReg)
3709         return false;
3710       bool RHSIsKill = hasTrivialKill(RHS);
3711
3712       if (VT == MVT::i32) {
3713         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3714         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3715                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3716                     /*WantResult=*/false);
3717         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3718                                             AArch64::sub_32);
3719       } else {
3720         assert(VT == MVT::i64 && "Unexpected value type.");
3721         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3722         // reused in the next instruction.
3723         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3724                             /*IsKill=*/false);
3725         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3726                                         RHSReg, RHSIsKill);
3727         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3728                     /*IsKill=*/false, /*WantResult=*/false);
3729       }
3730       break;
3731     }
3732     }
3733
3734     if (MulReg) {
3735       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3736       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3737               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3738     }
3739
3740     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3741                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3742                                   /*IsKill=*/true, getInvertedCondCode(CC));
3743     (void)ResultReg2;
3744     assert((ResultReg1 + 1) == ResultReg2 &&
3745            "Nonconsecutive result registers.");
3746     updateValueMap(II, ResultReg1, 2);
3747     return true;
3748   }
3749   }
3750   return false;
3751 }
3752
3753 bool AArch64FastISel::selectRet(const Instruction *I) {
3754   const ReturnInst *Ret = cast<ReturnInst>(I);
3755   const Function &F = *I->getParent()->getParent();
3756
3757   if (!FuncInfo.CanLowerReturn)
3758     return false;
3759
3760   if (F.isVarArg())
3761     return false;
3762
3763   if (TLI.supportSwiftError() &&
3764       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3765     return false;
3766
3767   if (TLI.supportSplitCSR(FuncInfo.MF))
3768     return false;
3769
3770   // Build a list of return value registers.
3771   SmallVector<unsigned, 4> RetRegs;
3772
3773   if (Ret->getNumOperands() > 0) {
3774     CallingConv::ID CC = F.getCallingConv();
3775     SmallVector<ISD::OutputArg, 4> Outs;
3776     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3777
3778     // Analyze operands of the call, assigning locations to each operand.
3779     SmallVector<CCValAssign, 16> ValLocs;
3780     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3781     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3782                                                      : RetCC_AArch64_AAPCS;
3783     CCInfo.AnalyzeReturn(Outs, RetCC);
3784
3785     // Only handle a single return value for now.
3786     if (ValLocs.size() != 1)
3787       return false;
3788
3789     CCValAssign &VA = ValLocs[0];
3790     const Value *RV = Ret->getOperand(0);
3791
3792     // Don't bother handling odd stuff for now.
3793     if ((VA.getLocInfo() != CCValAssign::Full) &&
3794         (VA.getLocInfo() != CCValAssign::BCvt))
3795       return false;
3796
3797     // Only handle register returns for now.
3798     if (!VA.isRegLoc())
3799       return false;
3800
3801     unsigned Reg = getRegForValue(RV);
3802     if (Reg == 0)
3803       return false;
3804
3805     unsigned SrcReg = Reg + VA.getValNo();
3806     unsigned DestReg = VA.getLocReg();
3807     // Avoid a cross-class copy. This is very unlikely.
3808     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3809       return false;
3810
3811     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3812     if (!RVEVT.isSimple())
3813       return false;
3814
3815     // Vectors (of > 1 lane) in big endian need tricky handling.
3816     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3817         !Subtarget->isLittleEndian())
3818       return false;
3819
3820     MVT RVVT = RVEVT.getSimpleVT();
3821     if (RVVT == MVT::f128)
3822       return false;
3823
3824     MVT DestVT = VA.getValVT();
3825     // Special handling for extended integers.
3826     if (RVVT != DestVT) {
3827       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3828         return false;
3829
3830       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3831         return false;
3832
3833       bool IsZExt = Outs[0].Flags.isZExt();
3834       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3835       if (SrcReg == 0)
3836         return false;
3837     }
3838
3839     // Make the copy.
3840     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3841             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3842
3843     // Add register to return instruction.
3844     RetRegs.push_back(VA.getLocReg());
3845   }
3846
3847   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3848                                     TII.get(AArch64::RET_ReallyLR));
3849   for (unsigned RetReg : RetRegs)
3850     MIB.addReg(RetReg, RegState::Implicit);
3851   return true;
3852 }
3853
3854 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3855   Type *DestTy = I->getType();
3856   Value *Op = I->getOperand(0);
3857   Type *SrcTy = Op->getType();
3858
3859   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3860   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3861   if (!SrcEVT.isSimple())
3862     return false;
3863   if (!DestEVT.isSimple())
3864     return false;
3865
3866   MVT SrcVT = SrcEVT.getSimpleVT();
3867   MVT DestVT = DestEVT.getSimpleVT();
3868
3869   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3870       SrcVT != MVT::i8)
3871     return false;
3872   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3873       DestVT != MVT::i1)
3874     return false;
3875
3876   unsigned SrcReg = getRegForValue(Op);
3877   if (!SrcReg)
3878     return false;
3879   bool SrcIsKill = hasTrivialKill(Op);
3880
3881   // If we're truncating from i64 to a smaller non-legal type then generate an
3882   // AND. Otherwise, we know the high bits are undefined and a truncate only
3883   // generate a COPY. We cannot mark the source register also as result
3884   // register, because this can incorrectly transfer the kill flag onto the
3885   // source register.
3886   unsigned ResultReg;
3887   if (SrcVT == MVT::i64) {
3888     uint64_t Mask = 0;
3889     switch (DestVT.SimpleTy) {
3890     default:
3891       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3892       return false;
3893     case MVT::i1:
3894       Mask = 0x1;
3895       break;
3896     case MVT::i8:
3897       Mask = 0xff;
3898       break;
3899     case MVT::i16:
3900       Mask = 0xffff;
3901       break;
3902     }
3903     // Issue an extract_subreg to get the lower 32-bits.
3904     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3905                                                 AArch64::sub_32);
3906     // Create the AND instruction which performs the actual truncation.
3907     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3908     assert(ResultReg && "Unexpected AND instruction emission failure.");
3909   } else {
3910     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3911     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3912             TII.get(TargetOpcode::COPY), ResultReg)
3913         .addReg(SrcReg, getKillRegState(SrcIsKill));
3914   }
3915
3916   updateValueMap(I, ResultReg);
3917   return true;
3918 }
3919
3920 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3921   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3922           DestVT == MVT::i64) &&
3923          "Unexpected value type.");
3924   // Handle i8 and i16 as i32.
3925   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3926     DestVT = MVT::i32;
3927
3928   if (IsZExt) {
3929     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3930     assert(ResultReg && "Unexpected AND instruction emission failure.");
3931     if (DestVT == MVT::i64) {
3932       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3933       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3934       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3935       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3936               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3937           .addImm(0)
3938           .addReg(ResultReg)
3939           .addImm(AArch64::sub_32);
3940       ResultReg = Reg64;
3941     }
3942     return ResultReg;
3943   } else {
3944     if (DestVT == MVT::i64) {
3945       // FIXME: We're SExt i1 to i64.
3946       return 0;
3947     }
3948     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3949                             /*TODO:IsKill=*/false, 0, 0);
3950   }
3951 }
3952
3953 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3954                                       unsigned Op1, bool Op1IsKill) {
3955   unsigned Opc, ZReg;
3956   switch (RetVT.SimpleTy) {
3957   default: return 0;
3958   case MVT::i8:
3959   case MVT::i16:
3960   case MVT::i32:
3961     RetVT = MVT::i32;
3962     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3963   case MVT::i64:
3964     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3965   }
3966
3967   const TargetRegisterClass *RC =
3968       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3969   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3970                           /*IsKill=*/ZReg, true);
3971 }
3972
3973 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3974                                         unsigned Op1, bool Op1IsKill) {
3975   if (RetVT != MVT::i64)
3976     return 0;
3977
3978   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3979                           Op0, Op0IsKill, Op1, Op1IsKill,
3980                           AArch64::XZR, /*IsKill=*/true);
3981 }
3982
3983 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3984                                         unsigned Op1, bool Op1IsKill) {
3985   if (RetVT != MVT::i64)
3986     return 0;
3987
3988   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3989                           Op0, Op0IsKill, Op1, Op1IsKill,
3990                           AArch64::XZR, /*IsKill=*/true);
3991 }
3992
3993 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3994                                      unsigned Op1Reg, bool Op1IsKill) {
3995   unsigned Opc = 0;
3996   bool NeedTrunc = false;
3997   uint64_t Mask = 0;
3998   switch (RetVT.SimpleTy) {
3999   default: return 0;
4000   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4001   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4002   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4003   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4004   }
4005
4006   const TargetRegisterClass *RC =
4007       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4008   if (NeedTrunc) {
4009     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4010     Op1IsKill = true;
4011   }
4012   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4013                                        Op1IsKill);
4014   if (NeedTrunc)
4015     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4016   return ResultReg;
4017 }
4018
4019 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4020                                      bool Op0IsKill, uint64_t Shift,
4021                                      bool IsZExt) {
4022   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4023          "Unexpected source/return type pair.");
4024   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4025           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4026          "Unexpected source value type.");
4027   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4028           RetVT == MVT::i64) && "Unexpected return value type.");
4029
4030   bool Is64Bit = (RetVT == MVT::i64);
4031   unsigned RegSize = Is64Bit ? 64 : 32;
4032   unsigned DstBits = RetVT.getSizeInBits();
4033   unsigned SrcBits = SrcVT.getSizeInBits();
4034   const TargetRegisterClass *RC =
4035       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4036
4037   // Just emit a copy for "zero" shifts.
4038   if (Shift == 0) {
4039     if (RetVT == SrcVT) {
4040       unsigned ResultReg = createResultReg(RC);
4041       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4042               TII.get(TargetOpcode::COPY), ResultReg)
4043           .addReg(Op0, getKillRegState(Op0IsKill));
4044       return ResultReg;
4045     } else
4046       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4047   }
4048
4049   // Don't deal with undefined shifts.
4050   if (Shift >= DstBits)
4051     return 0;
4052
4053   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4054   // {S|U}BFM Wd, Wn, #r, #s
4055   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4056
4057   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4058   // %2 = shl i16 %1, 4
4059   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4060   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4061   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4062   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4063
4064   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4065   // %2 = shl i16 %1, 8
4066   // Wd<32+7-24,32-24> = Wn<7:0>
4067   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4068   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4069   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4070
4071   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4072   // %2 = shl i16 %1, 12
4073   // Wd<32+3-20,32-20> = Wn<3:0>
4074   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4075   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4076   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4077
4078   unsigned ImmR = RegSize - Shift;
4079   // Limit the width to the length of the source type.
4080   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4081   static const unsigned OpcTable[2][2] = {
4082     {AArch64::SBFMWri, AArch64::SBFMXri},
4083     {AArch64::UBFMWri, AArch64::UBFMXri}
4084   };
4085   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4086   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4087     unsigned TmpReg = MRI.createVirtualRegister(RC);
4088     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4089             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4090         .addImm(0)
4091         .addReg(Op0, getKillRegState(Op0IsKill))
4092         .addImm(AArch64::sub_32);
4093     Op0 = TmpReg;
4094     Op0IsKill = true;
4095   }
4096   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4097 }
4098
4099 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4100                                      unsigned Op1Reg, bool Op1IsKill) {
4101   unsigned Opc = 0;
4102   bool NeedTrunc = false;
4103   uint64_t Mask = 0;
4104   switch (RetVT.SimpleTy) {
4105   default: return 0;
4106   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4107   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4108   case MVT::i32: Opc = AArch64::LSRVWr; break;
4109   case MVT::i64: Opc = AArch64::LSRVXr; break;
4110   }
4111
4112   const TargetRegisterClass *RC =
4113       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4114   if (NeedTrunc) {
4115     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4116     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4117     Op0IsKill = Op1IsKill = true;
4118   }
4119   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4120                                        Op1IsKill);
4121   if (NeedTrunc)
4122     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4123   return ResultReg;
4124 }
4125
4126 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4127                                      bool Op0IsKill, uint64_t Shift,
4128                                      bool IsZExt) {
4129   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4130          "Unexpected source/return type pair.");
4131   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4132           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4133          "Unexpected source value type.");
4134   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4135           RetVT == MVT::i64) && "Unexpected return value type.");
4136
4137   bool Is64Bit = (RetVT == MVT::i64);
4138   unsigned RegSize = Is64Bit ? 64 : 32;
4139   unsigned DstBits = RetVT.getSizeInBits();
4140   unsigned SrcBits = SrcVT.getSizeInBits();
4141   const TargetRegisterClass *RC =
4142       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4143
4144   // Just emit a copy for "zero" shifts.
4145   if (Shift == 0) {
4146     if (RetVT == SrcVT) {
4147       unsigned ResultReg = createResultReg(RC);
4148       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4149               TII.get(TargetOpcode::COPY), ResultReg)
4150       .addReg(Op0, getKillRegState(Op0IsKill));
4151       return ResultReg;
4152     } else
4153       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4154   }
4155
4156   // Don't deal with undefined shifts.
4157   if (Shift >= DstBits)
4158     return 0;
4159
4160   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4161   // {S|U}BFM Wd, Wn, #r, #s
4162   // Wd<s-r:0> = Wn<s:r> when r <= s
4163
4164   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4165   // %2 = lshr i16 %1, 4
4166   // Wd<7-4:0> = Wn<7:4>
4167   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4168   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4169   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4170
4171   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4172   // %2 = lshr i16 %1, 8
4173   // Wd<7-7,0> = Wn<7:7>
4174   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4175   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4176   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4177
4178   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4179   // %2 = lshr i16 %1, 12
4180   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4181   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4182   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4183   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4184
4185   if (Shift >= SrcBits && IsZExt)
4186     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4187
4188   // It is not possible to fold a sign-extend into the LShr instruction. In this
4189   // case emit a sign-extend.
4190   if (!IsZExt) {
4191     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4192     if (!Op0)
4193       return 0;
4194     Op0IsKill = true;
4195     SrcVT = RetVT;
4196     SrcBits = SrcVT.getSizeInBits();
4197     IsZExt = true;
4198   }
4199
4200   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4201   unsigned ImmS = SrcBits - 1;
4202   static const unsigned OpcTable[2][2] = {
4203     {AArch64::SBFMWri, AArch64::SBFMXri},
4204     {AArch64::UBFMWri, AArch64::UBFMXri}
4205   };
4206   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4207   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4208     unsigned TmpReg = MRI.createVirtualRegister(RC);
4209     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4210             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4211         .addImm(0)
4212         .addReg(Op0, getKillRegState(Op0IsKill))
4213         .addImm(AArch64::sub_32);
4214     Op0 = TmpReg;
4215     Op0IsKill = true;
4216   }
4217   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4218 }
4219
4220 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4221                                      unsigned Op1Reg, bool Op1IsKill) {
4222   unsigned Opc = 0;
4223   bool NeedTrunc = false;
4224   uint64_t Mask = 0;
4225   switch (RetVT.SimpleTy) {
4226   default: return 0;
4227   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4228   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4229   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4230   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4231   }
4232
4233   const TargetRegisterClass *RC =
4234       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4235   if (NeedTrunc) {
4236     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4237     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4238     Op0IsKill = Op1IsKill = true;
4239   }
4240   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4241                                        Op1IsKill);
4242   if (NeedTrunc)
4243     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4244   return ResultReg;
4245 }
4246
4247 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4248                                      bool Op0IsKill, uint64_t Shift,
4249                                      bool IsZExt) {
4250   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4251          "Unexpected source/return type pair.");
4252   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4253           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4254          "Unexpected source value type.");
4255   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4256           RetVT == MVT::i64) && "Unexpected return value type.");
4257
4258   bool Is64Bit = (RetVT == MVT::i64);
4259   unsigned RegSize = Is64Bit ? 64 : 32;
4260   unsigned DstBits = RetVT.getSizeInBits();
4261   unsigned SrcBits = SrcVT.getSizeInBits();
4262   const TargetRegisterClass *RC =
4263       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4264
4265   // Just emit a copy for "zero" shifts.
4266   if (Shift == 0) {
4267     if (RetVT == SrcVT) {
4268       unsigned ResultReg = createResultReg(RC);
4269       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4270               TII.get(TargetOpcode::COPY), ResultReg)
4271       .addReg(Op0, getKillRegState(Op0IsKill));
4272       return ResultReg;
4273     } else
4274       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4275   }
4276
4277   // Don't deal with undefined shifts.
4278   if (Shift >= DstBits)
4279     return 0;
4280
4281   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4282   // {S|U}BFM Wd, Wn, #r, #s
4283   // Wd<s-r:0> = Wn<s:r> when r <= s
4284
4285   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4286   // %2 = ashr i16 %1, 4
4287   // Wd<7-4:0> = Wn<7:4>
4288   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4289   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4290   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4291
4292   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4293   // %2 = ashr i16 %1, 8
4294   // Wd<7-7,0> = Wn<7:7>
4295   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4296   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4297   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4298
4299   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4300   // %2 = ashr i16 %1, 12
4301   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4302   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4303   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4304   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4305
4306   if (Shift >= SrcBits && IsZExt)
4307     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4308
4309   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4310   unsigned ImmS = SrcBits - 1;
4311   static const unsigned OpcTable[2][2] = {
4312     {AArch64::SBFMWri, AArch64::SBFMXri},
4313     {AArch64::UBFMWri, AArch64::UBFMXri}
4314   };
4315   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4316   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4317     unsigned TmpReg = MRI.createVirtualRegister(RC);
4318     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4319             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4320         .addImm(0)
4321         .addReg(Op0, getKillRegState(Op0IsKill))
4322         .addImm(AArch64::sub_32);
4323     Op0 = TmpReg;
4324     Op0IsKill = true;
4325   }
4326   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4327 }
4328
4329 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4330                                      bool IsZExt) {
4331   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4332
4333   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4334   // DestVT are odd things, so test to make sure that they are both types we can
4335   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4336   // bail out to SelectionDAG.
4337   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4338        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4339       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4340        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4341     return 0;
4342
4343   unsigned Opc;
4344   unsigned Imm = 0;
4345
4346   switch (SrcVT.SimpleTy) {
4347   default:
4348     return 0;
4349   case MVT::i1:
4350     return emiti1Ext(SrcReg, DestVT, IsZExt);
4351   case MVT::i8:
4352     if (DestVT == MVT::i64)
4353       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4354     else
4355       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4356     Imm = 7;
4357     break;
4358   case MVT::i16:
4359     if (DestVT == MVT::i64)
4360       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4361     else
4362       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4363     Imm = 15;
4364     break;
4365   case MVT::i32:
4366     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4367     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4368     Imm = 31;
4369     break;
4370   }
4371
4372   // Handle i8 and i16 as i32.
4373   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4374     DestVT = MVT::i32;
4375   else if (DestVT == MVT::i64) {
4376     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4377     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4378             TII.get(AArch64::SUBREG_TO_REG), Src64)
4379         .addImm(0)
4380         .addReg(SrcReg)
4381         .addImm(AArch64::sub_32);
4382     SrcReg = Src64;
4383   }
4384
4385   const TargetRegisterClass *RC =
4386       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4387   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4388 }
4389
4390 static bool isZExtLoad(const MachineInstr *LI) {
4391   switch (LI->getOpcode()) {
4392   default:
4393     return false;
4394   case AArch64::LDURBBi:
4395   case AArch64::LDURHHi:
4396   case AArch64::LDURWi:
4397   case AArch64::LDRBBui:
4398   case AArch64::LDRHHui:
4399   case AArch64::LDRWui:
4400   case AArch64::LDRBBroX:
4401   case AArch64::LDRHHroX:
4402   case AArch64::LDRWroX:
4403   case AArch64::LDRBBroW:
4404   case AArch64::LDRHHroW:
4405   case AArch64::LDRWroW:
4406     return true;
4407   }
4408 }
4409
4410 static bool isSExtLoad(const MachineInstr *LI) {
4411   switch (LI->getOpcode()) {
4412   default:
4413     return false;
4414   case AArch64::LDURSBWi:
4415   case AArch64::LDURSHWi:
4416   case AArch64::LDURSBXi:
4417   case AArch64::LDURSHXi:
4418   case AArch64::LDURSWi:
4419   case AArch64::LDRSBWui:
4420   case AArch64::LDRSHWui:
4421   case AArch64::LDRSBXui:
4422   case AArch64::LDRSHXui:
4423   case AArch64::LDRSWui:
4424   case AArch64::LDRSBWroX:
4425   case AArch64::LDRSHWroX:
4426   case AArch64::LDRSBXroX:
4427   case AArch64::LDRSHXroX:
4428   case AArch64::LDRSWroX:
4429   case AArch64::LDRSBWroW:
4430   case AArch64::LDRSHWroW:
4431   case AArch64::LDRSBXroW:
4432   case AArch64::LDRSHXroW:
4433   case AArch64::LDRSWroW:
4434     return true;
4435   }
4436 }
4437
4438 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4439                                          MVT SrcVT) {
4440   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4441   if (!LI || !LI->hasOneUse())
4442     return false;
4443
4444   // Check if the load instruction has already been selected.
4445   unsigned Reg = lookUpRegForValue(LI);
4446   if (!Reg)
4447     return false;
4448
4449   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4450   if (!MI)
4451     return false;
4452
4453   // Check if the correct load instruction has been emitted - SelectionDAG might
4454   // have emitted a zero-extending load, but we need a sign-extending load.
4455   bool IsZExt = isa<ZExtInst>(I);
4456   const auto *LoadMI = MI;
4457   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4458       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4459     unsigned LoadReg = MI->getOperand(1).getReg();
4460     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4461     assert(LoadMI && "Expected valid instruction");
4462   }
4463   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4464     return false;
4465
4466   // Nothing to be done.
4467   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4468     updateValueMap(I, Reg);
4469     return true;
4470   }
4471
4472   if (IsZExt) {
4473     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4474     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4475             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4476         .addImm(0)
4477         .addReg(Reg, getKillRegState(true))
4478         .addImm(AArch64::sub_32);
4479     Reg = Reg64;
4480   } else {
4481     assert((MI->getOpcode() == TargetOpcode::COPY &&
4482             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4483            "Expected copy instruction");
4484     Reg = MI->getOperand(1).getReg();
4485     MI->eraseFromParent();
4486   }
4487   updateValueMap(I, Reg);
4488   return true;
4489 }
4490
4491 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4492   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4493          "Unexpected integer extend instruction.");
4494   MVT RetVT;
4495   MVT SrcVT;
4496   if (!isTypeSupported(I->getType(), RetVT))
4497     return false;
4498
4499   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4500     return false;
4501
4502   // Try to optimize already sign-/zero-extended values from load instructions.
4503   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4504     return true;
4505
4506   unsigned SrcReg = getRegForValue(I->getOperand(0));
4507   if (!SrcReg)
4508     return false;
4509   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4510
4511   // Try to optimize already sign-/zero-extended values from function arguments.
4512   bool IsZExt = isa<ZExtInst>(I);
4513   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4514     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4515       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4516         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4517         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4518                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4519             .addImm(0)
4520             .addReg(SrcReg, getKillRegState(SrcIsKill))
4521             .addImm(AArch64::sub_32);
4522         SrcReg = ResultReg;
4523       }
4524       // Conservatively clear all kill flags from all uses, because we are
4525       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4526       // level. The result of the instruction at IR level might have been
4527       // trivially dead, which is now not longer true.
4528       unsigned UseReg = lookUpRegForValue(I);
4529       if (UseReg)
4530         MRI.clearKillFlags(UseReg);
4531
4532       updateValueMap(I, SrcReg);
4533       return true;
4534     }
4535   }
4536
4537   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4538   if (!ResultReg)
4539     return false;
4540
4541   updateValueMap(I, ResultReg);
4542   return true;
4543 }
4544
4545 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4546   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4547   if (!DestEVT.isSimple())
4548     return false;
4549
4550   MVT DestVT = DestEVT.getSimpleVT();
4551   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4552     return false;
4553
4554   unsigned DivOpc;
4555   bool Is64bit = (DestVT == MVT::i64);
4556   switch (ISDOpcode) {
4557   default:
4558     return false;
4559   case ISD::SREM:
4560     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4561     break;
4562   case ISD::UREM:
4563     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4564     break;
4565   }
4566   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4567   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4568   if (!Src0Reg)
4569     return false;
4570   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4571
4572   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4573   if (!Src1Reg)
4574     return false;
4575   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4576
4577   const TargetRegisterClass *RC =
4578       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4579   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4580                                      Src1Reg, /*IsKill=*/false);
4581   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4582   // The remainder is computed as numerator - (quotient * denominator) using the
4583   // MSUB instruction.
4584   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4585                                         Src1Reg, Src1IsKill, Src0Reg,
4586                                         Src0IsKill);
4587   updateValueMap(I, ResultReg);
4588   return true;
4589 }
4590
4591 bool AArch64FastISel::selectMul(const Instruction *I) {
4592   MVT VT;
4593   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4594     return false;
4595
4596   if (VT.isVector())
4597     return selectBinaryOp(I, ISD::MUL);
4598
4599   const Value *Src0 = I->getOperand(0);
4600   const Value *Src1 = I->getOperand(1);
4601   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4602     if (C->getValue().isPowerOf2())
4603       std::swap(Src0, Src1);
4604
4605   // Try to simplify to a shift instruction.
4606   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4607     if (C->getValue().isPowerOf2()) {
4608       uint64_t ShiftVal = C->getValue().logBase2();
4609       MVT SrcVT = VT;
4610       bool IsZExt = true;
4611       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4612         if (!isIntExtFree(ZExt)) {
4613           MVT VT;
4614           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4615             SrcVT = VT;
4616             IsZExt = true;
4617             Src0 = ZExt->getOperand(0);
4618           }
4619         }
4620       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4621         if (!isIntExtFree(SExt)) {
4622           MVT VT;
4623           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4624             SrcVT = VT;
4625             IsZExt = false;
4626             Src0 = SExt->getOperand(0);
4627           }
4628         }
4629       }
4630
4631       unsigned Src0Reg = getRegForValue(Src0);
4632       if (!Src0Reg)
4633         return false;
4634       bool Src0IsKill = hasTrivialKill(Src0);
4635
4636       unsigned ResultReg =
4637           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4638
4639       if (ResultReg) {
4640         updateValueMap(I, ResultReg);
4641         return true;
4642       }
4643     }
4644
4645   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4646   if (!Src0Reg)
4647     return false;
4648   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4649
4650   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4651   if (!Src1Reg)
4652     return false;
4653   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4654
4655   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4656
4657   if (!ResultReg)
4658     return false;
4659
4660   updateValueMap(I, ResultReg);
4661   return true;
4662 }
4663
4664 bool AArch64FastISel::selectShift(const Instruction *I) {
4665   MVT RetVT;
4666   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4667     return false;
4668
4669   if (RetVT.isVector())
4670     return selectOperator(I, I->getOpcode());
4671
4672   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4673     unsigned ResultReg = 0;
4674     uint64_t ShiftVal = C->getZExtValue();
4675     MVT SrcVT = RetVT;
4676     bool IsZExt = I->getOpcode() != Instruction::AShr;
4677     const Value *Op0 = I->getOperand(0);
4678     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4679       if (!isIntExtFree(ZExt)) {
4680         MVT TmpVT;
4681         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4682           SrcVT = TmpVT;
4683           IsZExt = true;
4684           Op0 = ZExt->getOperand(0);
4685         }
4686       }
4687     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4688       if (!isIntExtFree(SExt)) {
4689         MVT TmpVT;
4690         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4691           SrcVT = TmpVT;
4692           IsZExt = false;
4693           Op0 = SExt->getOperand(0);
4694         }
4695       }
4696     }
4697
4698     unsigned Op0Reg = getRegForValue(Op0);
4699     if (!Op0Reg)
4700       return false;
4701     bool Op0IsKill = hasTrivialKill(Op0);
4702
4703     switch (I->getOpcode()) {
4704     default: llvm_unreachable("Unexpected instruction.");
4705     case Instruction::Shl:
4706       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4707       break;
4708     case Instruction::AShr:
4709       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4710       break;
4711     case Instruction::LShr:
4712       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4713       break;
4714     }
4715     if (!ResultReg)
4716       return false;
4717
4718     updateValueMap(I, ResultReg);
4719     return true;
4720   }
4721
4722   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4723   if (!Op0Reg)
4724     return false;
4725   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4726
4727   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4728   if (!Op1Reg)
4729     return false;
4730   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4731
4732   unsigned ResultReg = 0;
4733   switch (I->getOpcode()) {
4734   default: llvm_unreachable("Unexpected instruction.");
4735   case Instruction::Shl:
4736     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4737     break;
4738   case Instruction::AShr:
4739     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4740     break;
4741   case Instruction::LShr:
4742     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4743     break;
4744   }
4745
4746   if (!ResultReg)
4747     return false;
4748
4749   updateValueMap(I, ResultReg);
4750   return true;
4751 }
4752
4753 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4754   MVT RetVT, SrcVT;
4755
4756   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4757     return false;
4758   if (!isTypeLegal(I->getType(), RetVT))
4759     return false;
4760
4761   unsigned Opc;
4762   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4763     Opc = AArch64::FMOVWSr;
4764   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4765     Opc = AArch64::FMOVXDr;
4766   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4767     Opc = AArch64::FMOVSWr;
4768   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4769     Opc = AArch64::FMOVDXr;
4770   else
4771     return false;
4772
4773   const TargetRegisterClass *RC = nullptr;
4774   switch (RetVT.SimpleTy) {
4775   default: llvm_unreachable("Unexpected value type.");
4776   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4777   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4778   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4779   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4780   }
4781   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4782   if (!Op0Reg)
4783     return false;
4784   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4785   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4786
4787   if (!ResultReg)
4788     return false;
4789
4790   updateValueMap(I, ResultReg);
4791   return true;
4792 }
4793
4794 bool AArch64FastISel::selectFRem(const Instruction *I) {
4795   MVT RetVT;
4796   if (!isTypeLegal(I->getType(), RetVT))
4797     return false;
4798
4799   RTLIB::Libcall LC;
4800   switch (RetVT.SimpleTy) {
4801   default:
4802     return false;
4803   case MVT::f32:
4804     LC = RTLIB::REM_F32;
4805     break;
4806   case MVT::f64:
4807     LC = RTLIB::REM_F64;
4808     break;
4809   }
4810
4811   ArgListTy Args;
4812   Args.reserve(I->getNumOperands());
4813
4814   // Populate the argument list.
4815   for (auto &Arg : I->operands()) {
4816     ArgListEntry Entry;
4817     Entry.Val = Arg;
4818     Entry.Ty = Arg->getType();
4819     Args.push_back(Entry);
4820   }
4821
4822   CallLoweringInfo CLI;
4823   MCContext &Ctx = MF->getContext();
4824   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4825                 TLI.getLibcallName(LC), std::move(Args));
4826   if (!lowerCallTo(CLI))
4827     return false;
4828   updateValueMap(I, CLI.ResultReg);
4829   return true;
4830 }
4831
4832 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4833   MVT VT;
4834   if (!isTypeLegal(I->getType(), VT))
4835     return false;
4836
4837   if (!isa<ConstantInt>(I->getOperand(1)))
4838     return selectBinaryOp(I, ISD::SDIV);
4839
4840   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4841   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4842       !(C.isPowerOf2() || (-C).isPowerOf2()))
4843     return selectBinaryOp(I, ISD::SDIV);
4844
4845   unsigned Lg2 = C.countTrailingZeros();
4846   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4847   if (!Src0Reg)
4848     return false;
4849   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4850
4851   if (cast<BinaryOperator>(I)->isExact()) {
4852     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4853     if (!ResultReg)
4854       return false;
4855     updateValueMap(I, ResultReg);
4856     return true;
4857   }
4858
4859   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4860   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4861   if (!AddReg)
4862     return false;
4863
4864   // (Src0 < 0) ? Pow2 - 1 : 0;
4865   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4866     return false;
4867
4868   unsigned SelectOpc;
4869   const TargetRegisterClass *RC;
4870   if (VT == MVT::i64) {
4871     SelectOpc = AArch64::CSELXr;
4872     RC = &AArch64::GPR64RegClass;
4873   } else {
4874     SelectOpc = AArch64::CSELWr;
4875     RC = &AArch64::GPR32RegClass;
4876   }
4877   unsigned SelectReg =
4878       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4879                        Src0IsKill, AArch64CC::LT);
4880   if (!SelectReg)
4881     return false;
4882
4883   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4884   // negate the result.
4885   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4886   unsigned ResultReg;
4887   if (C.isNegative())
4888     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4889                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4890   else
4891     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4892
4893   if (!ResultReg)
4894     return false;
4895
4896   updateValueMap(I, ResultReg);
4897   return true;
4898 }
4899
4900 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4901 /// have to duplicate it for AArch64, because otherwise we would fail during the
4902 /// sign-extend emission.
4903 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4904   unsigned IdxN = getRegForValue(Idx);
4905   if (IdxN == 0)
4906     // Unhandled operand. Halt "fast" selection and bail.
4907     return std::pair<unsigned, bool>(0, false);
4908
4909   bool IdxNIsKill = hasTrivialKill(Idx);
4910
4911   // If the index is smaller or larger than intptr_t, truncate or extend it.
4912   MVT PtrVT = TLI.getPointerTy(DL);
4913   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4914   if (IdxVT.bitsLT(PtrVT)) {
4915     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4916     IdxNIsKill = true;
4917   } else if (IdxVT.bitsGT(PtrVT))
4918     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4919   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4920 }
4921
4922 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4923 /// duplicate it for AArch64, because otherwise we would bail out even for
4924 /// simple cases. This is because the standard fastEmit functions don't cover
4925 /// MUL at all and ADD is lowered very inefficientily.
4926 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4927   unsigned N = getRegForValue(I->getOperand(0));
4928   if (!N)
4929     return false;
4930   bool NIsKill = hasTrivialKill(I->getOperand(0));
4931
4932   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4933   // into a single N = N + TotalOffset.
4934   uint64_t TotalOffs = 0;
4935   MVT VT = TLI.getPointerTy(DL);
4936   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4937        GTI != E; ++GTI) {
4938     const Value *Idx = GTI.getOperand();
4939     if (auto *StTy = GTI.getStructTypeOrNull()) {
4940       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4941       // N = N + Offset
4942       if (Field)
4943         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4944     } else {
4945       Type *Ty = GTI.getIndexedType();
4946
4947       // If this is a constant subscript, handle it quickly.
4948       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4949         if (CI->isZero())
4950           continue;
4951         // N = N + Offset
4952         TotalOffs +=
4953             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4954         continue;
4955       }
4956       if (TotalOffs) {
4957         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4958         if (!N)
4959           return false;
4960         NIsKill = true;
4961         TotalOffs = 0;
4962       }
4963
4964       // N = N + Idx * ElementSize;
4965       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4966       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4967       unsigned IdxN = Pair.first;
4968       bool IdxNIsKill = Pair.second;
4969       if (!IdxN)
4970         return false;
4971
4972       if (ElementSize != 1) {
4973         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4974         if (!C)
4975           return false;
4976         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4977         if (!IdxN)
4978           return false;
4979         IdxNIsKill = true;
4980       }
4981       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4982       if (!N)
4983         return false;
4984     }
4985   }
4986   if (TotalOffs) {
4987     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4988     if (!N)
4989       return false;
4990   }
4991   updateValueMap(I, N);
4992   return true;
4993 }
4994
4995 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
       // Selects an IR cmpxchg as three machine instructions: CMP_SWAP_32/64,
       // a SUBS of the loaded value against the expected value, and a CSINC that
       // produces the i1 status. Returns false, before anything is emitted, when
       // the value type is not legal or is neither i32 nor i64; returns true
       // once the three instructions have been built.
       //
       // Only expected at -O0: per the assert message, AtomicExpand should have
       // rewritten the cmpxchg at any higher optimization level.
4996   assert(TM.getOptLevel() == CodeGenOpt::None &&
4997          "cmpxchg survived AtomicExpand at optlevel > -O0");
4998
       // The instruction's result is a two-element struct: element 0 is the
       // value result, element 1 is asserted to be the i1 status.
4999   auto *RetPairTy = cast<StructType>(I->getType());
5000   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5001   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5002          "cmpxchg has a non-i1 status result");
5003
5004   MVT VT;
5005   if (!isTypeLegal(RetTy, VT))
5006     return false;
5007
       // Opc is the compare-and-swap opcode, CmpOpc the matching-width SUBS,
       // and ResRC the register class of the value result.
5008   const TargetRegisterClass *ResRC;
5009   unsigned Opc, CmpOpc;
5010   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5011   // extractvalue selection doesn't support that.
5012   if (VT == MVT::i32) {
5013     Opc = AArch64::CMP_SWAP_32;
5014     CmpOpc = AArch64::SUBSWrs;
5015     ResRC = &AArch64::GPR32RegClass;
5016   } else if (VT == MVT::i64) {
5017     Opc = AArch64::CMP_SWAP_64;
5018     CmpOpc = AArch64::SUBSXrs;
5019     ResRC = &AArch64::GPR64RegClass;
5020   } else {
5021     return false;
5022   }
5023
5024   const MCInstrDesc &II = TII.get(Opc);
5025
       // Operand indices start at II.getNumDefs(), i.e. just past the defs, so
       // these constrain the first, second and third use operands: address,
       // expected value, new value (the order they are added below).
       // NOTE(review): the getRegForValue results are passed on without a zero
       // check, unlike other selectors in this file -- confirm it cannot fail
       // here.
5026   const unsigned AddrReg = constrainOperandRegClass(
5027       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5028   const unsigned DesiredReg = constrainOperandRegClass(
5029       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5030   const unsigned NewReg = constrainOperandRegClass(
5031       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5032
       // ResultReg1 and ResultReg2 are created back to back because the assert
       // at the end requires ResultReg2 == ResultReg1 + 1. ScratchReg is
       // created only after both; presumably reordering these calls would
       // break that assert -- keep the order.
5033   const unsigned ResultReg1 = createResultReg(ResRC);
5034   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5035   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5036
       // Compare-and-swap: defines the value result and a scratch register,
       // reads address, expected value and new value.
5037   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5038   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5039       .addDef(ResultReg1)
5040       .addDef(ScratchReg)
5041       .addUse(AddrReg)
5042       .addUse(DesiredReg)
5043       .addUse(NewReg);
5044
       // SUBS of (value result, expected value) with the zero register as the
       // destination, so the difference itself is discarded. The trailing
       // immediate 0 is presumably the shift operand of the shifted-register
       // form (no shift) -- TODO confirm.
5045   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5046       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5047       .addUse(ResultReg1)
5048       .addUse(DesiredReg)
5049       .addImm(0);
5050
       // CSINC with both sources WZR and condition NE. Per the A64 definition
       // of CSINC this yields 0 when NE holds and WZR + 1 = 1 otherwise, so
       // ResultReg2 is 1 exactly when the value result equalled the expected
       // value.
5051   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5052       .addDef(ResultReg2)
5053       .addUse(AArch64::WZR)
5054       .addUse(AArch64::WZR)
5055       .addImm(AArch64CC::NE);
5056
       // Map the cmpxchg to two registers starting at ResultReg1 (the count 2);
       // the assert checks that the status register directly follows the
       // value register.
5057   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5058   updateValueMap(I, ResultReg1, 2);
5059   return true;
5060 }
5061
5062 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
       // Dispatches on the IR opcode of I to the matching AArch64 selection
       // routine and returns that routine's result. Opcodes without a case
       // here take the default branch and are handed to selectOperator at the
       // bottom of the function.
       //
       // Several cases are two-step: a first selector is tried (selectBinaryOp,
       // FastISel::selectBitCast or selectCast) and the AArch64-specific
       // routine runs only if that first attempt returns false.
5063   switch (I->getOpcode()) {
5064   default:
         // No case for this opcode: leave the switch for the fallback below.
5065     break;
5066   case Instruction::Add:
5067   case Instruction::Sub:
5068     return selectAddSub(I);
5069   case Instruction::Mul:
5070     return selectMul(I);
5071   case Instruction::SDiv:
5072     return selectSDiv(I);
5073   case Instruction::SRem:
5074     if (!selectBinaryOp(I, ISD::SREM))
5075       return selectRem(I, ISD::SREM);
5076     return true;
5077   case Instruction::URem:
5078     if (!selectBinaryOp(I, ISD::UREM))
5079       return selectRem(I, ISD::UREM);
5080     return true;
5081   case Instruction::Shl:
5082   case Instruction::LShr:
5083   case Instruction::AShr:
5084     return selectShift(I);
5085   case Instruction::And:
5086   case Instruction::Or:
5087   case Instruction::Xor:
5088     return selectLogicalOp(I);
5089   case Instruction::Br:
5090     return selectBranch(I);
5091   case Instruction::IndirectBr:
5092     return selectIndirectBr(I);
5093   case Instruction::BitCast:
         // Base-class bitcast handling first, then this class's selectBitCast.
5094     if (!FastISel::selectBitCast(I))
5095       return selectBitCast(I);
5096     return true;
5097   case Instruction::FPToSI:
5098     if (!selectCast(I, ISD::FP_TO_SINT))
5099       return selectFPToInt(I, /*Signed=*/true);
5100     return true;
5101   case Instruction::FPToUI:
         // Unlike FPToSI, no selectCast attempt is made first.
5102     return selectFPToInt(I, /*Signed=*/false);
5103   case Instruction::ZExt:
5104   case Instruction::SExt:
5105     return selectIntExt(I);
5106   case Instruction::Trunc:
5107     if (!selectCast(I, ISD::TRUNCATE))
5108       return selectTrunc(I);
5109     return true;
5110   case Instruction::FPExt:
5111     return selectFPExt(I);
5112   case Instruction::FPTrunc:
5113     return selectFPTrunc(I);
5114   case Instruction::SIToFP:
5115     if (!selectCast(I, ISD::SINT_TO_FP))
5116       return selectIntToFP(I, /*Signed=*/true);
5117     return true;
5118   case Instruction::UIToFP:
         // Unlike SIToFP, no selectCast attempt is made first.
5119     return selectIntToFP(I, /*Signed=*/false);
5120   case Instruction::Load:
5121     return selectLoad(I);
5122   case Instruction::Store:
5123     return selectStore(I);
5124   case Instruction::FCmp:
5125   case Instruction::ICmp:
5126     return selectCmp(I);
5127   case Instruction::Select:
5128     return selectSelect(I);
5129   case Instruction::Ret:
5130     return selectRet(I);
5131   case Instruction::FRem:
5132     return selectFRem(I);
5133   case Instruction::GetElementPtr:
5134     return selectGetElementPtr(I);
5135   case Instruction::AtomicCmpXchg:
5136     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5137   }
5138
       // Every case above returns, so only the default branch reaches this
       // point. The two statements below take the address of a
       // calling-convention function and discard it; presumably the warnings
       // being silenced are unused-function warnings -- TODO confirm.
5139   // Silence warnings.
5140   (void)&CC_AArch64_DarwinPCS_VarArg;
5141   (void)&CC_AArch64_Win64_VarArg;
5142
5143   // fall-back to target-independent instruction selection.
5144   return selectOperator(I, I->getOpcode());
5145 }
5146
5147 namespace llvm {
5148
     /// Factory for the AArch64 FastISel implementation defined in this file.
     ///
     /// \param FuncInfo forwarded unchanged to the AArch64FastISel constructor.
     /// \param LibInfo forwarded unchanged to the AArch64FastISel constructor.
     /// \returns a pointer to a new AArch64FastISel allocated with `new`. Nothing
     /// in this function releases it; presumably the caller takes ownership --
     /// verify against the call site.
5149 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5150                                         const TargetLibraryInfo *LibInfo) {
5151   return new AArch64FastISel(FuncInfo, LibInfo);
5152 }
5153
5154 } // end namespace llvm