contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp

   1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64CallingConvention.h"
  18 #include "AArch64RegisterInfo.h"
  19 #include "AArch64Subtarget.h"
  20 #include "MCTargetDesc/AArch64AddressingModes.h"
  21 #include "Utils/AArch64BaseInfo.h"
  22 #include "llvm/ADT/APFloat.h"
  23 #include "llvm/ADT/APInt.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/Analysis/BranchProbabilityInfo.h"
  27 #include "llvm/CodeGen/CallingConvLower.h"
  28 #include "llvm/CodeGen/FastISel.h"
  29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  30 #include "llvm/CodeGen/ISDOpcodes.h"
  31 #include "llvm/CodeGen/MachineBasicBlock.h"
  32 #include "llvm/CodeGen/MachineConstantPool.h"
  33 #include "llvm/CodeGen/MachineFrameInfo.h"
  34 #include "llvm/CodeGen/MachineInstr.h"
  35 #include "llvm/CodeGen/MachineInstrBuilder.h"
  36 #include "llvm/CodeGen/MachineMemOperand.h"
  37 #include "llvm/CodeGen/MachineRegisterInfo.h"
  38 #include "llvm/CodeGen/RuntimeLibcalls.h"
  39 #include "llvm/CodeGen/ValueTypes.h"
  40 #include "llvm/IR/Argument.h"
  41 #include "llvm/IR/Attributes.h"
  42 #include "llvm/IR/BasicBlock.h"
  43 #include "llvm/IR/CallingConv.h"
  44 #include "llvm/IR/Constant.h"
  45 #include "llvm/IR/Constants.h"
  46 #include "llvm/IR/DataLayout.h"
  47 #include "llvm/IR/DerivedTypes.h"
  48 #include "llvm/IR/Function.h"
  49 #include "llvm/IR/GetElementPtrTypeIterator.h"
  50 #include "llvm/IR/GlobalValue.h"
  51 #include "llvm/IR/InstrTypes.h"
  52 #include "llvm/IR/Instruction.h"
  53 #include "llvm/IR/Instructions.h"
  54 #include "llvm/IR/IntrinsicInst.h"
  55 #include "llvm/IR/Intrinsics.h"
  56 #include "llvm/IR/Operator.h"
  57 #include "llvm/IR/Type.h"
  58 #include "llvm/IR/User.h"
  59 #include "llvm/IR/Value.h"
  60 #include "llvm/MC/MCInstrDesc.h"
  61 #include "llvm/MC/MCRegisterInfo.h"
  62 #include "llvm/MC/MCSymbol.h"
  63 #include "llvm/Support/AtomicOrdering.h"
  64 #include "llvm/Support/Casting.h"
  65 #include "llvm/Support/CodeGen.h"
  66 #include "llvm/Support/Compiler.h"
  67 #include "llvm/Support/ErrorHandling.h"
  68 #include "llvm/Support/MachineValueType.h"
  69 #include "llvm/Support/MathExtras.h"
  70 #include <algorithm>
  71 #include <cassert>
  72 #include <cstdint>
  73 #include <iterator>
  74 #include <utility>
  75
  76 using namespace llvm;
  77
  78 namespace {
  79
  80 class AArch64FastISel final : public FastISel {
  81   class Address {
  82   public:
  83     using BaseKind = enum {
  84       RegBase,
  85       FrameIndexBase
  86     };
  87
  88   private:
  89     BaseKind Kind = RegBase;
  90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
  91     union {
  92       unsigned Reg;
  93       int FI;
  94     } Base;
  95     unsigned OffsetReg = 0;
  96     unsigned Shift = 0;
  97     int64_t Offset = 0;
  98     const GlobalValue *GV = nullptr;
  99
 100   public:
 101     Address() { Base.Reg = 0; }
 102
 103     void setKind(BaseKind K) { Kind = K; }
 104     BaseKind getKind() const { return Kind; }
 105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
 106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
 107     bool isRegBase() const { return Kind == RegBase; }
 108     bool isFIBase() const { return Kind == FrameIndexBase; }
 109
 110     void setReg(unsigned Reg) {
 111       assert(isRegBase() && "Invalid base register access!");
 112       Base.Reg = Reg;
 113     }
 114
 115     unsigned getReg() const {
 116       assert(isRegBase() && "Invalid base register access!");
 117       return Base.Reg;
 118     }
 119
 120     void setOffsetReg(unsigned Reg) {
 121       OffsetReg = Reg;
 122     }
 123
 124     unsigned getOffsetReg() const {
 125       return OffsetReg;
 126     }
 127
 128     void setFI(unsigned FI) {
 129       assert(isFIBase() && "Invalid base frame index  access!");
 130       Base.FI = FI;
 131     }
 132
 133     unsigned getFI() const {
 134       assert(isFIBase() && "Invalid base frame index access!");
 135       return Base.FI;
 136     }
 137
 138     void setOffset(int64_t O) { Offset = O; }
 139     int64_t getOffset() { return Offset; }
 140     void setShift(unsigned S) { Shift = S; }
 141     unsigned getShift() { return Shift; }
 142
 143     void setGlobalValue(const GlobalValue *G) { GV = G; }
 144     const GlobalValue *getGlobalValue() { return GV; }
 145   };
 146
 147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 148   /// make the right decision when generating code for different targets.
 149   const AArch64Subtarget *Subtarget;
 150   LLVMContext *Context;
 151
 152   bool fastLowerArguments() override;
 153   bool fastLowerCall(CallLoweringInfo &CLI) override;
 154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 155
 156 private:
 157   // Selection routines.
 158   bool selectAddSub(const Instruction *I);
 159   bool selectLogicalOp(const Instruction *I);
 160   bool selectLoad(const Instruction *I);
 161   bool selectStore(const Instruction *I);
 162   bool selectBranch(const Instruction *I);
 163   bool selectIndirectBr(const Instruction *I);
 164   bool selectCmp(const Instruction *I);
 165   bool selectSelect(const Instruction *I);
 166   bool selectFPExt(const Instruction *I);
 167   bool selectFPTrunc(const Instruction *I);
 168   bool selectFPToInt(const Instruction *I, bool Signed);
 169   bool selectIntToFP(const Instruction *I, bool Signed);
 170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 171   bool selectRet(const Instruction *I);
 172   bool selectTrunc(const Instruction *I);
 173   bool selectIntExt(const Instruction *I);
 174   bool selectMul(const Instruction *I);
 175   bool selectShift(const Instruction *I);
 176   bool selectBitCast(const Instruction *I);
 177   bool selectFRem(const Instruction *I);
 178   bool selectSDiv(const Instruction *I);
 179   bool selectGetElementPtr(const Instruction *I);
 180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
 181
 182   // Utility helper routines.
 183   bool isTypeLegal(Type *Ty, MVT &VT);
 184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 185   bool isValueAvailable(const Value *V) const;
 186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 187   bool computeCallAddress(const Value *V, Address &Addr);
 188   bool simplifyAddress(Address &Addr, MVT VT);
 189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 190                             MachineMemOperand::Flags Flags,
 191                             unsigned ScaleFactor, MachineMemOperand *MMO);
 192   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 194                           unsigned Alignment);
 195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 196                          const Value *Cond);
 197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
 198   bool optimizeSelect(const SelectInst *SI);
 199   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
 200
 201   // Emit helper routines.
 202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 203                       const Value *RHS, bool SetFlags = false,
 204                       bool WantResult = true,  bool IsZExt = false);
 205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 206                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 207                          bool SetFlags = false, bool WantResult = true);
 208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 209                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 210                          bool WantResult = true);
 211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 212                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 213                          AArch64_AM::ShiftExtendType ShiftType,
 214                          uint64_t ShiftImm, bool SetFlags = false,
 215                          bool WantResult = true);
 216   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 217                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 218                           AArch64_AM::ShiftExtendType ExtType,
 219                           uint64_t ShiftImm, bool SetFlags = false,
 220                          bool WantResult = true);
 221
 222   // Emit functions.
 223   bool emitCompareAndBranch(const BranchInst *BI);
 224   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 225   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 226   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 227   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 228   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
 229                     MachineMemOperand *MMO = nullptr);
 230   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 231                  MachineMemOperand *MMO = nullptr);
 232   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
 233                         MachineMemOperand *MMO = nullptr);
 234   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 235   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 236   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 237                    bool SetFlags = false, bool WantResult = true,
 238                    bool IsZExt = false);
 239   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
 240   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 241                    bool SetFlags = false, bool WantResult = true,
 242                    bool IsZExt = false);
 243   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 244                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 245   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 246                        unsigned RHSReg, bool RHSIsKill,
 247                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 248                        bool WantResult = true);
 249   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 250                          const Value *RHS);
 251   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 252                             bool LHSIsKill, uint64_t Imm);
 253   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 254                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 255                             uint64_t ShiftImm);
 256   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 257   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 258                       unsigned Op1, bool Op1IsKill);
 259   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 260                         unsigned Op1, bool Op1IsKill);
 261   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 262                         unsigned Op1, bool Op1IsKill);
 263   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 264                       unsigned Op1Reg, bool Op1IsKill);
 265   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 266                       uint64_t Imm, bool IsZExt = true);
 267   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 268                       unsigned Op1Reg, bool Op1IsKill);
 269   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 270                       uint64_t Imm, bool IsZExt = true);
 271   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 272                       unsigned Op1Reg, bool Op1IsKill);
 273   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 274                       uint64_t Imm, bool IsZExt = false);
 275
 276   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 277   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 278   unsigned materializeGV(const GlobalValue *GV);
 279
 280   // Call handling routines.
 281 private:
 282   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 283   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 284                        unsigned &NumBytes);
 285   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 286
 287 public:
 288   // Backend specific FastISel code.
 289   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 290   unsigned fastMaterializeConstant(const Constant *C) override;
 291   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 292
 293   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 294                            const TargetLibraryInfo *LibInfo)
 295       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 296     Subtarget =
 297         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
 298     Context = &FuncInfo.Fn->getContext();
 299   }
 300
 301   bool fastSelectInstruction(const Instruction *I) override;
 302
 303 #include "AArch64GenFastISel.inc"
 304 };
 305
 306 } // end anonymous namespace
 307
 308 #include "AArch64GenCallingConv.inc"
 309
 310 /// Check if the sign-/zero-extend will be a noop.
 311 static bool isIntExtFree(const Instruction *I) {
 312   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 313          "Unexpected integer extend instruction.");
 314   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 315          "Unexpected value type.");
 316   bool IsZExt = isa<ZExtInst>(I);
 317
 318   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 319     if (LI->hasOneUse())
 320       return true;
 321
 322   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 323     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 324       return true;
 325
 326   return false;
 327 }
 328
 329 /// Determine the implicit scale factor that is applied by a memory
 330 /// operation for a given value type.
 331 static unsigned getImplicitScaleFactor(MVT VT) {
 332   switch (VT.SimpleTy) {
 333   default:
 334     return 0;    // invalid
 335   case MVT::i1:  // fall-through
 336   case MVT::i8:
 337     return 1;
 338   case MVT::i16:
 339     return 2;
 340   case MVT::i32: // fall-through
 341   case MVT::f32:
 342     return 4;
 343   case MVT::i64: // fall-through
 344   case MVT::f64:
 345     return 8;
 346   }
 347 }
 348
 349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 350   if (CC == CallingConv::WebKit_JS)
 351     return CC_AArch64_WebKit_JS;
 352   if (CC == CallingConv::GHC)
 353     return CC_AArch64_GHC;
 354   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 355 }
 356
 357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 358   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
 359          "Alloca should always return a pointer.");
 360
 361   // Don't handle dynamic allocas.
 362   if (!FuncInfo.StaticAllocaMap.count(AI))
 363     return 0;
 364
 365   DenseMap<const AllocaInst *, int>::iterator SI =
 366       FuncInfo.StaticAllocaMap.find(AI);
 367
 368   if (SI != FuncInfo.StaticAllocaMap.end()) {
 369     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 370     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 371             ResultReg)
 372         .addFrameIndex(SI->second)
 373         .addImm(0)
 374         .addImm(0);
 375     return ResultReg;
 376   }
 377
 378   return 0;
 379 }
 380
 381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 382   if (VT > MVT::i64)
 383     return 0;
 384
 385   if (!CI->isZero())
 386     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 387
 388   // Create a copy from the zero register to materialize a "0" value.
 389   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 390                                                    : &AArch64::GPR32RegClass;
 391   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 392   unsigned ResultReg = createResultReg(RC);
 393   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 394           ResultReg).addReg(ZeroReg, getKillRegState(true));
 395   return ResultReg;
 396 }
 397
 398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 399   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 400   // register, because the immediate version of fmov cannot encode zero.
 401   if (CFP->isNullValue())
 402     return fastMaterializeFloatZero(CFP);
 403
 404   if (VT != MVT::f32 && VT != MVT::f64)
 405     return 0;
 406
 407   const APFloat Val = CFP->getValueAPF();
 408   bool Is64Bit = (VT == MVT::f64);
 409   // This checks to see if we can use FMOV instructions to materialize
 410   // a constant, otherwise we have to materialize via the constant pool.
 411   if (TLI.isFPImmLegal(Val, VT)) {
 412     int Imm =
 413         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 414     assert((Imm != -1) && "Cannot encode floating-point constant.");
 415     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 416     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 417   }
 418
 419   // For the MachO large code model materialize the FP constant in code.
 420   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
 421     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
 422     const TargetRegisterClass *RC = Is64Bit ?
 423         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 424
 425     unsigned TmpReg = createResultReg(RC);
 426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
 427         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
 428
 429     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 430     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
 431             TII.get(TargetOpcode::COPY), ResultReg)
 432         .addReg(TmpReg, getKillRegState(true));
 433
 434     return ResultReg;
 435   }
 436
 437   // Materialize via constant pool.  MachineConstantPool wants an explicit
 438   // alignment.
 439   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 440   if (Align == 0)
 441     Align = DL.getTypeAllocSize(CFP->getType());
 442
 443   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 444   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 446           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 447
 448   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 449   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 450   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 451       .addReg(ADRPReg)
 452       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 453   return ResultReg;
 454 }
 455
 456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 457   // We can't handle thread-local variables quickly yet.
 458   if (GV->isThreadLocal())
 459     return 0;
 460
 461   // MachO still uses GOT for large code-model accesses, but ELF requires
 462   // movz/movk sequences, which FastISel doesn't handle yet.
 463   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
 464     return 0;
 465
 466   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 467
 468   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
 469   if (!DestEVT.isSimple())
 470     return 0;
 471
 472   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 473   unsigned ResultReg;
 474
 475   if (OpFlags & AArch64II::MO_GOT) {
 476     // ADRP + LDRX
 477     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 478             ADRPReg)
 479         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
 480
 481     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 482     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 483             ResultReg)
 484         .addReg(ADRPReg)
 485         .addGlobalAddress(GV, 0,
 486                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
 487   } else {
 488     // ADRP + ADDX
 489     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 490             ADRPReg)
 491         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
 492
 493     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 494     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 495             ResultReg)
 496         .addReg(ADRPReg)
 497         .addGlobalAddress(GV, 0,
 498                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
 499         .addImm(0);
 500   }
 501   return ResultReg;
 502 }
 503
 504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 505   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
 506
 507   // Only handle simple types.
 508   if (!CEVT.isSimple())
 509     return 0;
 510   MVT VT = CEVT.getSimpleVT();
 511
 512   if (const auto *CI = dyn_cast<ConstantInt>(C))
 513     return materializeInt(CI, VT);
 514   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 515     return materializeFP(CFP, VT);
 516   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 517     return materializeGV(GV);
 518
 519   return 0;
 520 }
 521
 522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 523   assert(CFP->isNullValue() &&
 524          "Floating-point constant is not a positive zero.");
 525   MVT VT;
 526   if (!isTypeLegal(CFP->getType(), VT))
 527     return 0;
 528
 529   if (VT != MVT::f32 && VT != MVT::f64)
 530     return 0;
 531
 532   bool Is64Bit = (VT == MVT::f64);
 533   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 534   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 535   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 536 }
 537
 538 /// Check if the multiply is by a power-of-2 constant.
 539 static bool isMulPowOf2(const Value *I) {
 540   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 541     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 542       if (C->getValue().isPowerOf2())
 543         return true;
 544     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 545       if (C->getValue().isPowerOf2())
 546         return true;
 547   }
 548   return false;
 549 }
 550
 551 // Computes the address to get to an object.
 552 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 553 {
 554   const User *U = nullptr;
 555   unsigned Opcode = Instruction::UserOp1;
 556   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 557     // Don't walk into other basic blocks unless the object is an alloca from
 558     // another block, otherwise it may not have a virtual register assigned.
 559     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 560         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 561       Opcode = I->getOpcode();
 562       U = I;
 563     }
 564   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 565     Opcode = C->getOpcode();
 566     U = C;
 567   }
 568
 569   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
 570     if (Ty->getAddressSpace() > 255)
 571       // Fast instruction selection doesn't support the special
 572       // address spaces.
 573       return false;
 574
 575   switch (Opcode) {
 576   default:
 577     break;
 578   case Instruction::BitCast:
 579     // Look through bitcasts.
 580     return computeAddress(U->getOperand(0), Addr, Ty);
 581
 582   case Instruction::IntToPtr:
 583     // Look past no-op inttoptrs.
 584     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 585         TLI.getPointerTy(DL))
 586       return computeAddress(U->getOperand(0), Addr, Ty);
 587     break;
 588
 589   case Instruction::PtrToInt:
 590     // Look past no-op ptrtoints.
 591     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 592       return computeAddress(U->getOperand(0), Addr, Ty);
 593     break;
 594
 595   case Instruction::GetElementPtr: {
 596     Address SavedAddr = Addr;
 597     uint64_t TmpOffset = Addr.getOffset();
 598
 599     // Iterate through the GEP folding the constants into offsets where
 600     // we can.
 601     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
 602          GTI != E; ++GTI) {
 603       const Value *Op = GTI.getOperand();
 604       if (StructType *STy = GTI.getStructTypeOrNull()) {
 605         const StructLayout *SL = DL.getStructLayout(STy);
 606         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 607         TmpOffset += SL->getElementOffset(Idx);
 608       } else {
 609         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 610         while (true) {
 611           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 612             // Constant-offset addressing.
 613             TmpOffset += CI->getSExtValue() * S;
 614             break;
 615           }
 616           if (canFoldAddIntoGEP(U, Op)) {
 617             // A compatible add with a constant operand. Fold the constant.
 618             ConstantInt *CI =
 619                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 620             TmpOffset += CI->getSExtValue() * S;
 621             // Iterate on the other operand.
 622             Op = cast<AddOperator>(Op)->getOperand(0);
 623             continue;
 624           }
 625           // Unsupported
 626           goto unsupported_gep;
 627         }
 628       }
 629     }
 630
 631     // Try to grab the base operand now.
 632     Addr.setOffset(TmpOffset);
 633     if (computeAddress(U->getOperand(0), Addr, Ty))
 634       return true;
 635
 636     // We failed, restore everything and try the other options.
 637     Addr = SavedAddr;
 638
 639   unsupported_gep:
 640     break;
 641   }
 642   case Instruction::Alloca: {
 643     const AllocaInst *AI = cast<AllocaInst>(Obj);
 644     DenseMap<const AllocaInst *, int>::iterator SI =
 645         FuncInfo.StaticAllocaMap.find(AI);
 646     if (SI != FuncInfo.StaticAllocaMap.end()) {
 647       Addr.setKind(Address::FrameIndexBase);
 648       Addr.setFI(SI->second);
 649       return true;
 650     }
 651     break;
 652   }
 653   case Instruction::Add: {
 654     // Adds of constants are common and easy enough.
 655     const Value *LHS = U->getOperand(0);
 656     const Value *RHS = U->getOperand(1);
 657
 658     if (isa<ConstantInt>(LHS))
 659       std::swap(LHS, RHS);
 660
 661     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 662       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 663       return computeAddress(LHS, Addr, Ty);
 664     }
 665
 666     Address Backup = Addr;
 667     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 668       return true;
 669     Addr = Backup;
 670
 671     break;
 672   }
 673   case Instruction::Sub: {
 674     // Subs of constants are common and easy enough.
 675     const Value *LHS = U->getOperand(0);
 676     const Value *RHS = U->getOperand(1);
 677
 678     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 679       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 680       return computeAddress(LHS, Addr, Ty);
 681     }
 682     break;
 683   }
 684   case Instruction::Shl: {
 685     if (Addr.getOffsetReg())
 686       break;
 687
 688     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 689     if (!CI)
 690       break;
 691
 692     unsigned Val = CI->getZExtValue();
 693     if (Val < 1 || Val > 3)
 694       break;
 695
 696     uint64_t NumBytes = 0;
 697     if (Ty && Ty->isSized()) {
 698       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 699       NumBytes = NumBits / 8;
 700       if (!isPowerOf2_64(NumBits))
 701         NumBytes = 0;
 702     }
 703
 704     if (NumBytes != (1ULL << Val))
 705       break;
 706
 707     Addr.setShift(Val);
 708     Addr.setExtendType(AArch64_AM::LSL);
 709
 710     const Value *Src = U->getOperand(0);
 711     if (const auto *I = dyn_cast<Instruction>(Src)) {
 712       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 713         // Fold the zext or sext when it won't become a noop.
 714         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 715           if (!isIntExtFree(ZE) &&
 716               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 717             Addr.setExtendType(AArch64_AM::UXTW);
 718             Src = ZE->getOperand(0);
 719           }
 720         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 721           if (!isIntExtFree(SE) &&
 722               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 723             Addr.setExtendType(AArch64_AM::SXTW);
 724             Src = SE->getOperand(0);
 725           }
 726         }
 727       }
 728     }
 729
 730     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 731       if (AI->getOpcode() == Instruction::And) {
 732         const Value *LHS = AI->getOperand(0);
 733         const Value *RHS = AI->getOperand(1);
 734
 735         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 736           if (C->getValue() == 0xffffffff)
 737             std::swap(LHS, RHS);
 738
 739         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 740           if (C->getValue() == 0xffffffff) {
 741             Addr.setExtendType(AArch64_AM::UXTW);
 742             unsigned Reg = getRegForValue(LHS);
 743             if (!Reg)
 744               return false;
 745             bool RegIsKill = hasTrivialKill(LHS);
 746             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 747                                              AArch64::sub_32);
 748             Addr.setOffsetReg(Reg);
 749             return true;
 750           }
 751       }
 752
 753     unsigned Reg = getRegForValue(Src);
 754     if (!Reg)
 755       return false;
 756     Addr.setOffsetReg(Reg);
 757     return true;
 758   }
 759   case Instruction::Mul: {
 760     if (Addr.getOffsetReg())
 761       break;
 762
 763     if (!isMulPowOf2(U))
 764       break;
 765
 766     const Value *LHS = U->getOperand(0);
 767     const Value *RHS = U->getOperand(1);
 768
 769     // Canonicalize power-of-2 value to the RHS.
 770     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 771       if (C->getValue().isPowerOf2())
 772         std::swap(LHS, RHS);
 773
 774     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 775     const auto *C = cast<ConstantInt>(RHS);
 776     unsigned Val = C->getValue().logBase2();
 777     if (Val < 1 || Val > 3)
 778       break;
 779
 780     uint64_t NumBytes = 0;
 781     if (Ty && Ty->isSized()) {
 782       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 783       NumBytes = NumBits / 8;
 784       if (!isPowerOf2_64(NumBits))
 785         NumBytes = 0;
 786     }
 787
 788     if (NumBytes != (1ULL << Val))
 789       break;
 790
 791     Addr.setShift(Val);
 792     Addr.setExtendType(AArch64_AM::LSL);
 793
 794     const Value *Src = LHS;
 795     if (const auto *I = dyn_cast<Instruction>(Src)) {
 796       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 797         // Fold the zext or sext when it won't become a noop.
 798         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
 799           if (!isIntExtFree(ZE) &&
 800               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 801             Addr.setExtendType(AArch64_AM::UXTW);
 802             Src = ZE->getOperand(0);
 803           }
 804         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
 805           if (!isIntExtFree(SE) &&
 806               SE->getOperand(0)->getType()->isIntegerTy(32)) {
 807             Addr.setExtendType(AArch64_AM::SXTW);
 808             Src = SE->getOperand(0);
 809           }
 810         }
 811       }
 812     }
 813
 814     unsigned Reg = getRegForValue(Src);
 815     if (!Reg)
 816       return false;
 817     Addr.setOffsetReg(Reg);
 818     return true;
 819   }
 820   case Instruction::And: {
 821     if (Addr.getOffsetReg())
 822       break;
 823
 824     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
 825       break;
 826
 827     const Value *LHS = U->getOperand(0);
 828     const Value *RHS = U->getOperand(1);
 829
 830     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 831       if (C->getValue() == 0xffffffff)
 832         std::swap(LHS, RHS);
 833
 834     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 835       if (C->getValue() == 0xffffffff) {
 836         Addr.setShift(0);
 837         Addr.setExtendType(AArch64_AM::LSL);
 838         Addr.setExtendType(AArch64_AM::UXTW);
 839
 840         unsigned Reg = getRegForValue(LHS);
 841         if (!Reg)
 842           return false;
 843         bool RegIsKill = hasTrivialKill(LHS);
 844         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 845                                          AArch64::sub_32);
 846         Addr.setOffsetReg(Reg);
 847         return true;
 848       }
 849     break;
 850   }
 851   case Instruction::SExt:
 852   case Instruction::ZExt: {
 853     if (!Addr.getReg() || Addr.getOffsetReg())
 854       break;
 855
 856     const Value *Src = nullptr;
 857     // Fold the zext or sext when it won't become a noop.
 858     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
 859       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 860         Addr.setExtendType(AArch64_AM::UXTW);
 861         Src = ZE->getOperand(0);
 862       }
 863     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
 864       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 865         Addr.setExtendType(AArch64_AM::SXTW);
 866         Src = SE->getOperand(0);
 867       }
 868     }
 869
 870     if (!Src)
 871       break;
 872
 873     Addr.setShift(0);
 874     unsigned Reg = getRegForValue(Src);
 875     if (!Reg)
 876       return false;
 877     Addr.setOffsetReg(Reg);
 878     return true;
 879   }
 880   } // end switch
 881
 882   if (Addr.isRegBase() && !Addr.getReg()) {
 883     unsigned Reg = getRegForValue(Obj);
 884     if (!Reg)
 885       return false;
 886     Addr.setReg(Reg);
 887     return true;
 888   }
 889
 890   if (!Addr.getOffsetReg()) {
 891     unsigned Reg = getRegForValue(Obj);
 892     if (!Reg)
 893       return false;
 894     Addr.setOffsetReg(Reg);
 895     return true;
 896   }
 897
 898   return false;
 899 }
 900
 901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 902   const User *U = nullptr;
 903   unsigned Opcode = Instruction::UserOp1;
 904   bool InMBB = true;
 905
 906   if (const auto *I = dyn_cast<Instruction>(V)) {
 907     Opcode = I->getOpcode();
 908     U = I;
 909     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 910   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 911     Opcode = C->getOpcode();
 912     U = C;
 913   }
 914
 915   switch (Opcode) {
 916   default: break;
 917   case Instruction::BitCast:
 918     // Look past bitcasts if its operand is in the same BB.
 919     if (InMBB)
 920       return computeCallAddress(U->getOperand(0), Addr);
 921     break;
 922   case Instruction::IntToPtr:
 923     // Look past no-op inttoptrs if its operand is in the same BB.
 924     if (InMBB &&
 925         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
 926             TLI.getPointerTy(DL))
 927       return computeCallAddress(U->getOperand(0), Addr);
 928     break;
 929   case Instruction::PtrToInt:
 930     // Look past no-op ptrtoints if its operand is in the same BB.
 931     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
 932       return computeCallAddress(U->getOperand(0), Addr);
 933     break;
 934   }
 935
 936   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 937     Addr.setGlobalValue(GV);
 938     return true;
 939   }
 940
 941   // If all else fails, try to materialize the value in a register.
 942   if (!Addr.getGlobalValue()) {
 943     Addr.setReg(getRegForValue(V));
 944     return Addr.getReg() != 0;
 945   }
 946
 947   return false;
 948 }
 949
 950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 951   EVT evt = TLI.getValueType(DL, Ty, true);
 952
 953   // Only handle simple types.
 954   if (evt == MVT::Other || !evt.isSimple())
 955     return false;
 956   VT = evt.getSimpleVT();
 957
 958   // This is a legal type, but it's not something we handle in fast-isel.
 959   if (VT == MVT::f128)
 960     return false;
 961
 962   // Handle all other legal types, i.e. a register that will directly hold this
 963   // value.
 964   return TLI.isTypeLegal(VT);
 965 }
 966
 967 /// Determine if the value type is supported by FastISel.
 968 ///
 969 /// FastISel for AArch64 can handle more value types than are legal. This adds
 970 /// simple value type such as i1, i8, and i16.
 971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 972   if (Ty->isVectorTy() && !IsVectorAllowed)
 973     return false;
 974
 975   if (isTypeLegal(Ty, VT))
 976     return true;
 977
 978   // If this is a type than can be sign or zero-extended to a basic operation
 979   // go ahead and accept it now.
 980   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 981     return true;
 982
 983   return false;
 984 }
 985
 986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 987   if (!isa<Instruction>(V))
 988     return true;
 989
 990   const auto *I = cast<Instruction>(V);
 991   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
 992 }
 993
 994 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 995   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 996   if (!ScaleFactor)
 997     return false;
 998
 999   bool ImmediateOffsetNeedsLowering = false;
1000   bool RegisterOffsetNeedsLowering = false;
1001   int64_t Offset = Addr.getOffset();
1002   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003     ImmediateOffsetNeedsLowering = true;
1004   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005            !isUInt<12>(Offset / ScaleFactor))
1006     ImmediateOffsetNeedsLowering = true;
1007
1008   // Cannot encode an offset register and an immediate offset in the same
1009   // instruction. Fold the immediate offset into the load/store instruction and
1010   // emit an additional add to take care of the offset register.
1011   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012     RegisterOffsetNeedsLowering = true;
1013
1014   // Cannot encode zero register as base.
1015   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016     RegisterOffsetNeedsLowering = true;
1017
1018   // If this is a stack pointer and the offset needs to be simplified then put
1019   // the alloca address into a register, set the base type back to register and
1020   // continue. This should almost never happen.
1021   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022   {
1023     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025             ResultReg)
1026       .addFrameIndex(Addr.getFI())
1027       .addImm(0)
1028       .addImm(0);
1029     Addr.setKind(Address::RegBase);
1030     Addr.setReg(ResultReg);
1031   }
1032
1033   if (RegisterOffsetNeedsLowering) {
1034     unsigned ResultReg = 0;
1035     if (Addr.getReg()) {
1036       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037           Addr.getExtendType() == AArch64_AM::UXTW   )
1038         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1041                                   Addr.getShift());
1042       else
1043         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046                                   Addr.getShift());
1047     } else {
1048       if (Addr.getExtendType() == AArch64_AM::UXTW)
1049         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050                                /*Op0IsKill=*/false, Addr.getShift(),
1051                                /*IsZExt=*/true);
1052       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054                                /*Op0IsKill=*/false, Addr.getShift(),
1055                                /*IsZExt=*/false);
1056       else
1057         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058                                /*Op0IsKill=*/false, Addr.getShift());
1059     }
1060     if (!ResultReg)
1061       return false;
1062
1063     Addr.setReg(ResultReg);
1064     Addr.setOffsetReg(0);
1065     Addr.setShift(0);
1066     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067   }
1068
1069   // Since the offset is too large for the load/store instruction get the
1070   // reg+offset into a register.
1071   if (ImmediateOffsetNeedsLowering) {
1072     unsigned ResultReg;
1073     if (Addr.getReg())
1074       // Try to fold the immediate into the add instruction.
1075       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076     else
1077       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078
1079     if (!ResultReg)
1080       return false;
1081     Addr.setReg(ResultReg);
1082     Addr.setOffset(0);
1083   }
1084   return true;
1085 }
1086
1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088                                            const MachineInstrBuilder &MIB,
1089                                            MachineMemOperand::Flags Flags,
1090                                            unsigned ScaleFactor,
1091                                            MachineMemOperand *MMO) {
1092   int64_t Offset = Addr.getOffset() / ScaleFactor;
1093   // Frame base works a bit differently. Handle it separately.
1094   if (Addr.isFIBase()) {
1095     int FI = Addr.getFI();
1096     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1097     // and alignment should be based on the VT.
1098     MMO = FuncInfo.MF->getMachineMemOperand(
1099         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101     // Now add the rest of the operands.
1102     MIB.addFrameIndex(FI).addImm(Offset);
1103   } else {
1104     assert(Addr.isRegBase() && "Unexpected address kind.");
1105     const MCInstrDesc &II = MIB->getDesc();
1106     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107     Addr.setReg(
1108       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109     Addr.setOffsetReg(
1110       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111     if (Addr.getOffsetReg()) {
1112       assert(Addr.getOffset() == 0 && "Unexpected offset");
1113       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114                       Addr.getExtendType() == AArch64_AM::SXTX;
1115       MIB.addReg(Addr.getReg());
1116       MIB.addReg(Addr.getOffsetReg());
1117       MIB.addImm(IsSigned);
1118       MIB.addImm(Addr.getShift() != 0);
1119     } else
1120       MIB.addReg(Addr.getReg()).addImm(Offset);
1121   }
1122
1123   if (MMO)
1124     MIB.addMemOperand(MMO);
1125 }
1126
1127 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1128                                      const Value *RHS, bool SetFlags,
1129                                      bool WantResult,  bool IsZExt) {
1130   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1131   bool NeedExtend = false;
1132   switch (RetVT.SimpleTy) {
1133   default:
1134     return 0;
1135   case MVT::i1:
1136     NeedExtend = true;
1137     break;
1138   case MVT::i8:
1139     NeedExtend = true;
1140     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1141     break;
1142   case MVT::i16:
1143     NeedExtend = true;
1144     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1145     break;
1146   case MVT::i32:  // fall-through
1147   case MVT::i64:
1148     break;
1149   }
1150   MVT SrcVT = RetVT;
1151   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1152
1153   // Canonicalize immediates to the RHS first.
1154   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1155     std::swap(LHS, RHS);
1156
1157   // Canonicalize mul by power of 2 to the RHS.
1158   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1159     if (isMulPowOf2(LHS))
1160       std::swap(LHS, RHS);
1161
1162   // Canonicalize shift immediate to the RHS.
1163   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1164     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1165       if (isa<ConstantInt>(SI->getOperand(1)))
1166         if (SI->getOpcode() == Instruction::Shl  ||
1167             SI->getOpcode() == Instruction::LShr ||
1168             SI->getOpcode() == Instruction::AShr   )
1169           std::swap(LHS, RHS);
1170
1171   unsigned LHSReg = getRegForValue(LHS);
1172   if (!LHSReg)
1173     return 0;
1174   bool LHSIsKill = hasTrivialKill(LHS);
1175
1176   if (NeedExtend)
1177     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1178
1179   unsigned ResultReg = 0;
1180   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1181     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1182     if (C->isNegative())
1183       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1184                                 SetFlags, WantResult);
1185     else
1186       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1187                                 WantResult);
1188   } else if (const auto *C = dyn_cast<Constant>(RHS))
1189     if (C->isNullValue())
1190       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1191                                 WantResult);
1192
1193   if (ResultReg)
1194     return ResultReg;
1195
1196   // Only extend the RHS within the instruction if there is a valid extend type.
1197   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1198       isValueAvailable(RHS)) {
1199     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1200       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1201         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1202           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1203           if (!RHSReg)
1204             return 0;
1205           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1206           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1207                                RHSIsKill, ExtendType, C->getZExtValue(),
1208                                SetFlags, WantResult);
1209         }
1210     unsigned RHSReg = getRegForValue(RHS);
1211     if (!RHSReg)
1212       return 0;
1213     bool RHSIsKill = hasTrivialKill(RHS);
1214     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1215                          ExtendType, 0, SetFlags, WantResult);
1216   }
1217
1218   // Check if the mul can be folded into the instruction.
1219   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1220     if (isMulPowOf2(RHS)) {
1221       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1222       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1223
1224       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1225         if (C->getValue().isPowerOf2())
1226           std::swap(MulLHS, MulRHS);
1227
1228       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1229       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1230       unsigned RHSReg = getRegForValue(MulLHS);
1231       if (!RHSReg)
1232         return 0;
1233       bool RHSIsKill = hasTrivialKill(MulLHS);
1234       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1235                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1236                                 WantResult);
1237       if (ResultReg)
1238         return ResultReg;
1239     }
1240   }
1241
1242   // Check if the shift can be folded into the instruction.
1243   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1245       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1246         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1247         switch (SI->getOpcode()) {
1248         default: break;
1249         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1250         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1251         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1252         }
1253         uint64_t ShiftVal = C->getZExtValue();
1254         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1255           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1256           if (!RHSReg)
1257             return 0;
1258           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1259           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1260                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1261                                     WantResult);
1262           if (ResultReg)
1263             return ResultReg;
1264         }
1265       }
1266     }
1267   }
1268
1269   unsigned RHSReg = getRegForValue(RHS);
1270   if (!RHSReg)
1271     return 0;
1272   bool RHSIsKill = hasTrivialKill(RHS);
1273
1274   if (NeedExtend)
1275     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276
1277   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1278                        SetFlags, WantResult);
1279 }
1280
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282                                         bool LHSIsKill, unsigned RHSReg,
1283                                         bool RHSIsKill, bool SetFlags,
1284                                         bool WantResult) {
1285   assert(LHSReg && RHSReg && "Invalid register number.");
1286
1287   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289     return 0;
1290
1291   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292     return 0;
1293
1294   static const unsigned OpcTable[2][2][2] = {
1295     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1296       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1297     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1299   };
1300   bool Is64Bit = RetVT == MVT::i64;
1301   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302   const TargetRegisterClass *RC =
1303       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304   unsigned ResultReg;
1305   if (WantResult)
1306     ResultReg = createResultReg(RC);
1307   else
1308     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309
1310   const MCInstrDesc &II = TII.get(Opc);
1311   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314       .addReg(LHSReg, getKillRegState(LHSIsKill))
1315       .addReg(RHSReg, getKillRegState(RHSIsKill));
1316   return ResultReg;
1317 }
1318
1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320                                         bool LHSIsKill, uint64_t Imm,
1321                                         bool SetFlags, bool WantResult) {
1322   assert(LHSReg && "Invalid register number.");
1323
1324   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325     return 0;
1326
1327   unsigned ShiftImm;
1328   if (isUInt<12>(Imm))
1329     ShiftImm = 0;
1330   else if ((Imm & 0xfff000) == Imm) {
1331     ShiftImm = 12;
1332     Imm >>= 12;
1333   } else
1334     return 0;
1335
1336   static const unsigned OpcTable[2][2][2] = {
1337     { { AArch64::SUBWri,  AArch64::SUBXri  },
1338       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1339     { { AArch64::SUBSWri, AArch64::SUBSXri },
1340       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1341   };
1342   bool Is64Bit = RetVT == MVT::i64;
1343   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344   const TargetRegisterClass *RC;
1345   if (SetFlags)
1346     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347   else
1348     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349   unsigned ResultReg;
1350   if (WantResult)
1351     ResultReg = createResultReg(RC);
1352   else
1353     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354
1355   const MCInstrDesc &II = TII.get(Opc);
1356   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358       .addReg(LHSReg, getKillRegState(LHSIsKill))
1359       .addImm(Imm)
1360       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361   return ResultReg;
1362 }
1363
1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365                                         bool LHSIsKill, unsigned RHSReg,
1366                                         bool RHSIsKill,
1367                                         AArch64_AM::ShiftExtendType ShiftType,
1368                                         uint64_t ShiftImm, bool SetFlags,
1369                                         bool WantResult) {
1370   assert(LHSReg && RHSReg && "Invalid register number.");
1371   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373
1374   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375     return 0;
1376
1377   // Don't deal with undefined shifts.
1378   if (ShiftImm >= RetVT.getSizeInBits())
1379     return 0;
1380
1381   static const unsigned OpcTable[2][2][2] = {
1382     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1383       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1384     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1386   };
1387   bool Is64Bit = RetVT == MVT::i64;
1388   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389   const TargetRegisterClass *RC =
1390       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391   unsigned ResultReg;
1392   if (WantResult)
1393     ResultReg = createResultReg(RC);
1394   else
1395     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396
1397   const MCInstrDesc &II = TII.get(Opc);
1398   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401       .addReg(LHSReg, getKillRegState(LHSIsKill))
1402       .addReg(RHSReg, getKillRegState(RHSIsKill))
1403       .addImm(getShifterImm(ShiftType, ShiftImm));
1404   return ResultReg;
1405 }
1406
1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408                                         bool LHSIsKill, unsigned RHSReg,
1409                                         bool RHSIsKill,
1410                                         AArch64_AM::ShiftExtendType ExtType,
1411                                         uint64_t ShiftImm, bool SetFlags,
1412                                         bool WantResult) {
1413   assert(LHSReg && RHSReg && "Invalid register number.");
1414   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416
1417   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418     return 0;
1419
1420   if (ShiftImm >= 4)
1421     return 0;
1422
1423   static const unsigned OpcTable[2][2][2] = {
1424     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1425       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1426     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1428   };
1429   bool Is64Bit = RetVT == MVT::i64;
1430   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431   const TargetRegisterClass *RC = nullptr;
1432   if (SetFlags)
1433     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434   else
1435     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436   unsigned ResultReg;
1437   if (WantResult)
1438     ResultReg = createResultReg(RC);
1439   else
1440     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441
1442   const MCInstrDesc &II = TII.get(Opc);
1443   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446       .addReg(LHSReg, getKillRegState(LHSIsKill))
1447       .addReg(RHSReg, getKillRegState(RHSIsKill))
1448       .addImm(getArithExtendImm(ExtType, ShiftImm));
1449   return ResultReg;
1450 }
1451
1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453   Type *Ty = LHS->getType();
1454   EVT EVT = TLI.getValueType(DL, Ty, true);
1455   if (!EVT.isSimple())
1456     return false;
1457   MVT VT = EVT.getSimpleVT();
1458
1459   switch (VT.SimpleTy) {
1460   default:
1461     return false;
1462   case MVT::i1:
1463   case MVT::i8:
1464   case MVT::i16:
1465   case MVT::i32:
1466   case MVT::i64:
1467     return emitICmp(VT, LHS, RHS, IsZExt);
1468   case MVT::f32:
1469   case MVT::f64:
1470     return emitFCmp(VT, LHS, RHS);
1471   }
1472 }
1473
1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475                                bool IsZExt) {
1476   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477                  IsZExt) != 0;
1478 }
1479
1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481                                   uint64_t Imm) {
1482   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485
1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488     return false;
1489
1490   // Check to see if the 2nd operand is a constant that we can encode directly
1491   // in the compare.
1492   bool UseImm = false;
1493   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494     if (CFP->isZero() && !CFP->isNegative())
1495       UseImm = true;
1496
1497   unsigned LHSReg = getRegForValue(LHS);
1498   if (!LHSReg)
1499     return false;
1500   bool LHSIsKill = hasTrivialKill(LHS);
1501
1502   if (UseImm) {
1503     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505         .addReg(LHSReg, getKillRegState(LHSIsKill));
1506     return true;
1507   }
1508
1509   unsigned RHSReg = getRegForValue(RHS);
1510   if (!RHSReg)
1511     return false;
1512   bool RHSIsKill = hasTrivialKill(RHS);
1513
1514   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516       .addReg(LHSReg, getKillRegState(LHSIsKill))
1517       .addReg(RHSReg, getKillRegState(RHSIsKill));
1518   return true;
1519 }
1520
1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522                                   bool SetFlags, bool WantResult, bool IsZExt) {
1523   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524                     IsZExt);
1525 }
1526
1527 /// This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533                                       int64_t Imm) {
1534   unsigned ResultReg;
1535   if (Imm < 0)
1536     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537   else
1538     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539
1540   if (ResultReg)
1541     return ResultReg;
1542
1543   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544   if (!CReg)
1545     return 0;
1546
1547   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548   return ResultReg;
1549 }
1550
1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552                                   bool SetFlags, bool WantResult, bool IsZExt) {
1553   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554                     IsZExt);
1555 }
1556
1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558                                       bool LHSIsKill, unsigned RHSReg,
1559                                       bool RHSIsKill, bool WantResult) {
1560   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561                        RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563
1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565                                       bool LHSIsKill, unsigned RHSReg,
1566                                       bool RHSIsKill,
1567                                       AArch64_AM::ShiftExtendType ShiftType,
1568                                       uint64_t ShiftImm, bool WantResult) {
1569   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571                        WantResult);
1572 }
1573
1574 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575                                         const Value *LHS, const Value *RHS) {
1576   // Canonicalize immediates to the RHS first.
1577   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578     std::swap(LHS, RHS);
1579
1580   // Canonicalize mul by power-of-2 to the RHS.
1581   if (LHS->hasOneUse() && isValueAvailable(LHS))
1582     if (isMulPowOf2(LHS))
1583       std::swap(LHS, RHS);
1584
1585   // Canonicalize shift immediate to the RHS.
1586   if (LHS->hasOneUse() && isValueAvailable(LHS))
1587     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588       if (isa<ConstantInt>(SI->getOperand(1)))
1589         std::swap(LHS, RHS);
1590
1591   unsigned LHSReg = getRegForValue(LHS);
1592   if (!LHSReg)
1593     return 0;
1594   bool LHSIsKill = hasTrivialKill(LHS);
1595
1596   unsigned ResultReg = 0;
1597   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598     uint64_t Imm = C->getZExtValue();
1599     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600   }
1601   if (ResultReg)
1602     return ResultReg;
1603
1604   // Check if the mul can be folded into the instruction.
1605   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606     if (isMulPowOf2(RHS)) {
1607       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609
1610       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611         if (C->getValue().isPowerOf2())
1612           std::swap(MulLHS, MulRHS);
1613
1614       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616
1617       unsigned RHSReg = getRegForValue(MulLHS);
1618       if (!RHSReg)
1619         return 0;
1620       bool RHSIsKill = hasTrivialKill(MulLHS);
1621       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622                                    RHSIsKill, ShiftVal);
1623       if (ResultReg)
1624         return ResultReg;
1625     }
1626   }
1627
1628   // Check if the shift can be folded into the instruction.
1629   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632         uint64_t ShiftVal = C->getZExtValue();
1633         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634         if (!RHSReg)
1635           return 0;
1636         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638                                      RHSIsKill, ShiftVal);
1639         if (ResultReg)
1640           return ResultReg;
1641       }
1642   }
1643
1644   unsigned RHSReg = getRegForValue(RHS);
1645   if (!RHSReg)
1646     return 0;
1647   bool RHSIsKill = hasTrivialKill(RHS);
1648
1649   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654   }
1655   return ResultReg;
1656 }
1657
1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659                                            unsigned LHSReg, bool LHSIsKill,
1660                                            uint64_t Imm) {
1661   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662                 "ISD nodes are not consecutive!");
1663   static const unsigned OpcTable[3][2] = {
1664     { AArch64::ANDWri, AArch64::ANDXri },
1665     { AArch64::ORRWri, AArch64::ORRXri },
1666     { AArch64::EORWri, AArch64::EORXri }
1667   };
1668   const TargetRegisterClass *RC;
1669   unsigned Opc;
1670   unsigned RegSize;
1671   switch (RetVT.SimpleTy) {
1672   default:
1673     return 0;
1674   case MVT::i1:
1675   case MVT::i8:
1676   case MVT::i16:
1677   case MVT::i32: {
1678     unsigned Idx = ISDOpc - ISD::AND;
1679     Opc = OpcTable[Idx][0];
1680     RC = &AArch64::GPR32spRegClass;
1681     RegSize = 32;
1682     break;
1683   }
1684   case MVT::i64:
1685     Opc = OpcTable[ISDOpc - ISD::AND][1];
1686     RC = &AArch64::GPR64spRegClass;
1687     RegSize = 64;
1688     break;
1689   }
1690
1691   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692     return 0;
1693
1694   unsigned ResultReg =
1695       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700   }
1701   return ResultReg;
1702 }
1703
1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705                                            unsigned LHSReg, bool LHSIsKill,
1706                                            unsigned RHSReg, bool RHSIsKill,
1707                                            uint64_t ShiftImm) {
1708   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709                 "ISD nodes are not consecutive!");
1710   static const unsigned OpcTable[3][2] = {
1711     { AArch64::ANDWrs, AArch64::ANDXrs },
1712     { AArch64::ORRWrs, AArch64::ORRXrs },
1713     { AArch64::EORWrs, AArch64::EORXrs }
1714   };
1715
1716   // Don't deal with undefined shifts.
1717   if (ShiftImm >= RetVT.getSizeInBits())
1718     return 0;
1719
1720   const TargetRegisterClass *RC;
1721   unsigned Opc;
1722   switch (RetVT.SimpleTy) {
1723   default:
1724     return 0;
1725   case MVT::i1:
1726   case MVT::i8:
1727   case MVT::i16:
1728   case MVT::i32:
1729     Opc = OpcTable[ISDOpc - ISD::AND][0];
1730     RC = &AArch64::GPR32RegClass;
1731     break;
1732   case MVT::i64:
1733     Opc = OpcTable[ISDOpc - ISD::AND][1];
1734     RC = &AArch64::GPR64RegClass;
1735     break;
1736   }
1737   unsigned ResultReg =
1738       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1739                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743   }
1744   return ResultReg;
1745 }
1746
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748                                      uint64_t Imm) {
1749   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753                                    bool WantZExt, MachineMemOperand *MMO) {
1754   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755     return 0;
1756
1757   // Simplify this down to something we can handle.
1758   if (!simplifyAddress(Addr, VT))
1759     return 0;
1760
1761   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762   if (!ScaleFactor)
1763     llvm_unreachable("Unexpected value type.");
1764
1765   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767   bool UseScaled = true;
1768   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769     UseScaled = false;
1770     ScaleFactor = 1;
1771   }
1772
1773   static const unsigned GPOpcTable[2][8][4] = {
1774     // Sign-extend.
1775     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1776         AArch64::LDURXi  },
1777       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1778         AArch64::LDURXi  },
1779       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1780         AArch64::LDRXui  },
1781       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1782         AArch64::LDRXui  },
1783       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784         AArch64::LDRXroX },
1785       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786         AArch64::LDRXroX },
1787       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788         AArch64::LDRXroW },
1789       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790         AArch64::LDRXroW }
1791     },
1792     // Zero-extend.
1793     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1794         AArch64::LDURXi  },
1795       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1796         AArch64::LDURXi  },
1797       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1798         AArch64::LDRXui  },
1799       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1800         AArch64::LDRXui  },
1801       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1802         AArch64::LDRXroX },
1803       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1804         AArch64::LDRXroX },
1805       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1806         AArch64::LDRXroW },
1807       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1808         AArch64::LDRXroW }
1809     }
1810   };
1811
1812   static const unsigned FPOpcTable[4][2] = {
1813     { AArch64::LDURSi,  AArch64::LDURDi  },
1814     { AArch64::LDRSui,  AArch64::LDRDui  },
1815     { AArch64::LDRSroX, AArch64::LDRDroX },
1816     { AArch64::LDRSroW, AArch64::LDRDroW }
1817   };
1818
1819   unsigned Opc;
1820   const TargetRegisterClass *RC;
1821   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822                       Addr.getOffsetReg();
1823   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825       Addr.getExtendType() == AArch64_AM::SXTW)
1826     Idx++;
1827
1828   bool IsRet64Bit = RetVT == MVT::i64;
1829   switch (VT.SimpleTy) {
1830   default:
1831     llvm_unreachable("Unexpected value type.");
1832   case MVT::i1: // Intentional fall-through.
1833   case MVT::i8:
1834     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835     RC = (IsRet64Bit && !WantZExt) ?
1836              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837     break;
1838   case MVT::i16:
1839     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840     RC = (IsRet64Bit && !WantZExt) ?
1841              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842     break;
1843   case MVT::i32:
1844     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845     RC = (IsRet64Bit && !WantZExt) ?
1846              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847     break;
1848   case MVT::i64:
1849     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850     RC = &AArch64::GPR64RegClass;
1851     break;
1852   case MVT::f32:
1853     Opc = FPOpcTable[Idx][0];
1854     RC = &AArch64::FPR32RegClass;
1855     break;
1856   case MVT::f64:
1857     Opc = FPOpcTable[Idx][1];
1858     RC = &AArch64::FPR64RegClass;
1859     break;
1860   }
1861
1862   // Create the base instruction, then add the operands.
1863   unsigned ResultReg = createResultReg(RC);
1864   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865                                     TII.get(Opc), ResultReg);
1866   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867
1868   // Loading an i1 requires special handling.
1869   if (VT == MVT::i1) {
1870     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871     assert(ANDReg && "Unexpected AND instruction emission failure.");
1872     ResultReg = ANDReg;
1873   }
1874
1875   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876   // the 32bit reg to a 64bit reg.
1877   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881         .addImm(0)
1882         .addReg(ResultReg, getKillRegState(true))
1883         .addImm(AArch64::sub_32);
1884     ResultReg = Reg64;
1885   }
1886   return ResultReg;
1887 }
1888
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890   MVT VT;
1891   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892     return false;
1893
1894   if (VT.isVector())
1895     return selectOperator(I, I->getOpcode());
1896
1897   unsigned ResultReg;
1898   switch (I->getOpcode()) {
1899   default:
1900     llvm_unreachable("Unexpected instruction.");
1901   case Instruction::Add:
1902     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903     break;
1904   case Instruction::Sub:
1905     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906     break;
1907   }
1908   if (!ResultReg)
1909     return false;
1910
1911   updateValueMap(I, ResultReg);
1912   return true;
1913 }
1914
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916   MVT VT;
1917   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918     return false;
1919
1920   if (VT.isVector())
1921     return selectOperator(I, I->getOpcode());
1922
1923   unsigned ResultReg;
1924   switch (I->getOpcode()) {
1925   default:
1926     llvm_unreachable("Unexpected instruction.");
1927   case Instruction::And:
1928     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929     break;
1930   case Instruction::Or:
1931     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932     break;
1933   case Instruction::Xor:
1934     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935     break;
1936   }
1937   if (!ResultReg)
1938     return false;
1939
1940   updateValueMap(I, ResultReg);
1941   return true;
1942 }
1943
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945   MVT VT;
1946   // Verify we have a legal type before going any further.  Currently, we handle
1947   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950       cast<LoadInst>(I)->isAtomic())
1951     return false;
1952
1953   const Value *SV = I->getOperand(0);
1954   if (TLI.supportSwiftError()) {
1955     // Swifterror values can come from either a function parameter with
1956     // swifterror attribute or an alloca with swifterror attribute.
1957     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958       if (Arg->hasSwiftErrorAttr())
1959         return false;
1960     }
1961
1962     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963       if (Alloca->isSwiftError())
1964         return false;
1965     }
1966   }
1967
1968   // See if we can handle this address.
1969   Address Addr;
1970   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971     return false;
1972
1973   // Fold the following sign-/zero-extend into the load instruction.
1974   bool WantZExt = true;
1975   MVT RetVT = VT;
1976   const Value *IntExtVal = nullptr;
1977   if (I->hasOneUse()) {
1978     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979       if (isTypeSupported(ZE->getType(), RetVT))
1980         IntExtVal = ZE;
1981       else
1982         RetVT = VT;
1983     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984       if (isTypeSupported(SE->getType(), RetVT))
1985         IntExtVal = SE;
1986       else
1987         RetVT = VT;
1988       WantZExt = false;
1989     }
1990   }
1991
1992   unsigned ResultReg =
1993       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994   if (!ResultReg)
1995     return false;
1996
1997   // There are a few different cases we have to handle, because the load or the
1998   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1999   // SelectionDAG. There is also an ordering issue when both instructions are in
2000   // different basic blocks.
2001   // 1.) The load instruction is selected by FastISel, but the integer extend
2002   //     not. This usually happens when the integer extend is in a different
2003   //     basic block and SelectionDAG took over for that basic block.
2004   // 2.) The load instruction is selected before the integer extend. This only
2005   //     happens when the integer extend is in a different basic block.
2006   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007   //     by FastISel. This happens if there are instructions between the load
2008   //     and the integer extend that couldn't be selected by FastISel.
2009   if (IntExtVal) {
2010     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012     // it when it selects the integer extend.
2013     unsigned Reg = lookUpRegForValue(IntExtVal);
2014     auto *MI = MRI.getUniqueVRegDef(Reg);
2015     if (!MI) {
2016       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017         if (WantZExt) {
2018           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019           std::prev(FuncInfo.InsertPt)->eraseFromParent();
2020           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2021         } else
2022           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023                                                  /*IsKill=*/true,
2024                                                  AArch64::sub_32);
2025       }
2026       updateValueMap(I, ResultReg);
2027       return true;
2028     }
2029
2030     // The integer extend has already been emitted - delete all the instructions
2031     // that have been emitted by the integer extend lowering code and use the
2032     // result from the load instruction directly.
2033     while (MI) {
2034       Reg = 0;
2035       for (auto &Opnd : MI->uses()) {
2036         if (Opnd.isReg()) {
2037           Reg = Opnd.getReg();
2038           break;
2039         }
2040       }
2041       MI->eraseFromParent();
2042       MI = nullptr;
2043       if (Reg)
2044         MI = MRI.getUniqueVRegDef(Reg);
2045     }
2046     updateValueMap(IntExtVal, ResultReg);
2047     return true;
2048   }
2049
2050   updateValueMap(I, ResultReg);
2051   return true;
2052 }
2053
2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055                                        unsigned AddrReg,
2056                                        MachineMemOperand *MMO) {
2057   unsigned Opc;
2058   switch (VT.SimpleTy) {
2059   default: return false;
2060   case MVT::i8:  Opc = AArch64::STLRB; break;
2061   case MVT::i16: Opc = AArch64::STLRH; break;
2062   case MVT::i32: Opc = AArch64::STLRW; break;
2063   case MVT::i64: Opc = AArch64::STLRX; break;
2064   }
2065
2066   const MCInstrDesc &II = TII.get(Opc);
2067   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2070       .addReg(SrcReg)
2071       .addReg(AddrReg)
2072       .addMemOperand(MMO);
2073   return true;
2074 }
2075
2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077                                 MachineMemOperand *MMO) {
2078   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079     return false;
2080
2081   // Simplify this down to something we can handle.
2082   if (!simplifyAddress(Addr, VT))
2083     return false;
2084
2085   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086   if (!ScaleFactor)
2087     llvm_unreachable("Unexpected value type.");
2088
2089   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091   bool UseScaled = true;
2092   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093     UseScaled = false;
2094     ScaleFactor = 1;
2095   }
2096
2097   static const unsigned OpcTable[4][6] = {
2098     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2099       AArch64::STURSi,   AArch64::STURDi },
2100     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2101       AArch64::STRSui,   AArch64::STRDui },
2102     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103       AArch64::STRSroX,  AArch64::STRDroX },
2104     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105       AArch64::STRSroW,  AArch64::STRDroW }
2106   };
2107
2108   unsigned Opc;
2109   bool VTIsi1 = false;
2110   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111                       Addr.getOffsetReg();
2112   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114       Addr.getExtendType() == AArch64_AM::SXTW)
2115     Idx++;
2116
2117   switch (VT.SimpleTy) {
2118   default: llvm_unreachable("Unexpected value type.");
2119   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2120   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2121   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126   }
2127
2128   // Storing an i1 requires special handling.
2129   if (VTIsi1 && SrcReg != AArch64::WZR) {
2130     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2131     assert(ANDReg && "Unexpected AND instruction emission failure.");
2132     SrcReg = ANDReg;
2133   }
2134   // Create the base instruction, then add the operands.
2135   const MCInstrDesc &II = TII.get(Opc);
2136   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137   MachineInstrBuilder MIB =
2138       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2139   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140
2141   return true;
2142 }
2143
2144 bool AArch64FastISel::selectStore(const Instruction *I) {
2145   MVT VT;
2146   const Value *Op0 = I->getOperand(0);
2147   // Verify we have a legal type before going any further.  Currently, we handle
2148   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151     return false;
2152
2153   const Value *PtrV = I->getOperand(1);
2154   if (TLI.supportSwiftError()) {
2155     // Swifterror values can come from either a function parameter with
2156     // swifterror attribute or an alloca with swifterror attribute.
2157     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158       if (Arg->hasSwiftErrorAttr())
2159         return false;
2160     }
2161
2162     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163       if (Alloca->isSwiftError())
2164         return false;
2165     }
2166   }
2167
2168   // Get the value to be stored into a register. Use the zero register directly
2169   // when possible to avoid an unnecessary copy and a wasted register.
2170   unsigned SrcReg = 0;
2171   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172     if (CI->isZero())
2173       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175     if (CF->isZero() && !CF->isNegative()) {
2176       VT = MVT::getIntegerVT(VT.getSizeInBits());
2177       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178     }
2179   }
2180
2181   if (!SrcReg)
2182     SrcReg = getRegForValue(Op0);
2183
2184   if (!SrcReg)
2185     return false;
2186
2187   auto *SI = cast<StoreInst>(I);
2188
2189   // Try to emit a STLR for seq_cst/release.
2190   if (SI->isAtomic()) {
2191     AtomicOrdering Ord = SI->getOrdering();
2192     // The non-atomic instructions are sufficient for relaxed stores.
2193     if (isReleaseOrStronger(Ord)) {
2194       // The STLR addressing mode only supports a base reg; pass that directly.
2195       unsigned AddrReg = getRegForValue(PtrV);
2196       return emitStoreRelease(VT, SrcReg, AddrReg,
2197                               createMachineMemOperandFor(I));
2198     }
2199   }
2200
2201   // See if we can handle this address.
2202   Address Addr;
2203   if (!computeAddress(PtrV, Addr, Op0->getType()))
2204     return false;
2205
2206   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207     return false;
2208   return true;
2209 }
2210
2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2212   switch (Pred) {
2213   case CmpInst::FCMP_ONE:
2214   case CmpInst::FCMP_UEQ:
2215   default:
2216     // AL is our "false" for now. The other two need more compares.
2217     return AArch64CC::AL;
2218   case CmpInst::ICMP_EQ:
2219   case CmpInst::FCMP_OEQ:
2220     return AArch64CC::EQ;
2221   case CmpInst::ICMP_SGT:
2222   case CmpInst::FCMP_OGT:
2223     return AArch64CC::GT;
2224   case CmpInst::ICMP_SGE:
2225   case CmpInst::FCMP_OGE:
2226     return AArch64CC::GE;
2227   case CmpInst::ICMP_UGT:
2228   case CmpInst::FCMP_UGT:
2229     return AArch64CC::HI;
2230   case CmpInst::FCMP_OLT:
2231     return AArch64CC::MI;
2232   case CmpInst::ICMP_ULE:
2233   case CmpInst::FCMP_OLE:
2234     return AArch64CC::LS;
2235   case CmpInst::FCMP_ORD:
2236     return AArch64CC::VC;
2237   case CmpInst::FCMP_UNO:
2238     return AArch64CC::VS;
2239   case CmpInst::FCMP_UGE:
2240     return AArch64CC::PL;
2241   case CmpInst::ICMP_SLT:
2242   case CmpInst::FCMP_ULT:
2243     return AArch64CC::LT;
2244   case CmpInst::ICMP_SLE:
2245   case CmpInst::FCMP_ULE:
2246     return AArch64CC::LE;
2247   case CmpInst::FCMP_UNE:
2248   case CmpInst::ICMP_NE:
2249     return AArch64CC::NE;
2250   case CmpInst::ICMP_UGE:
2251     return AArch64CC::HS;
2252   case CmpInst::ICMP_ULT:
2253     return AArch64CC::LO;
2254   }
2255 }
2256
2257 /// Try to emit a combined compare-and-branch instruction.
2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2260   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2261   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2262
2263   const Value *LHS = CI->getOperand(0);
2264   const Value *RHS = CI->getOperand(1);
2265
2266   MVT VT;
2267   if (!isTypeSupported(LHS->getType(), VT))
2268     return false;
2269
2270   unsigned BW = VT.getSizeInBits();
2271   if (BW > 64)
2272     return false;
2273
2274   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2275   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2276
2277   // Try to take advantage of fallthrough opportunities.
2278   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2279     std::swap(TBB, FBB);
2280     Predicate = CmpInst::getInversePredicate(Predicate);
2281   }
2282
2283   int TestBit = -1;
2284   bool IsCmpNE;
2285   switch (Predicate) {
2286   default:
2287     return false;
2288   case CmpInst::ICMP_EQ:
2289   case CmpInst::ICMP_NE:
2290     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2291       std::swap(LHS, RHS);
2292
2293     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2294       return false;
2295
2296     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2297       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2298         const Value *AndLHS = AI->getOperand(0);
2299         const Value *AndRHS = AI->getOperand(1);
2300
2301         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2302           if (C->getValue().isPowerOf2())
2303             std::swap(AndLHS, AndRHS);
2304
2305         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2306           if (C->getValue().isPowerOf2()) {
2307             TestBit = C->getValue().logBase2();
2308             LHS = AndLHS;
2309           }
2310       }
2311
2312     if (VT == MVT::i1)
2313       TestBit = 0;
2314
2315     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2316     break;
2317   case CmpInst::ICMP_SLT:
2318   case CmpInst::ICMP_SGE:
2319     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2320       return false;
2321
2322     TestBit = BW - 1;
2323     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2324     break;
2325   case CmpInst::ICMP_SGT:
2326   case CmpInst::ICMP_SLE:
2327     if (!isa<ConstantInt>(RHS))
2328       return false;
2329
2330     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2331       return false;
2332
2333     TestBit = BW - 1;
2334     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2335     break;
2336   } // end switch
2337
2338   static const unsigned OpcTable[2][2][2] = {
2339     { {AArch64::CBZW,  AArch64::CBZX },
2340       {AArch64::CBNZW, AArch64::CBNZX} },
2341     { {AArch64::TBZW,  AArch64::TBZX },
2342       {AArch64::TBNZW, AArch64::TBNZX} }
2343   };
2344
2345   bool IsBitTest = TestBit != -1;
2346   bool Is64Bit = BW == 64;
2347   if (TestBit < 32 && TestBit >= 0)
2348     Is64Bit = false;
2349
2350   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2351   const MCInstrDesc &II = TII.get(Opc);
2352
2353   unsigned SrcReg = getRegForValue(LHS);
2354   if (!SrcReg)
2355     return false;
2356   bool SrcIsKill = hasTrivialKill(LHS);
2357
2358   if (BW == 64 && !Is64Bit)
2359     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2360                                         AArch64::sub_32);
2361
2362   if ((BW < 32) && !IsBitTest)
2363     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2364
2365   // Emit the combined compare and branch instruction.
2366   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2367   MachineInstrBuilder MIB =
2368       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2369           .addReg(SrcReg, getKillRegState(SrcIsKill));
2370   if (IsBitTest)
2371     MIB.addImm(TestBit);
2372   MIB.addMBB(TBB);
2373
2374   finishCondBranch(BI->getParent(), TBB, FBB);
2375   return true;
2376 }
2377
2378 bool AArch64FastISel::selectBranch(const Instruction *I) {
2379   const BranchInst *BI = cast<BranchInst>(I);
2380   if (BI->isUnconditional()) {
2381     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2382     fastEmitBranch(MSucc, BI->getDebugLoc());
2383     return true;
2384   }
2385
2386   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2388
2389   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2390     if (CI->hasOneUse() && isValueAvailable(CI)) {
2391       // Try to optimize or fold the cmp.
2392       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2393       switch (Predicate) {
2394       default:
2395         break;
2396       case CmpInst::FCMP_FALSE:
2397         fastEmitBranch(FBB, DbgLoc);
2398         return true;
2399       case CmpInst::FCMP_TRUE:
2400         fastEmitBranch(TBB, DbgLoc);
2401         return true;
2402       }
2403
2404       // Try to emit a combined compare-and-branch first.
2405       if (emitCompareAndBranch(BI))
2406         return true;
2407
2408       // Try to take advantage of fallthrough opportunities.
2409       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2410         std::swap(TBB, FBB);
2411         Predicate = CmpInst::getInversePredicate(Predicate);
2412       }
2413
2414       // Emit the cmp.
2415       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2416         return false;
2417
2418       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2419       // instruction.
2420       AArch64CC::CondCode CC = getCompareCC(Predicate);
2421       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2422       switch (Predicate) {
2423       default:
2424         break;
2425       case CmpInst::FCMP_UEQ:
2426         ExtraCC = AArch64CC::EQ;
2427         CC = AArch64CC::VS;
2428         break;
2429       case CmpInst::FCMP_ONE:
2430         ExtraCC = AArch64CC::MI;
2431         CC = AArch64CC::GT;
2432         break;
2433       }
2434       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2435
2436       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2437       if (ExtraCC != AArch64CC::AL) {
2438         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2439             .addImm(ExtraCC)
2440             .addMBB(TBB);
2441       }
2442
2443       // Emit the branch.
2444       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2445           .addImm(CC)
2446           .addMBB(TBB);
2447
2448       finishCondBranch(BI->getParent(), TBB, FBB);
2449       return true;
2450     }
2451   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2452     uint64_t Imm = CI->getZExtValue();
2453     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2454     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2455         .addMBB(Target);
2456
2457     // Obtain the branch probability and add the target to the successor list.
2458     if (FuncInfo.BPI) {
2459       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2460           BI->getParent(), Target->getBasicBlock());
2461       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2462     } else
2463       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2464     return true;
2465   } else {
2466     AArch64CC::CondCode CC = AArch64CC::NE;
2467     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2468       // Fake request the condition, otherwise the intrinsic might be completely
2469       // optimized away.
2470       unsigned CondReg = getRegForValue(BI->getCondition());
2471       if (!CondReg)
2472         return false;
2473
2474       // Emit the branch.
2475       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2476         .addImm(CC)
2477         .addMBB(TBB);
2478
2479       finishCondBranch(BI->getParent(), TBB, FBB);
2480       return true;
2481     }
2482   }
2483
2484   unsigned CondReg = getRegForValue(BI->getCondition());
2485   if (CondReg == 0)
2486     return false;
2487   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2488
2489   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2490   unsigned Opcode = AArch64::TBNZW;
2491   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492     std::swap(TBB, FBB);
2493     Opcode = AArch64::TBZW;
2494   }
2495
2496   const MCInstrDesc &II = TII.get(Opcode);
2497   unsigned ConstrainedCondReg
2498     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2500       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2501       .addImm(0)
2502       .addMBB(TBB);
2503
2504   finishCondBranch(BI->getParent(), TBB, FBB);
2505   return true;
2506 }
2507
2508 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2511   if (AddrReg == 0)
2512     return false;
2513
2514   // Emit the indirect branch.
2515   const MCInstrDesc &II = TII.get(AArch64::BR);
2516   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2517   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2518
2519   // Make sure the CFG is up-to-date.
2520   for (auto *Succ : BI->successors())
2521     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2522
2523   return true;
2524 }
2525
2526 bool AArch64FastISel::selectCmp(const Instruction *I) {
2527   const CmpInst *CI = cast<CmpInst>(I);
2528
2529   // Vectors of i1 are weird: bail out.
2530   if (CI->getType()->isVectorTy())
2531     return false;
2532
2533   // Try to optimize or fold the cmp.
2534   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2535   unsigned ResultReg = 0;
2536   switch (Predicate) {
2537   default:
2538     break;
2539   case CmpInst::FCMP_FALSE:
2540     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2541     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2542             TII.get(TargetOpcode::COPY), ResultReg)
2543         .addReg(AArch64::WZR, getKillRegState(true));
2544     break;
2545   case CmpInst::FCMP_TRUE:
2546     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2547     break;
2548   }
2549
2550   if (ResultReg) {
2551     updateValueMap(I, ResultReg);
2552     return true;
2553   }
2554
2555   // Emit the cmp.
2556   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2557     return false;
2558
2559   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2560
2561   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2562   // condition codes are inverted, because they are used by CSINC.
2563   static unsigned CondCodeTable[2][2] = {
2564     { AArch64CC::NE, AArch64CC::VC },
2565     { AArch64CC::PL, AArch64CC::LE }
2566   };
2567   unsigned *CondCodes = nullptr;
2568   switch (Predicate) {
2569   default:
2570     break;
2571   case CmpInst::FCMP_UEQ:
2572     CondCodes = &CondCodeTable[0][0];
2573     break;
2574   case CmpInst::FCMP_ONE:
2575     CondCodes = &CondCodeTable[1][0];
2576     break;
2577   }
2578
2579   if (CondCodes) {
2580     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2581     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2582             TmpReg1)
2583         .addReg(AArch64::WZR, getKillRegState(true))
2584         .addReg(AArch64::WZR, getKillRegState(true))
2585         .addImm(CondCodes[0]);
2586     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587             ResultReg)
2588         .addReg(TmpReg1, getKillRegState(true))
2589         .addReg(AArch64::WZR, getKillRegState(true))
2590         .addImm(CondCodes[1]);
2591
2592     updateValueMap(I, ResultReg);
2593     return true;
2594   }
2595
2596   // Now set a register based on the comparison.
2597   AArch64CC::CondCode CC = getCompareCC(Predicate);
2598   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2599   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2600   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2601           ResultReg)
2602       .addReg(AArch64::WZR, getKillRegState(true))
2603       .addReg(AArch64::WZR, getKillRegState(true))
2604       .addImm(invertedCC);
2605
2606   updateValueMap(I, ResultReg);
2607   return true;
2608 }
2609
2610 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2611 /// value.
2612 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2613   if (!SI->getType()->isIntegerTy(1))
2614     return false;
2615
2616   const Value *Src1Val, *Src2Val;
2617   unsigned Opc = 0;
2618   bool NeedExtraOp = false;
2619   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2620     if (CI->isOne()) {
2621       Src1Val = SI->getCondition();
2622       Src2Val = SI->getFalseValue();
2623       Opc = AArch64::ORRWrr;
2624     } else {
2625       assert(CI->isZero());
2626       Src1Val = SI->getFalseValue();
2627       Src2Val = SI->getCondition();
2628       Opc = AArch64::BICWrr;
2629     }
2630   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2631     if (CI->isOne()) {
2632       Src1Val = SI->getCondition();
2633       Src2Val = SI->getTrueValue();
2634       Opc = AArch64::ORRWrr;
2635       NeedExtraOp = true;
2636     } else {
2637       assert(CI->isZero());
2638       Src1Val = SI->getCondition();
2639       Src2Val = SI->getTrueValue();
2640       Opc = AArch64::ANDWrr;
2641     }
2642   }
2643
2644   if (!Opc)
2645     return false;
2646
2647   unsigned Src1Reg = getRegForValue(Src1Val);
2648   if (!Src1Reg)
2649     return false;
2650   bool Src1IsKill = hasTrivialKill(Src1Val);
2651
2652   unsigned Src2Reg = getRegForValue(Src2Val);
2653   if (!Src2Reg)
2654     return false;
2655   bool Src2IsKill = hasTrivialKill(Src2Val);
2656
2657   if (NeedExtraOp) {
2658     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2659     Src1IsKill = true;
2660   }
2661   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2662                                        Src1IsKill, Src2Reg, Src2IsKill);
2663   updateValueMap(SI, ResultReg);
2664   return true;
2665 }
2666
2667 bool AArch64FastISel::selectSelect(const Instruction *I) {
2668   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2669   MVT VT;
2670   if (!isTypeSupported(I->getType(), VT))
2671     return false;
2672
2673   unsigned Opc;
2674   const TargetRegisterClass *RC;
2675   switch (VT.SimpleTy) {
2676   default:
2677     return false;
2678   case MVT::i1:
2679   case MVT::i8:
2680   case MVT::i16:
2681   case MVT::i32:
2682     Opc = AArch64::CSELWr;
2683     RC = &AArch64::GPR32RegClass;
2684     break;
2685   case MVT::i64:
2686     Opc = AArch64::CSELXr;
2687     RC = &AArch64::GPR64RegClass;
2688     break;
2689   case MVT::f32:
2690     Opc = AArch64::FCSELSrrr;
2691     RC = &AArch64::FPR32RegClass;
2692     break;
2693   case MVT::f64:
2694     Opc = AArch64::FCSELDrrr;
2695     RC = &AArch64::FPR64RegClass;
2696     break;
2697   }
2698
2699   const SelectInst *SI = cast<SelectInst>(I);
2700   const Value *Cond = SI->getCondition();
2701   AArch64CC::CondCode CC = AArch64CC::NE;
2702   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2703
2704   if (optimizeSelect(SI))
2705     return true;
2706
2707   // Try to pickup the flags, so we don't have to emit another compare.
2708   if (foldXALUIntrinsic(CC, I, Cond)) {
2709     // Fake request the condition to force emission of the XALU intrinsic.
2710     unsigned CondReg = getRegForValue(Cond);
2711     if (!CondReg)
2712       return false;
2713   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2714              isValueAvailable(Cond)) {
2715     const auto *Cmp = cast<CmpInst>(Cond);
2716     // Try to optimize or fold the cmp.
2717     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2718     const Value *FoldSelect = nullptr;
2719     switch (Predicate) {
2720     default:
2721       break;
2722     case CmpInst::FCMP_FALSE:
2723       FoldSelect = SI->getFalseValue();
2724       break;
2725     case CmpInst::FCMP_TRUE:
2726       FoldSelect = SI->getTrueValue();
2727       break;
2728     }
2729
2730     if (FoldSelect) {
2731       unsigned SrcReg = getRegForValue(FoldSelect);
2732       if (!SrcReg)
2733         return false;
2734       unsigned UseReg = lookUpRegForValue(SI);
2735       if (UseReg)
2736         MRI.clearKillFlags(UseReg);
2737
2738       updateValueMap(I, SrcReg);
2739       return true;
2740     }
2741
2742     // Emit the cmp.
2743     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2744       return false;
2745
2746     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747     CC = getCompareCC(Predicate);
2748     switch (Predicate) {
2749     default:
2750       break;
2751     case CmpInst::FCMP_UEQ:
2752       ExtraCC = AArch64CC::EQ;
2753       CC = AArch64CC::VS;
2754       break;
2755     case CmpInst::FCMP_ONE:
2756       ExtraCC = AArch64CC::MI;
2757       CC = AArch64CC::GT;
2758       break;
2759     }
2760     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761   } else {
2762     unsigned CondReg = getRegForValue(Cond);
2763     if (!CondReg)
2764       return false;
2765     bool CondIsKill = hasTrivialKill(Cond);
2766
2767     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768     CondReg = constrainOperandRegClass(II, CondReg, 1);
2769
2770     // Emit a TST instruction (ANDS wzr, reg, #imm).
2771     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2772             AArch64::WZR)
2773         .addReg(CondReg, getKillRegState(CondIsKill))
2774         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2775   }
2776
2777   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2778   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2779
2780   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2781   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2782
2783   if (!Src1Reg || !Src2Reg)
2784     return false;
2785
2786   if (ExtraCC != AArch64CC::AL) {
2787     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2788                                Src2IsKill, ExtraCC);
2789     Src2IsKill = true;
2790   }
2791   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2792                                         Src2IsKill, CC);
2793   updateValueMap(I, ResultReg);
2794   return true;
2795 }
2796
2797 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2798   Value *V = I->getOperand(0);
2799   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2800     return false;
2801
2802   unsigned Op = getRegForValue(V);
2803   if (Op == 0)
2804     return false;
2805
2806   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2807   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2808           ResultReg).addReg(Op);
2809   updateValueMap(I, ResultReg);
2810   return true;
2811 }
2812
2813 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2814   Value *V = I->getOperand(0);
2815   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2816     return false;
2817
2818   unsigned Op = getRegForValue(V);
2819   if (Op == 0)
2820     return false;
2821
2822   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2823   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2824           ResultReg).addReg(Op);
2825   updateValueMap(I, ResultReg);
2826   return true;
2827 }
2828
2829 // FPToUI and FPToSI
2830 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2831   MVT DestVT;
2832   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2833     return false;
2834
2835   unsigned SrcReg = getRegForValue(I->getOperand(0));
2836   if (SrcReg == 0)
2837     return false;
2838
2839   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2840   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2841     return false;
2842
2843   unsigned Opc;
2844   if (SrcVT == MVT::f64) {
2845     if (Signed)
2846       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2847     else
2848       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2849   } else {
2850     if (Signed)
2851       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2852     else
2853       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2854   }
2855   unsigned ResultReg = createResultReg(
2856       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2857   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2858       .addReg(SrcReg);
2859   updateValueMap(I, ResultReg);
2860   return true;
2861 }
2862
2863 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2864   MVT DestVT;
2865   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2866     return false;
2867   // Let regular ISEL handle FP16
2868   if (DestVT == MVT::f16)
2869     return false;
2870
2871   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2872          "Unexpected value type.");
2873
2874   unsigned SrcReg = getRegForValue(I->getOperand(0));
2875   if (!SrcReg)
2876     return false;
2877   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2878
2879   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2880
2881   // Handle sign-extension.
2882   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2883     SrcReg =
2884         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2885     if (!SrcReg)
2886       return false;
2887     SrcIsKill = true;
2888   }
2889
2890   unsigned Opc;
2891   if (SrcVT == MVT::i64) {
2892     if (Signed)
2893       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2894     else
2895       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2896   } else {
2897     if (Signed)
2898       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2899     else
2900       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2901   }
2902
2903   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2904                                       SrcIsKill);
2905   updateValueMap(I, ResultReg);
2906   return true;
2907 }
2908
2909 bool AArch64FastISel::fastLowerArguments() {
2910   if (!FuncInfo.CanLowerReturn)
2911     return false;
2912
2913   const Function *F = FuncInfo.Fn;
2914   if (F->isVarArg())
2915     return false;
2916
2917   CallingConv::ID CC = F->getCallingConv();
2918   if (CC != CallingConv::C && CC != CallingConv::Swift)
2919     return false;
2920
2921   // Only handle simple cases of up to 8 GPR and FPR each.
2922   unsigned GPRCnt = 0;
2923   unsigned FPRCnt = 0;
2924   for (auto const &Arg : F->args()) {
2925     if (Arg.hasAttribute(Attribute::ByVal) ||
2926         Arg.hasAttribute(Attribute::InReg) ||
2927         Arg.hasAttribute(Attribute::StructRet) ||
2928         Arg.hasAttribute(Attribute::SwiftSelf) ||
2929         Arg.hasAttribute(Attribute::SwiftError) ||
2930         Arg.hasAttribute(Attribute::Nest))
2931       return false;
2932
2933     Type *ArgTy = Arg.getType();
2934     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2935       return false;
2936
2937     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2938     if (!ArgVT.isSimple())
2939       return false;
2940
2941     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2942     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2943       return false;
2944
2945     if (VT.isVector() &&
2946         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2947       return false;
2948
2949     if (VT >= MVT::i1 && VT <= MVT::i64)
2950       ++GPRCnt;
2951     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2952              VT.is128BitVector())
2953       ++FPRCnt;
2954     else
2955       return false;
2956
2957     if (GPRCnt > 8 || FPRCnt > 8)
2958       return false;
2959   }
2960
2961   static const MCPhysReg Registers[6][8] = {
2962     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2963       AArch64::W5, AArch64::W6, AArch64::W7 },
2964     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2965       AArch64::X5, AArch64::X6, AArch64::X7 },
2966     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2967       AArch64::H5, AArch64::H6, AArch64::H7 },
2968     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2969       AArch64::S5, AArch64::S6, AArch64::S7 },
2970     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2971       AArch64::D5, AArch64::D6, AArch64::D7 },
2972     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2973       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2974   };
2975
2976   unsigned GPRIdx = 0;
2977   unsigned FPRIdx = 0;
2978   for (auto const &Arg : F->args()) {
2979     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2980     unsigned SrcReg;
2981     const TargetRegisterClass *RC;
2982     if (VT >= MVT::i1 && VT <= MVT::i32) {
2983       SrcReg = Registers[0][GPRIdx++];
2984       RC = &AArch64::GPR32RegClass;
2985       VT = MVT::i32;
2986     } else if (VT == MVT::i64) {
2987       SrcReg = Registers[1][GPRIdx++];
2988       RC = &AArch64::GPR64RegClass;
2989     } else if (VT == MVT::f16) {
2990       SrcReg = Registers[2][FPRIdx++];
2991       RC = &AArch64::FPR16RegClass;
2992     } else if (VT ==  MVT::f32) {
2993       SrcReg = Registers[3][FPRIdx++];
2994       RC = &AArch64::FPR32RegClass;
2995     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2996       SrcReg = Registers[4][FPRIdx++];
2997       RC = &AArch64::FPR64RegClass;
2998     } else if (VT.is128BitVector()) {
2999       SrcReg = Registers[5][FPRIdx++];
3000       RC = &AArch64::FPR128RegClass;
3001     } else
3002       llvm_unreachable("Unexpected value type.");
3003
3004     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3005     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3006     // Without this, EmitLiveInCopies may eliminate the livein if its only
3007     // use is a bitcast (which isn't turned into an instruction).
3008     unsigned ResultReg = createResultReg(RC);
3009     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3010             TII.get(TargetOpcode::COPY), ResultReg)
3011         .addReg(DstReg, getKillRegState(true));
3012     updateValueMap(&Arg, ResultReg);
3013   }
3014   return true;
3015 }
3016
3017 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3018                                       SmallVectorImpl<MVT> &OutVTs,
3019                                       unsigned &NumBytes) {
3020   CallingConv::ID CC = CLI.CallConv;
3021   SmallVector<CCValAssign, 16> ArgLocs;
3022   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3023   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3024
3025   // Get a count of how many bytes are to be pushed on the stack.
3026   NumBytes = CCInfo.getNextStackOffset();
3027
3028   // Issue CALLSEQ_START
3029   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3030   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3031     .addImm(NumBytes).addImm(0);
3032
3033   // Process the args.
3034   for (CCValAssign &VA : ArgLocs) {
3035     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3036     MVT ArgVT = OutVTs[VA.getValNo()];
3037
3038     unsigned ArgReg = getRegForValue(ArgVal);
3039     if (!ArgReg)
3040       return false;
3041
3042     // Handle arg promotion: SExt, ZExt, AExt.
3043     switch (VA.getLocInfo()) {
3044     case CCValAssign::Full:
3045       break;
3046     case CCValAssign::SExt: {
3047       MVT DestVT = VA.getLocVT();
3048       MVT SrcVT = ArgVT;
3049       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3050       if (!ArgReg)
3051         return false;
3052       break;
3053     }
3054     case CCValAssign::AExt:
3055     // Intentional fall-through.
3056     case CCValAssign::ZExt: {
3057       MVT DestVT = VA.getLocVT();
3058       MVT SrcVT = ArgVT;
3059       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3060       if (!ArgReg)
3061         return false;
3062       break;
3063     }
3064     default:
3065       llvm_unreachable("Unknown arg promotion!");
3066     }
3067
3068     // Now copy/store arg to correct locations.
3069     if (VA.isRegLoc() && !VA.needsCustom()) {
3070       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3071               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3072       CLI.OutRegs.push_back(VA.getLocReg());
3073     } else if (VA.needsCustom()) {
3074       // FIXME: Handle custom args.
3075       return false;
3076     } else {
3077       assert(VA.isMemLoc() && "Assuming store on stack.");
3078
3079       // Don't emit stores for undef values.
3080       if (isa<UndefValue>(ArgVal))
3081         continue;
3082
3083       // Need to store on the stack.
3084       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3085
3086       unsigned BEAlign = 0;
3087       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3088         BEAlign = 8 - ArgSize;
3089
3090       Address Addr;
3091       Addr.setKind(Address::RegBase);
3092       Addr.setReg(AArch64::SP);
3093       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3094
3095       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3096       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3097           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3098           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3099
3100       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3101         return false;
3102     }
3103   }
3104   return true;
3105 }
3106
3107 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3108                                  unsigned NumBytes) {
3109   CallingConv::ID CC = CLI.CallConv;
3110
3111   // Issue CALLSEQ_END
3112   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3113   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3114     .addImm(NumBytes).addImm(0);
3115
3116   // Now the return value.
3117   if (RetVT != MVT::isVoid) {
3118     SmallVector<CCValAssign, 16> RVLocs;
3119     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3120     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3121
3122     // Only handle a single return value.
3123     if (RVLocs.size() != 1)
3124       return false;
3125
3126     // Copy all of the result registers out of their specified physreg.
3127     MVT CopyVT = RVLocs[0].getValVT();
3128
3129     // TODO: Handle big-endian results
3130     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3131       return false;
3132
3133     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3134     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3135             TII.get(TargetOpcode::COPY), ResultReg)
3136         .addReg(RVLocs[0].getLocReg());
3137     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3138
3139     CLI.ResultReg = ResultReg;
3140     CLI.NumResultRegs = 1;
3141   }
3142
3143   return true;
3144 }
3145
3146 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3147   CallingConv::ID CC  = CLI.CallConv;
3148   bool IsTailCall     = CLI.IsTailCall;
3149   bool IsVarArg       = CLI.IsVarArg;
3150   const Value *Callee = CLI.Callee;
3151   MCSymbol *Symbol = CLI.Symbol;
3152
3153   if (!Callee && !Symbol)
3154     return false;
3155
3156   // Allow SelectionDAG isel to handle tail calls.
3157   if (IsTailCall)
3158     return false;
3159
3160   CodeModel::Model CM = TM.getCodeModel();
3161   // Only support the small-addressing and large code models.
3162   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3163     return false;
3164
3165   // FIXME: Add large code model support for ELF.
3166   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3167     return false;
3168
3169   // Let SDISel handle vararg functions.
3170   if (IsVarArg)
3171     return false;
3172
3173   // FIXME: Only handle *simple* calls for now.
3174   MVT RetVT;
3175   if (CLI.RetTy->isVoidTy())
3176     RetVT = MVT::isVoid;
3177   else if (!isTypeLegal(CLI.RetTy, RetVT))
3178     return false;
3179
3180   for (auto Flag : CLI.OutFlags)
3181     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3182         Flag.isSwiftSelf() || Flag.isSwiftError())
3183       return false;
3184
3185   // Set up the argument vectors.
3186   SmallVector<MVT, 16> OutVTs;
3187   OutVTs.reserve(CLI.OutVals.size());
3188
3189   for (auto *Val : CLI.OutVals) {
3190     MVT VT;
3191     if (!isTypeLegal(Val->getType(), VT) &&
3192         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3193       return false;
3194
3195     // We don't handle vector parameters yet.
3196     if (VT.isVector() || VT.getSizeInBits() > 64)
3197       return false;
3198
3199     OutVTs.push_back(VT);
3200   }
3201
3202   Address Addr;
3203   if (Callee && !computeCallAddress(Callee, Addr))
3204     return false;
3205
3206   // Handle the arguments now that we've gotten them.
3207   unsigned NumBytes;
3208   if (!processCallArgs(CLI, OutVTs, NumBytes))
3209     return false;
3210
3211   // Issue the call.
3212   MachineInstrBuilder MIB;
3213   if (Subtarget->useSmallAddressing()) {
3214     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3215     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3216     if (Symbol)
3217       MIB.addSym(Symbol, 0);
3218     else if (Addr.getGlobalValue())
3219       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3220     else if (Addr.getReg()) {
3221       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3222       MIB.addReg(Reg);
3223     } else
3224       return false;
3225   } else {
3226     unsigned CallReg = 0;
3227     if (Symbol) {
3228       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3229       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3230               ADRPReg)
3231           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3232
3233       CallReg = createResultReg(&AArch64::GPR64RegClass);
3234       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3235               TII.get(AArch64::LDRXui), CallReg)
3236           .addReg(ADRPReg)
3237           .addSym(Symbol,
3238                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3239     } else if (Addr.getGlobalValue())
3240       CallReg = materializeGV(Addr.getGlobalValue());
3241     else if (Addr.getReg())
3242       CallReg = Addr.getReg();
3243
3244     if (!CallReg)
3245       return false;
3246
3247     const MCInstrDesc &II = TII.get(AArch64::BLR);
3248     CallReg = constrainOperandRegClass(II, CallReg, 0);
3249     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3250   }
3251
3252   // Add implicit physical register uses to the call.
3253   for (auto Reg : CLI.OutRegs)
3254     MIB.addReg(Reg, RegState::Implicit);
3255
3256   // Add a register mask with the call-preserved registers.
3257   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3258   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3259
3260   CLI.Call = MIB;
3261
3262   // Finish off the call including any return values.
3263   return finishCall(CLI, RetVT, NumBytes);
3264 }
3265
3266 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3267   if (Alignment)
3268     return Len / Alignment <= 4;
3269   else
3270     return Len < 32;
3271 }
3272
3273 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3274                                          uint64_t Len, unsigned Alignment) {
3275   // Make sure we don't bloat code by inlining very large memcpy's.
3276   if (!isMemCpySmall(Len, Alignment))
3277     return false;
3278
3279   int64_t UnscaledOffset = 0;
3280   Address OrigDest = Dest;
3281   Address OrigSrc = Src;
3282
3283   while (Len) {
3284     MVT VT;
3285     if (!Alignment || Alignment >= 8) {
3286       if (Len >= 8)
3287         VT = MVT::i64;
3288       else if (Len >= 4)
3289         VT = MVT::i32;
3290       else if (Len >= 2)
3291         VT = MVT::i16;
3292       else {
3293         VT = MVT::i8;
3294       }
3295     } else {
3296       // Bound based on alignment.
3297       if (Len >= 4 && Alignment == 4)
3298         VT = MVT::i32;
3299       else if (Len >= 2 && Alignment == 2)
3300         VT = MVT::i16;
3301       else {
3302         VT = MVT::i8;
3303       }
3304     }
3305
3306     unsigned ResultReg = emitLoad(VT, VT, Src);
3307     if (!ResultReg)
3308       return false;
3309
3310     if (!emitStore(VT, ResultReg, Dest))
3311       return false;
3312
3313     int64_t Size = VT.getSizeInBits() / 8;
3314     Len -= Size;
3315     UnscaledOffset += Size;
3316
3317     // We need to recompute the unscaled offset for each iteration.
3318     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3319     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3320   }
3321
3322   return true;
3323 }
3324
3325 /// Check if it is possible to fold the condition from the XALU intrinsic
3326 /// into the user. The condition code will only be updated on success.
3327 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3328                                         const Instruction *I,
3329                                         const Value *Cond) {
3330   if (!isa<ExtractValueInst>(Cond))
3331     return false;
3332
3333   const auto *EV = cast<ExtractValueInst>(Cond);
3334   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3335     return false;
3336
3337   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3338   MVT RetVT;
3339   const Function *Callee = II->getCalledFunction();
3340   Type *RetTy =
3341   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3342   if (!isTypeLegal(RetTy, RetVT))
3343     return false;
3344
3345   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3346     return false;
3347
3348   const Value *LHS = II->getArgOperand(0);
3349   const Value *RHS = II->getArgOperand(1);
3350
3351   // Canonicalize immediate to the RHS.
3352   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3353       isCommutativeIntrinsic(II))
3354     std::swap(LHS, RHS);
3355
3356   // Simplify multiplies.
3357   Intrinsic::ID IID = II->getIntrinsicID();
3358   switch (IID) {
3359   default:
3360     break;
3361   case Intrinsic::smul_with_overflow:
3362     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3363       if (C->getValue() == 2)
3364         IID = Intrinsic::sadd_with_overflow;
3365     break;
3366   case Intrinsic::umul_with_overflow:
3367     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3368       if (C->getValue() == 2)
3369         IID = Intrinsic::uadd_with_overflow;
3370     break;
3371   }
3372
3373   AArch64CC::CondCode TmpCC;
3374   switch (IID) {
3375   default:
3376     return false;
3377   case Intrinsic::sadd_with_overflow:
3378   case Intrinsic::ssub_with_overflow:
3379     TmpCC = AArch64CC::VS;
3380     break;
3381   case Intrinsic::uadd_with_overflow:
3382     TmpCC = AArch64CC::HS;
3383     break;
3384   case Intrinsic::usub_with_overflow:
3385     TmpCC = AArch64CC::LO;
3386     break;
3387   case Intrinsic::smul_with_overflow:
3388   case Intrinsic::umul_with_overflow:
3389     TmpCC = AArch64CC::NE;
3390     break;
3391   }
3392
3393   // Check if both instructions are in the same basic block.
3394   if (!isValueAvailable(II))
3395     return false;
3396
3397   // Make sure nothing is in the way
3398   BasicBlock::const_iterator Start(I);
3399   BasicBlock::const_iterator End(II);
3400   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3401     // We only expect extractvalue instructions between the intrinsic and the
3402     // instruction to be selected.
3403     if (!isa<ExtractValueInst>(Itr))
3404       return false;
3405
3406     // Check that the extractvalue operand comes from the intrinsic.
3407     const auto *EVI = cast<ExtractValueInst>(Itr);
3408     if (EVI->getAggregateOperand() != II)
3409       return false;
3410   }
3411
3412   CC = TmpCC;
3413   return true;
3414 }
3415
3416 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3417   // FIXME: Handle more intrinsics.
3418   switch (II->getIntrinsicID()) {
3419   default: return false;
3420   case Intrinsic::frameaddress: {
3421     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3422     MFI.setFrameAddressIsTaken(true);
3423
3424     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3425     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3426     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3427     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3428             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3429     // Recursively load frame address
3430     // ldr x0, [fp]
3431     // ldr x0, [x0]
3432     // ldr x0, [x0]
3433     // ...
3434     unsigned DestReg;
3435     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3436     while (Depth--) {
3437       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3438                                 SrcReg, /*IsKill=*/true, 0);
3439       assert(DestReg && "Unexpected LDR instruction emission failure.");
3440       SrcReg = DestReg;
3441     }
3442
3443     updateValueMap(II, SrcReg);
3444     return true;
3445   }
3446   case Intrinsic::memcpy:
3447   case Intrinsic::memmove: {
3448     const auto *MTI = cast<MemTransferInst>(II);
3449     // Don't handle volatile.
3450     if (MTI->isVolatile())
3451       return false;
3452
3453     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3454     // we would emit dead code because we don't currently handle memmoves.
3455     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3456     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3457       // Small memcpy's are common enough that we want to do them without a call
3458       // if possible.
3459       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3460       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3461                                     MTI->getSourceAlignment());
3462       if (isMemCpySmall(Len, Alignment)) {
3463         Address Dest, Src;
3464         if (!computeAddress(MTI->getRawDest(), Dest) ||
3465             !computeAddress(MTI->getRawSource(), Src))
3466           return false;
3467         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3468           return true;
3469       }
3470     }
3471
3472     if (!MTI->getLength()->getType()->isIntegerTy(64))
3473       return false;
3474
3475     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3476       // Fast instruction selection doesn't support the special
3477       // address spaces.
3478       return false;
3479
3480     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3481     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3482   }
3483   case Intrinsic::memset: {
3484     const MemSetInst *MSI = cast<MemSetInst>(II);
3485     // Don't handle volatile.
3486     if (MSI->isVolatile())
3487       return false;
3488
3489     if (!MSI->getLength()->getType()->isIntegerTy(64))
3490       return false;
3491
3492     if (MSI->getDestAddressSpace() > 255)
3493       // Fast instruction selection doesn't support the special
3494       // address spaces.
3495       return false;
3496
3497     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3498   }
3499   case Intrinsic::sin:
3500   case Intrinsic::cos:
3501   case Intrinsic::pow: {
3502     MVT RetVT;
3503     if (!isTypeLegal(II->getType(), RetVT))
3504       return false;
3505
3506     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3507       return false;
3508
3509     static const RTLIB::Libcall LibCallTable[3][2] = {
3510       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3511       { RTLIB::COS_F32, RTLIB::COS_F64 },
3512       { RTLIB::POW_F32, RTLIB::POW_F64 }
3513     };
3514     RTLIB::Libcall LC;
3515     bool Is64Bit = RetVT == MVT::f64;
3516     switch (II->getIntrinsicID()) {
3517     default:
3518       llvm_unreachable("Unexpected intrinsic.");
3519     case Intrinsic::sin:
3520       LC = LibCallTable[0][Is64Bit];
3521       break;
3522     case Intrinsic::cos:
3523       LC = LibCallTable[1][Is64Bit];
3524       break;
3525     case Intrinsic::pow:
3526       LC = LibCallTable[2][Is64Bit];
3527       break;
3528     }
3529
3530     ArgListTy Args;
3531     Args.reserve(II->getNumArgOperands());
3532
3533     // Populate the argument list.
3534     for (auto &Arg : II->arg_operands()) {
3535       ArgListEntry Entry;
3536       Entry.Val = Arg;
3537       Entry.Ty = Arg->getType();
3538       Args.push_back(Entry);
3539     }
3540
3541     CallLoweringInfo CLI;
3542     MCContext &Ctx = MF->getContext();
3543     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3544                   TLI.getLibcallName(LC), std::move(Args));
3545     if (!lowerCallTo(CLI))
3546       return false;
3547     updateValueMap(II, CLI.ResultReg);
3548     return true;
3549   }
3550   case Intrinsic::fabs: {
3551     MVT VT;
3552     if (!isTypeLegal(II->getType(), VT))
3553       return false;
3554
3555     unsigned Opc;
3556     switch (VT.SimpleTy) {
3557     default:
3558       return false;
3559     case MVT::f32:
3560       Opc = AArch64::FABSSr;
3561       break;
3562     case MVT::f64:
3563       Opc = AArch64::FABSDr;
3564       break;
3565     }
3566     unsigned SrcReg = getRegForValue(II->getOperand(0));
3567     if (!SrcReg)
3568       return false;
3569     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3570     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3571     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3572       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3573     updateValueMap(II, ResultReg);
3574     return true;
3575   }
3576   case Intrinsic::trap:
3577     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3578         .addImm(1);
3579     return true;
3580
3581   case Intrinsic::sqrt: {
3582     Type *RetTy = II->getCalledFunction()->getReturnType();
3583
3584     MVT VT;
3585     if (!isTypeLegal(RetTy, VT))
3586       return false;
3587
3588     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3589     if (!Op0Reg)
3590       return false;
3591     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3592
3593     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3594     if (!ResultReg)
3595       return false;
3596
3597     updateValueMap(II, ResultReg);
3598     return true;
3599   }
3600   case Intrinsic::sadd_with_overflow:
3601   case Intrinsic::uadd_with_overflow:
3602   case Intrinsic::ssub_with_overflow:
3603   case Intrinsic::usub_with_overflow:
3604   case Intrinsic::smul_with_overflow:
3605   case Intrinsic::umul_with_overflow: {
3606     // This implements the basic lowering of the xalu with overflow intrinsics.
3607     const Function *Callee = II->getCalledFunction();
3608     auto *Ty = cast<StructType>(Callee->getReturnType());
3609     Type *RetTy = Ty->getTypeAtIndex(0U);
3610
3611     MVT VT;
3612     if (!isTypeLegal(RetTy, VT))
3613       return false;
3614
3615     if (VT != MVT::i32 && VT != MVT::i64)
3616       return false;
3617
3618     const Value *LHS = II->getArgOperand(0);
3619     const Value *RHS = II->getArgOperand(1);
3620     // Canonicalize immediate to the RHS.
3621     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3622         isCommutativeIntrinsic(II))
3623       std::swap(LHS, RHS);
3624
3625     // Simplify multiplies.
3626     Intrinsic::ID IID = II->getIntrinsicID();
3627     switch (IID) {
3628     default:
3629       break;
3630     case Intrinsic::smul_with_overflow:
3631       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3632         if (C->getValue() == 2) {
3633           IID = Intrinsic::sadd_with_overflow;
3634           RHS = LHS;
3635         }
3636       break;
3637     case Intrinsic::umul_with_overflow:
3638       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3639         if (C->getValue() == 2) {
3640           IID = Intrinsic::uadd_with_overflow;
3641           RHS = LHS;
3642         }
3643       break;
3644     }
3645
3646     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3647     AArch64CC::CondCode CC = AArch64CC::Invalid;
3648     switch (IID) {
3649     default: llvm_unreachable("Unexpected intrinsic!");
3650     case Intrinsic::sadd_with_overflow:
3651       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3652       CC = AArch64CC::VS;
3653       break;
3654     case Intrinsic::uadd_with_overflow:
3655       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3656       CC = AArch64CC::HS;
3657       break;
3658     case Intrinsic::ssub_with_overflow:
3659       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3660       CC = AArch64CC::VS;
3661       break;
3662     case Intrinsic::usub_with_overflow:
3663       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3664       CC = AArch64CC::LO;
3665       break;
3666     case Intrinsic::smul_with_overflow: {
3667       CC = AArch64CC::NE;
3668       unsigned LHSReg = getRegForValue(LHS);
3669       if (!LHSReg)
3670         return false;
3671       bool LHSIsKill = hasTrivialKill(LHS);
3672
3673       unsigned RHSReg = getRegForValue(RHS);
3674       if (!RHSReg)
3675         return false;
3676       bool RHSIsKill = hasTrivialKill(RHS);
3677
3678       if (VT == MVT::i32) {
3679         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3680         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3681                                        /*IsKill=*/false, 32);
3682         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3683                                             AArch64::sub_32);
3684         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3685                                               AArch64::sub_32);
3686         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3687                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3688       } else {
3689         assert(VT == MVT::i64 && "Unexpected value type.");
3690         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3691         // reused in the next instruction.
3692         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3693                             /*IsKill=*/false);
3694         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3695                                         RHSReg, RHSIsKill);
3696         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3697                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3698       }
3699       break;
3700     }
3701     case Intrinsic::umul_with_overflow: {
3702       CC = AArch64CC::NE;
3703       unsigned LHSReg = getRegForValue(LHS);
3704       if (!LHSReg)
3705         return false;
3706       bool LHSIsKill = hasTrivialKill(LHS);
3707
3708       unsigned RHSReg = getRegForValue(RHS);
3709       if (!RHSReg)
3710         return false;
3711       bool RHSIsKill = hasTrivialKill(RHS);
3712
3713       if (VT == MVT::i32) {
3714         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3716                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3717                     /*WantResult=*/false);
3718         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3719                                             AArch64::sub_32);
3720       } else {
3721         assert(VT == MVT::i64 && "Unexpected value type.");
3722         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3723         // reused in the next instruction.
3724         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3725                             /*IsKill=*/false);
3726         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3727                                         RHSReg, RHSIsKill);
3728         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3729                     /*IsKill=*/false, /*WantResult=*/false);
3730       }
3731       break;
3732     }
3733     }
3734
3735     if (MulReg) {
3736       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3737       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3738               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3739     }
3740
3741     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3742                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3743                                   /*IsKill=*/true, getInvertedCondCode(CC));
3744     (void)ResultReg2;
3745     assert((ResultReg1 + 1) == ResultReg2 &&
3746            "Nonconsecutive result registers.");
3747     updateValueMap(II, ResultReg1, 2);
3748     return true;
3749   }
3750   }
3751   return false;
3752 }
3753
3754 bool AArch64FastISel::selectRet(const Instruction *I) {
3755   const ReturnInst *Ret = cast<ReturnInst>(I);
3756   const Function &F = *I->getParent()->getParent();
3757
3758   if (!FuncInfo.CanLowerReturn)
3759     return false;
3760
3761   if (F.isVarArg())
3762     return false;
3763
3764   if (TLI.supportSwiftError() &&
3765       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3766     return false;
3767
3768   if (TLI.supportSplitCSR(FuncInfo.MF))
3769     return false;
3770
3771   // Build a list of return value registers.
3772   SmallVector<unsigned, 4> RetRegs;
3773
3774   if (Ret->getNumOperands() > 0) {
3775     CallingConv::ID CC = F.getCallingConv();
3776     SmallVector<ISD::OutputArg, 4> Outs;
3777     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3778
3779     // Analyze operands of the call, assigning locations to each operand.
3780     SmallVector<CCValAssign, 16> ValLocs;
3781     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3782     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3783                                                      : RetCC_AArch64_AAPCS;
3784     CCInfo.AnalyzeReturn(Outs, RetCC);
3785
3786     // Only handle a single return value for now.
3787     if (ValLocs.size() != 1)
3788       return false;
3789
3790     CCValAssign &VA = ValLocs[0];
3791     const Value *RV = Ret->getOperand(0);
3792
3793     // Don't bother handling odd stuff for now.
3794     if ((VA.getLocInfo() != CCValAssign::Full) &&
3795         (VA.getLocInfo() != CCValAssign::BCvt))
3796       return false;
3797
3798     // Only handle register returns for now.
3799     if (!VA.isRegLoc())
3800       return false;
3801
3802     unsigned Reg = getRegForValue(RV);
3803     if (Reg == 0)
3804       return false;
3805
3806     unsigned SrcReg = Reg + VA.getValNo();
3807     unsigned DestReg = VA.getLocReg();
3808     // Avoid a cross-class copy. This is very unlikely.
3809     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3810       return false;
3811
3812     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3813     if (!RVEVT.isSimple())
3814       return false;
3815
3816     // Vectors (of > 1 lane) in big endian need tricky handling.
3817     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3818         !Subtarget->isLittleEndian())
3819       return false;
3820
3821     MVT RVVT = RVEVT.getSimpleVT();
3822     if (RVVT == MVT::f128)
3823       return false;
3824
3825     MVT DestVT = VA.getValVT();
3826     // Special handling for extended integers.
3827     if (RVVT != DestVT) {
3828       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3829         return false;
3830
3831       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3832         return false;
3833
3834       bool IsZExt = Outs[0].Flags.isZExt();
3835       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3836       if (SrcReg == 0)
3837         return false;
3838     }
3839
3840     // Make the copy.
3841     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3842             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3843
3844     // Add register to return instruction.
3845     RetRegs.push_back(VA.getLocReg());
3846   }
3847
3848   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3849                                     TII.get(AArch64::RET_ReallyLR));
3850   for (unsigned RetReg : RetRegs)
3851     MIB.addReg(RetReg, RegState::Implicit);
3852   return true;
3853 }
3854
3855 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3856   Type *DestTy = I->getType();
3857   Value *Op = I->getOperand(0);
3858   Type *SrcTy = Op->getType();
3859
3860   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3861   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3862   if (!SrcEVT.isSimple())
3863     return false;
3864   if (!DestEVT.isSimple())
3865     return false;
3866
3867   MVT SrcVT = SrcEVT.getSimpleVT();
3868   MVT DestVT = DestEVT.getSimpleVT();
3869
3870   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3871       SrcVT != MVT::i8)
3872     return false;
3873   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3874       DestVT != MVT::i1)
3875     return false;
3876
3877   unsigned SrcReg = getRegForValue(Op);
3878   if (!SrcReg)
3879     return false;
3880   bool SrcIsKill = hasTrivialKill(Op);
3881
3882   // If we're truncating from i64 to a smaller non-legal type then generate an
3883   // AND. Otherwise, we know the high bits are undefined and a truncate only
3884   // generate a COPY. We cannot mark the source register also as result
3885   // register, because this can incorrectly transfer the kill flag onto the
3886   // source register.
3887   unsigned ResultReg;
3888   if (SrcVT == MVT::i64) {
3889     uint64_t Mask = 0;
3890     switch (DestVT.SimpleTy) {
3891     default:
3892       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3893       return false;
3894     case MVT::i1:
3895       Mask = 0x1;
3896       break;
3897     case MVT::i8:
3898       Mask = 0xff;
3899       break;
3900     case MVT::i16:
3901       Mask = 0xffff;
3902       break;
3903     }
3904     // Issue an extract_subreg to get the lower 32-bits.
3905     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3906                                                 AArch64::sub_32);
3907     // Create the AND instruction which performs the actual truncation.
3908     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3909     assert(ResultReg && "Unexpected AND instruction emission failure.");
3910   } else {
3911     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3912     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3913             TII.get(TargetOpcode::COPY), ResultReg)
3914         .addReg(SrcReg, getKillRegState(SrcIsKill));
3915   }
3916
3917   updateValueMap(I, ResultReg);
3918   return true;
3919 }
3920
3921 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3922   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3923           DestVT == MVT::i64) &&
3924          "Unexpected value type.");
3925   // Handle i8 and i16 as i32.
3926   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3927     DestVT = MVT::i32;
3928
3929   if (IsZExt) {
3930     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3931     assert(ResultReg && "Unexpected AND instruction emission failure.");
3932     if (DestVT == MVT::i64) {
3933       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3934       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3935       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3936       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3937               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3938           .addImm(0)
3939           .addReg(ResultReg)
3940           .addImm(AArch64::sub_32);
3941       ResultReg = Reg64;
3942     }
3943     return ResultReg;
3944   } else {
3945     if (DestVT == MVT::i64) {
3946       // FIXME: We're SExt i1 to i64.
3947       return 0;
3948     }
3949     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3950                             /*TODO:IsKill=*/false, 0, 0);
3951   }
3952 }
3953
3954 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3955                                       unsigned Op1, bool Op1IsKill) {
3956   unsigned Opc, ZReg;
3957   switch (RetVT.SimpleTy) {
3958   default: return 0;
3959   case MVT::i8:
3960   case MVT::i16:
3961   case MVT::i32:
3962     RetVT = MVT::i32;
3963     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3964   case MVT::i64:
3965     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3966   }
3967
3968   const TargetRegisterClass *RC =
3969       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3970   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3971                           /*IsKill=*/ZReg, true);
3972 }
3973
3974 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3975                                         unsigned Op1, bool Op1IsKill) {
3976   if (RetVT != MVT::i64)
3977     return 0;
3978
3979   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3980                           Op0, Op0IsKill, Op1, Op1IsKill,
3981                           AArch64::XZR, /*IsKill=*/true);
3982 }
3983
3984 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3985                                         unsigned Op1, bool Op1IsKill) {
3986   if (RetVT != MVT::i64)
3987     return 0;
3988
3989   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3990                           Op0, Op0IsKill, Op1, Op1IsKill,
3991                           AArch64::XZR, /*IsKill=*/true);
3992 }
3993
3994 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3995                                      unsigned Op1Reg, bool Op1IsKill) {
3996   unsigned Opc = 0;
3997   bool NeedTrunc = false;
3998   uint64_t Mask = 0;
3999   switch (RetVT.SimpleTy) {
4000   default: return 0;
4001   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4002   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4003   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4004   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4005   }
4006
4007   const TargetRegisterClass *RC =
4008       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4009   if (NeedTrunc) {
4010     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4011     Op1IsKill = true;
4012   }
4013   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4014                                        Op1IsKill);
4015   if (NeedTrunc)
4016     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4017   return ResultReg;
4018 }
4019
4020 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4021                                      bool Op0IsKill, uint64_t Shift,
4022                                      bool IsZExt) {
4023   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4024          "Unexpected source/return type pair.");
4025   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4026           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4027          "Unexpected source value type.");
4028   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4029           RetVT == MVT::i64) && "Unexpected return value type.");
4030
4031   bool Is64Bit = (RetVT == MVT::i64);
4032   unsigned RegSize = Is64Bit ? 64 : 32;
4033   unsigned DstBits = RetVT.getSizeInBits();
4034   unsigned SrcBits = SrcVT.getSizeInBits();
4035   const TargetRegisterClass *RC =
4036       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4037
4038   // Just emit a copy for "zero" shifts.
4039   if (Shift == 0) {
4040     if (RetVT == SrcVT) {
4041       unsigned ResultReg = createResultReg(RC);
4042       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4043               TII.get(TargetOpcode::COPY), ResultReg)
4044           .addReg(Op0, getKillRegState(Op0IsKill));
4045       return ResultReg;
4046     } else
4047       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4048   }
4049
4050   // Don't deal with undefined shifts.
4051   if (Shift >= DstBits)
4052     return 0;
4053
4054   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4055   // {S|U}BFM Wd, Wn, #r, #s
4056   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4057
4058   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4059   // %2 = shl i16 %1, 4
4060   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4061   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4062   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4063   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4064
4065   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4066   // %2 = shl i16 %1, 8
4067   // Wd<32+7-24,32-24> = Wn<7:0>
4068   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4069   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4070   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4071
4072   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4073   // %2 = shl i16 %1, 12
4074   // Wd<32+3-20,32-20> = Wn<3:0>
4075   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4076   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4077   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4078
4079   unsigned ImmR = RegSize - Shift;
4080   // Limit the width to the length of the source type.
4081   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4082   static const unsigned OpcTable[2][2] = {
4083     {AArch64::SBFMWri, AArch64::SBFMXri},
4084     {AArch64::UBFMWri, AArch64::UBFMXri}
4085   };
4086   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4087   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4088     unsigned TmpReg = MRI.createVirtualRegister(RC);
4089     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4090             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4091         .addImm(0)
4092         .addReg(Op0, getKillRegState(Op0IsKill))
4093         .addImm(AArch64::sub_32);
4094     Op0 = TmpReg;
4095     Op0IsKill = true;
4096   }
4097   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4098 }
4099
4100 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4101                                      unsigned Op1Reg, bool Op1IsKill) {
4102   unsigned Opc = 0;
4103   bool NeedTrunc = false;
4104   uint64_t Mask = 0;
4105   switch (RetVT.SimpleTy) {
4106   default: return 0;
4107   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4108   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4109   case MVT::i32: Opc = AArch64::LSRVWr; break;
4110   case MVT::i64: Opc = AArch64::LSRVXr; break;
4111   }
4112
4113   const TargetRegisterClass *RC =
4114       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4115   if (NeedTrunc) {
4116     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4117     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4118     Op0IsKill = Op1IsKill = true;
4119   }
4120   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4121                                        Op1IsKill);
4122   if (NeedTrunc)
4123     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4124   return ResultReg;
4125 }
4126
4127 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4128                                      bool Op0IsKill, uint64_t Shift,
4129                                      bool IsZExt) {
4130   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4131          "Unexpected source/return type pair.");
4132   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4133           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4134          "Unexpected source value type.");
4135   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4136           RetVT == MVT::i64) && "Unexpected return value type.");
4137
4138   bool Is64Bit = (RetVT == MVT::i64);
4139   unsigned RegSize = Is64Bit ? 64 : 32;
4140   unsigned DstBits = RetVT.getSizeInBits();
4141   unsigned SrcBits = SrcVT.getSizeInBits();
4142   const TargetRegisterClass *RC =
4143       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4144
4145   // Just emit a copy for "zero" shifts.
4146   if (Shift == 0) {
4147     if (RetVT == SrcVT) {
4148       unsigned ResultReg = createResultReg(RC);
4149       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4150               TII.get(TargetOpcode::COPY), ResultReg)
4151       .addReg(Op0, getKillRegState(Op0IsKill));
4152       return ResultReg;
4153     } else
4154       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4155   }
4156
4157   // Don't deal with undefined shifts.
4158   if (Shift >= DstBits)
4159     return 0;
4160
4161   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4162   // {S|U}BFM Wd, Wn, #r, #s
4163   // Wd<s-r:0> = Wn<s:r> when r <= s
4164
4165   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4166   // %2 = lshr i16 %1, 4
4167   // Wd<7-4:0> = Wn<7:4>
4168   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4169   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4170   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4171
4172   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4173   // %2 = lshr i16 %1, 8
4174   // Wd<7-7,0> = Wn<7:7>
4175   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4176   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4177   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4178
4179   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4180   // %2 = lshr i16 %1, 12
4181   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4182   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4183   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4184   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4185
4186   if (Shift >= SrcBits && IsZExt)
4187     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4188
4189   // It is not possible to fold a sign-extend into the LShr instruction. In this
4190   // case emit a sign-extend.
4191   if (!IsZExt) {
4192     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4193     if (!Op0)
4194       return 0;
4195     Op0IsKill = true;
4196     SrcVT = RetVT;
4197     SrcBits = SrcVT.getSizeInBits();
4198     IsZExt = true;
4199   }
4200
4201   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4202   unsigned ImmS = SrcBits - 1;
4203   static const unsigned OpcTable[2][2] = {
4204     {AArch64::SBFMWri, AArch64::SBFMXri},
4205     {AArch64::UBFMWri, AArch64::UBFMXri}
4206   };
4207   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4208   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4209     unsigned TmpReg = MRI.createVirtualRegister(RC);
4210     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4211             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4212         .addImm(0)
4213         .addReg(Op0, getKillRegState(Op0IsKill))
4214         .addImm(AArch64::sub_32);
4215     Op0 = TmpReg;
4216     Op0IsKill = true;
4217   }
4218   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4219 }
4220
4221 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4222                                      unsigned Op1Reg, bool Op1IsKill) {
4223   unsigned Opc = 0;
4224   bool NeedTrunc = false;
4225   uint64_t Mask = 0;
4226   switch (RetVT.SimpleTy) {
4227   default: return 0;
4228   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4229   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4230   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4231   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4232   }
4233
4234   const TargetRegisterClass *RC =
4235       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4236   if (NeedTrunc) {
4237     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4238     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4239     Op0IsKill = Op1IsKill = true;
4240   }
4241   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4242                                        Op1IsKill);
4243   if (NeedTrunc)
4244     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4245   return ResultReg;
4246 }
4247
4248 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4249                                      bool Op0IsKill, uint64_t Shift,
4250                                      bool IsZExt) {
4251   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4252          "Unexpected source/return type pair.");
4253   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4254           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4255          "Unexpected source value type.");
4256   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4257           RetVT == MVT::i64) && "Unexpected return value type.");
4258
4259   bool Is64Bit = (RetVT == MVT::i64);
4260   unsigned RegSize = Is64Bit ? 64 : 32;
4261   unsigned DstBits = RetVT.getSizeInBits();
4262   unsigned SrcBits = SrcVT.getSizeInBits();
4263   const TargetRegisterClass *RC =
4264       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4265
4266   // Just emit a copy for "zero" shifts.
4267   if (Shift == 0) {
4268     if (RetVT == SrcVT) {
4269       unsigned ResultReg = createResultReg(RC);
4270       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4271               TII.get(TargetOpcode::COPY), ResultReg)
4272       .addReg(Op0, getKillRegState(Op0IsKill));
4273       return ResultReg;
4274     } else
4275       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4276   }
4277
4278   // Don't deal with undefined shifts.
4279   if (Shift >= DstBits)
4280     return 0;
4281
4282   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4283   // {S|U}BFM Wd, Wn, #r, #s
4284   // Wd<s-r:0> = Wn<s:r> when r <= s
4285
4286   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4287   // %2 = ashr i16 %1, 4
4288   // Wd<7-4:0> = Wn<7:4>
4289   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4290   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4291   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4292
4293   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4294   // %2 = ashr i16 %1, 8
4295   // Wd<7-7,0> = Wn<7:7>
4296   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4297   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4298   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4299
4300   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4301   // %2 = ashr i16 %1, 12
4302   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4303   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4304   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4305   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4306
4307   if (Shift >= SrcBits && IsZExt)
4308     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4309
4310   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4311   unsigned ImmS = SrcBits - 1;
4312   static const unsigned OpcTable[2][2] = {
4313     {AArch64::SBFMWri, AArch64::SBFMXri},
4314     {AArch64::UBFMWri, AArch64::UBFMXri}
4315   };
4316   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4317   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4318     unsigned TmpReg = MRI.createVirtualRegister(RC);
4319     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4320             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4321         .addImm(0)
4322         .addReg(Op0, getKillRegState(Op0IsKill))
4323         .addImm(AArch64::sub_32);
4324     Op0 = TmpReg;
4325     Op0IsKill = true;
4326   }
4327   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4328 }
4329
4330 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4331                                      bool IsZExt) {
4332   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4333
4334   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4335   // DestVT are odd things, so test to make sure that they are both types we can
4336   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4337   // bail out to SelectionDAG.
4338   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4339        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4340       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4341        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4342     return 0;
4343
4344   unsigned Opc;
4345   unsigned Imm = 0;
4346
4347   switch (SrcVT.SimpleTy) {
4348   default:
4349     return 0;
4350   case MVT::i1:
4351     return emiti1Ext(SrcReg, DestVT, IsZExt);
4352   case MVT::i8:
4353     if (DestVT == MVT::i64)
4354       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4355     else
4356       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4357     Imm = 7;
4358     break;
4359   case MVT::i16:
4360     if (DestVT == MVT::i64)
4361       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4362     else
4363       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4364     Imm = 15;
4365     break;
4366   case MVT::i32:
4367     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4368     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4369     Imm = 31;
4370     break;
4371   }
4372
4373   // Handle i8 and i16 as i32.
4374   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4375     DestVT = MVT::i32;
4376   else if (DestVT == MVT::i64) {
4377     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4378     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4379             TII.get(AArch64::SUBREG_TO_REG), Src64)
4380         .addImm(0)
4381         .addReg(SrcReg)
4382         .addImm(AArch64::sub_32);
4383     SrcReg = Src64;
4384   }
4385
4386   const TargetRegisterClass *RC =
4387       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4388   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4389 }
4390
4391 static bool isZExtLoad(const MachineInstr *LI) {
4392   switch (LI->getOpcode()) {
4393   default:
4394     return false;
4395   case AArch64::LDURBBi:
4396   case AArch64::LDURHHi:
4397   case AArch64::LDURWi:
4398   case AArch64::LDRBBui:
4399   case AArch64::LDRHHui:
4400   case AArch64::LDRWui:
4401   case AArch64::LDRBBroX:
4402   case AArch64::LDRHHroX:
4403   case AArch64::LDRWroX:
4404   case AArch64::LDRBBroW:
4405   case AArch64::LDRHHroW:
4406   case AArch64::LDRWroW:
4407     return true;
4408   }
4409 }
4410
4411 static bool isSExtLoad(const MachineInstr *LI) {
4412   switch (LI->getOpcode()) {
4413   default:
4414     return false;
4415   case AArch64::LDURSBWi:
4416   case AArch64::LDURSHWi:
4417   case AArch64::LDURSBXi:
4418   case AArch64::LDURSHXi:
4419   case AArch64::LDURSWi:
4420   case AArch64::LDRSBWui:
4421   case AArch64::LDRSHWui:
4422   case AArch64::LDRSBXui:
4423   case AArch64::LDRSHXui:
4424   case AArch64::LDRSWui:
4425   case AArch64::LDRSBWroX:
4426   case AArch64::LDRSHWroX:
4427   case AArch64::LDRSBXroX:
4428   case AArch64::LDRSHXroX:
4429   case AArch64::LDRSWroX:
4430   case AArch64::LDRSBWroW:
4431   case AArch64::LDRSHWroW:
4432   case AArch64::LDRSBXroW:
4433   case AArch64::LDRSHXroW:
4434   case AArch64::LDRSWroW:
4435     return true;
4436   }
4437 }
4438
4439 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4440                                          MVT SrcVT) {
4441   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4442   if (!LI || !LI->hasOneUse())
4443     return false;
4444
4445   // Check if the load instruction has already been selected.
4446   unsigned Reg = lookUpRegForValue(LI);
4447   if (!Reg)
4448     return false;
4449
4450   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4451   if (!MI)
4452     return false;
4453
4454   // Check if the correct load instruction has been emitted - SelectionDAG might
4455   // have emitted a zero-extending load, but we need a sign-extending load.
4456   bool IsZExt = isa<ZExtInst>(I);
4457   const auto *LoadMI = MI;
4458   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4459       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4460     unsigned LoadReg = MI->getOperand(1).getReg();
4461     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4462     assert(LoadMI && "Expected valid instruction");
4463   }
4464   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4465     return false;
4466
4467   // Nothing to be done.
4468   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4469     updateValueMap(I, Reg);
4470     return true;
4471   }
4472
4473   if (IsZExt) {
4474     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4476             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4477         .addImm(0)
4478         .addReg(Reg, getKillRegState(true))
4479         .addImm(AArch64::sub_32);
4480     Reg = Reg64;
4481   } else {
4482     assert((MI->getOpcode() == TargetOpcode::COPY &&
4483             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4484            "Expected copy instruction");
4485     Reg = MI->getOperand(1).getReg();
4486     MI->eraseFromParent();
4487   }
4488   updateValueMap(I, Reg);
4489   return true;
4490 }
4491
4492 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4493   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4494          "Unexpected integer extend instruction.");
4495   MVT RetVT;
4496   MVT SrcVT;
4497   if (!isTypeSupported(I->getType(), RetVT))
4498     return false;
4499
4500   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4501     return false;
4502
4503   // Try to optimize already sign-/zero-extended values from load instructions.
4504   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4505     return true;
4506
4507   unsigned SrcReg = getRegForValue(I->getOperand(0));
4508   if (!SrcReg)
4509     return false;
4510   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4511
4512   // Try to optimize already sign-/zero-extended values from function arguments.
4513   bool IsZExt = isa<ZExtInst>(I);
4514   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4515     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4516       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4517         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4518         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4519                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4520             .addImm(0)
4521             .addReg(SrcReg, getKillRegState(SrcIsKill))
4522             .addImm(AArch64::sub_32);
4523         SrcReg = ResultReg;
4524       }
4525       // Conservatively clear all kill flags from all uses, because we are
4526       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4527       // level. The result of the instruction at IR level might have been
4528       // trivially dead, which is now not longer true.
4529       unsigned UseReg = lookUpRegForValue(I);
4530       if (UseReg)
4531         MRI.clearKillFlags(UseReg);
4532
4533       updateValueMap(I, SrcReg);
4534       return true;
4535     }
4536   }
4537
4538   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4539   if (!ResultReg)
4540     return false;
4541
4542   updateValueMap(I, ResultReg);
4543   return true;
4544 }
4545
4546 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4547   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4548   if (!DestEVT.isSimple())
4549     return false;
4550
4551   MVT DestVT = DestEVT.getSimpleVT();
4552   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4553     return false;
4554
4555   unsigned DivOpc;
4556   bool Is64bit = (DestVT == MVT::i64);
4557   switch (ISDOpcode) {
4558   default:
4559     return false;
4560   case ISD::SREM:
4561     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4562     break;
4563   case ISD::UREM:
4564     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4565     break;
4566   }
4567   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4568   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4569   if (!Src0Reg)
4570     return false;
4571   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4572
4573   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4574   if (!Src1Reg)
4575     return false;
4576   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4577
4578   const TargetRegisterClass *RC =
4579       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4580   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4581                                      Src1Reg, /*IsKill=*/false);
4582   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4583   // The remainder is computed as numerator - (quotient * denominator) using the
4584   // MSUB instruction.
4585   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4586                                         Src1Reg, Src1IsKill, Src0Reg,
4587                                         Src0IsKill);
4588   updateValueMap(I, ResultReg);
4589   return true;
4590 }
4591
4592 bool AArch64FastISel::selectMul(const Instruction *I) {
4593   MVT VT;
4594   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4595     return false;
4596
4597   if (VT.isVector())
4598     return selectBinaryOp(I, ISD::MUL);
4599
4600   const Value *Src0 = I->getOperand(0);
4601   const Value *Src1 = I->getOperand(1);
4602   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4603     if (C->getValue().isPowerOf2())
4604       std::swap(Src0, Src1);
4605
4606   // Try to simplify to a shift instruction.
4607   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4608     if (C->getValue().isPowerOf2()) {
4609       uint64_t ShiftVal = C->getValue().logBase2();
4610       MVT SrcVT = VT;
4611       bool IsZExt = true;
4612       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4613         if (!isIntExtFree(ZExt)) {
4614           MVT VT;
4615           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4616             SrcVT = VT;
4617             IsZExt = true;
4618             Src0 = ZExt->getOperand(0);
4619           }
4620         }
4621       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4622         if (!isIntExtFree(SExt)) {
4623           MVT VT;
4624           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4625             SrcVT = VT;
4626             IsZExt = false;
4627             Src0 = SExt->getOperand(0);
4628           }
4629         }
4630       }
4631
4632       unsigned Src0Reg = getRegForValue(Src0);
4633       if (!Src0Reg)
4634         return false;
4635       bool Src0IsKill = hasTrivialKill(Src0);
4636
4637       unsigned ResultReg =
4638           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4639
4640       if (ResultReg) {
4641         updateValueMap(I, ResultReg);
4642         return true;
4643       }
4644     }
4645
4646   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4647   if (!Src0Reg)
4648     return false;
4649   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4650
4651   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4652   if (!Src1Reg)
4653     return false;
4654   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4655
4656   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4657
4658   if (!ResultReg)
4659     return false;
4660
4661   updateValueMap(I, ResultReg);
4662   return true;
4663 }
4664
4665 bool AArch64FastISel::selectShift(const Instruction *I) {
4666   MVT RetVT;
4667   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4668     return false;
4669
4670   if (RetVT.isVector())
4671     return selectOperator(I, I->getOpcode());
4672
4673   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4674     unsigned ResultReg = 0;
4675     uint64_t ShiftVal = C->getZExtValue();
4676     MVT SrcVT = RetVT;
4677     bool IsZExt = I->getOpcode() != Instruction::AShr;
4678     const Value *Op0 = I->getOperand(0);
4679     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4680       if (!isIntExtFree(ZExt)) {
4681         MVT TmpVT;
4682         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4683           SrcVT = TmpVT;
4684           IsZExt = true;
4685           Op0 = ZExt->getOperand(0);
4686         }
4687       }
4688     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4689       if (!isIntExtFree(SExt)) {
4690         MVT TmpVT;
4691         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4692           SrcVT = TmpVT;
4693           IsZExt = false;
4694           Op0 = SExt->getOperand(0);
4695         }
4696       }
4697     }
4698
4699     unsigned Op0Reg = getRegForValue(Op0);
4700     if (!Op0Reg)
4701       return false;
4702     bool Op0IsKill = hasTrivialKill(Op0);
4703
4704     switch (I->getOpcode()) {
4705     default: llvm_unreachable("Unexpected instruction.");
4706     case Instruction::Shl:
4707       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4708       break;
4709     case Instruction::AShr:
4710       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4711       break;
4712     case Instruction::LShr:
4713       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4714       break;
4715     }
4716     if (!ResultReg)
4717       return false;
4718
4719     updateValueMap(I, ResultReg);
4720     return true;
4721   }
4722
4723   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4724   if (!Op0Reg)
4725     return false;
4726   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4727
4728   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4729   if (!Op1Reg)
4730     return false;
4731   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4732
4733   unsigned ResultReg = 0;
4734   switch (I->getOpcode()) {
4735   default: llvm_unreachable("Unexpected instruction.");
4736   case Instruction::Shl:
4737     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4738     break;
4739   case Instruction::AShr:
4740     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4741     break;
4742   case Instruction::LShr:
4743     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4744     break;
4745   }
4746
4747   if (!ResultReg)
4748     return false;
4749
4750   updateValueMap(I, ResultReg);
4751   return true;
4752 }
4753
4754 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4755   MVT RetVT, SrcVT;
4756
4757   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4758     return false;
4759   if (!isTypeLegal(I->getType(), RetVT))
4760     return false;
4761
4762   unsigned Opc;
4763   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4764     Opc = AArch64::FMOVWSr;
4765   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4766     Opc = AArch64::FMOVXDr;
4767   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4768     Opc = AArch64::FMOVSWr;
4769   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4770     Opc = AArch64::FMOVDXr;
4771   else
4772     return false;
4773
4774   const TargetRegisterClass *RC = nullptr;
4775   switch (RetVT.SimpleTy) {
4776   default: llvm_unreachable("Unexpected value type.");
4777   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4778   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4779   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4780   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4781   }
4782   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4783   if (!Op0Reg)
4784     return false;
4785   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4786   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4787
4788   if (!ResultReg)
4789     return false;
4790
4791   updateValueMap(I, ResultReg);
4792   return true;
4793 }
4794
4795 bool AArch64FastISel::selectFRem(const Instruction *I) {
4796   MVT RetVT;
4797   if (!isTypeLegal(I->getType(), RetVT))
4798     return false;
4799
4800   RTLIB::Libcall LC;
4801   switch (RetVT.SimpleTy) {
4802   default:
4803     return false;
4804   case MVT::f32:
4805     LC = RTLIB::REM_F32;
4806     break;
4807   case MVT::f64:
4808     LC = RTLIB::REM_F64;
4809     break;
4810   }
4811
4812   ArgListTy Args;
4813   Args.reserve(I->getNumOperands());
4814
4815   // Populate the argument list.
4816   for (auto &Arg : I->operands()) {
4817     ArgListEntry Entry;
4818     Entry.Val = Arg;
4819     Entry.Ty = Arg->getType();
4820     Args.push_back(Entry);
4821   }
4822
4823   CallLoweringInfo CLI;
4824   MCContext &Ctx = MF->getContext();
4825   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4826                 TLI.getLibcallName(LC), std::move(Args));
4827   if (!lowerCallTo(CLI))
4828     return false;
4829   updateValueMap(I, CLI.ResultReg);
4830   return true;
4831 }
4832
4833 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4834   MVT VT;
4835   if (!isTypeLegal(I->getType(), VT))
4836     return false;
4837
4838   if (!isa<ConstantInt>(I->getOperand(1)))
4839     return selectBinaryOp(I, ISD::SDIV);
4840
4841   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4842   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4843       !(C.isPowerOf2() || (-C).isPowerOf2()))
4844     return selectBinaryOp(I, ISD::SDIV);
4845
4846   unsigned Lg2 = C.countTrailingZeros();
4847   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4848   if (!Src0Reg)
4849     return false;
4850   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4851
4852   if (cast<BinaryOperator>(I)->isExact()) {
4853     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4854     if (!ResultReg)
4855       return false;
4856     updateValueMap(I, ResultReg);
4857     return true;
4858   }
4859
4860   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4861   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4862   if (!AddReg)
4863     return false;
4864
4865   // (Src0 < 0) ? Pow2 - 1 : 0;
4866   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4867     return false;
4868
4869   unsigned SelectOpc;
4870   const TargetRegisterClass *RC;
4871   if (VT == MVT::i64) {
4872     SelectOpc = AArch64::CSELXr;
4873     RC = &AArch64::GPR64RegClass;
4874   } else {
4875     SelectOpc = AArch64::CSELWr;
4876     RC = &AArch64::GPR32RegClass;
4877   }
4878   unsigned SelectReg =
4879       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4880                        Src0IsKill, AArch64CC::LT);
4881   if (!SelectReg)
4882     return false;
4883
4884   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4885   // negate the result.
4886   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4887   unsigned ResultReg;
4888   if (C.isNegative())
4889     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4890                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4891   else
4892     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4893
4894   if (!ResultReg)
4895     return false;
4896
4897   updateValueMap(I, ResultReg);
4898   return true;
4899 }
4900
4901 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4902 /// have to duplicate it for AArch64, because otherwise we would fail during the
4903 /// sign-extend emission.
4904 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4905   unsigned IdxN = getRegForValue(Idx);
4906   if (IdxN == 0)
4907     // Unhandled operand. Halt "fast" selection and bail.
4908     return std::pair<unsigned, bool>(0, false);
4909
4910   bool IdxNIsKill = hasTrivialKill(Idx);
4911
4912   // If the index is smaller or larger than intptr_t, truncate or extend it.
4913   MVT PtrVT = TLI.getPointerTy(DL);
4914   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4915   if (IdxVT.bitsLT(PtrVT)) {
4916     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4917     IdxNIsKill = true;
4918   } else if (IdxVT.bitsGT(PtrVT))
4919     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4920   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4921 }
4922
4923 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4924 /// duplicate it for AArch64, because otherwise we would bail out even for
4925 /// simple cases. This is because the standard fastEmit functions don't cover
4926 /// MUL at all and ADD is lowered very inefficientily.
4927 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4928   unsigned N = getRegForValue(I->getOperand(0));
4929   if (!N)
4930     return false;
4931   bool NIsKill = hasTrivialKill(I->getOperand(0));
4932
4933   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4934   // into a single N = N + TotalOffset.
4935   uint64_t TotalOffs = 0;
4936   MVT VT = TLI.getPointerTy(DL);
4937   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4938        GTI != E; ++GTI) {
4939     const Value *Idx = GTI.getOperand();
4940     if (auto *StTy = GTI.getStructTypeOrNull()) {
4941       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4942       // N = N + Offset
4943       if (Field)
4944         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4945     } else {
4946       Type *Ty = GTI.getIndexedType();
4947
4948       // If this is a constant subscript, handle it quickly.
4949       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4950         if (CI->isZero())
4951           continue;
4952         // N = N + Offset
4953         TotalOffs +=
4954             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4955         continue;
4956       }
4957       if (TotalOffs) {
4958         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4959         if (!N)
4960           return false;
4961         NIsKill = true;
4962         TotalOffs = 0;
4963       }
4964
4965       // N = N + Idx * ElementSize;
4966       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4967       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4968       unsigned IdxN = Pair.first;
4969       bool IdxNIsKill = Pair.second;
4970       if (!IdxN)
4971         return false;
4972
4973       if (ElementSize != 1) {
4974         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4975         if (!C)
4976           return false;
4977         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4978         if (!IdxN)
4979           return false;
4980         IdxNIsKill = true;
4981       }
4982       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4983       if (!N)
4984         return false;
4985     }
4986   }
4987   if (TotalOffs) {
4988     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4989     if (!N)
4990       return false;
4991   }
4992   updateValueMap(I, N);
4993   return true;
4994 }
4995
4996 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4997   assert(TM.getOptLevel() == CodeGenOpt::None &&
4998          "cmpxchg survived AtomicExpand at optlevel > -O0");
4999
5000   auto *RetPairTy = cast<StructType>(I->getType());
5001   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5002   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5003          "cmpxchg has a non-i1 status result");
5004
5005   MVT VT;
5006   if (!isTypeLegal(RetTy, VT))
5007     return false;
5008
5009   const TargetRegisterClass *ResRC;
5010   unsigned Opc, CmpOpc;
5011   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5012   // extractvalue selection doesn't support that.
5013   if (VT == MVT::i32) {
5014     Opc = AArch64::CMP_SWAP_32;
5015     CmpOpc = AArch64::SUBSWrs;
5016     ResRC = &AArch64::GPR32RegClass;
5017   } else if (VT == MVT::i64) {
5018     Opc = AArch64::CMP_SWAP_64;
5019     CmpOpc = AArch64::SUBSXrs;
5020     ResRC = &AArch64::GPR64RegClass;
5021   } else {
5022     return false;
5023   }
5024
5025   const MCInstrDesc &II = TII.get(Opc);
5026
5027   const unsigned AddrReg = constrainOperandRegClass(
5028       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5029   const unsigned DesiredReg = constrainOperandRegClass(
5030       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5031   const unsigned NewReg = constrainOperandRegClass(
5032       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5033
5034   const unsigned ResultReg1 = createResultReg(ResRC);
5035   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5036   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5037
5038   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5039   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5040       .addDef(ResultReg1)
5041       .addDef(ScratchReg)
5042       .addUse(AddrReg)
5043       .addUse(DesiredReg)
5044       .addUse(NewReg);
5045
5046   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5047       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5048       .addUse(ResultReg1)
5049       .addUse(DesiredReg)
5050       .addImm(0);
5051
5052   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5053       .addDef(ResultReg2)
5054       .addUse(AArch64::WZR)
5055       .addUse(AArch64::WZR)
5056       .addImm(AArch64CC::NE);
5057
5058   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5059   updateValueMap(I, ResultReg1, 2);
5060   return true;
5061 }
5062
5063 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5064   switch (I->getOpcode()) {
5065   default:
5066     break;
5067   case Instruction::Add:
5068   case Instruction::Sub:
5069     return selectAddSub(I);
5070   case Instruction::Mul:
5071     return selectMul(I);
5072   case Instruction::SDiv:
5073     return selectSDiv(I);
5074   case Instruction::SRem:
5075     if (!selectBinaryOp(I, ISD::SREM))
5076       return selectRem(I, ISD::SREM);
5077     return true;
5078   case Instruction::URem:
5079     if (!selectBinaryOp(I, ISD::UREM))
5080       return selectRem(I, ISD::UREM);
5081     return true;
5082   case Instruction::Shl:
5083   case Instruction::LShr:
5084   case Instruction::AShr:
5085     return selectShift(I);
5086   case Instruction::And:
5087   case Instruction::Or:
5088   case Instruction::Xor:
5089     return selectLogicalOp(I);
5090   case Instruction::Br:
5091     return selectBranch(I);
5092   case Instruction::IndirectBr:
5093     return selectIndirectBr(I);
5094   case Instruction::BitCast:
5095     if (!FastISel::selectBitCast(I))
5096       return selectBitCast(I);
5097     return true;
5098   case Instruction::FPToSI:
5099     if (!selectCast(I, ISD::FP_TO_SINT))
5100       return selectFPToInt(I, /*Signed=*/true);
5101     return true;
5102   case Instruction::FPToUI:
5103     return selectFPToInt(I, /*Signed=*/false);
5104   case Instruction::ZExt:
5105   case Instruction::SExt:
5106     return selectIntExt(I);
5107   case Instruction::Trunc:
5108     if (!selectCast(I, ISD::TRUNCATE))
5109       return selectTrunc(I);
5110     return true;
5111   case Instruction::FPExt:
5112     return selectFPExt(I);
5113   case Instruction::FPTrunc:
5114     return selectFPTrunc(I);
5115   case Instruction::SIToFP:
5116     if (!selectCast(I, ISD::SINT_TO_FP))
5117       return selectIntToFP(I, /*Signed=*/true);
5118     return true;
5119   case Instruction::UIToFP:
5120     return selectIntToFP(I, /*Signed=*/false);
5121   case Instruction::Load:
5122     return selectLoad(I);
5123   case Instruction::Store:
5124     return selectStore(I);
5125   case Instruction::FCmp:
5126   case Instruction::ICmp:
5127     return selectCmp(I);
5128   case Instruction::Select:
5129     return selectSelect(I);
5130   case Instruction::Ret:
5131     return selectRet(I);
5132   case Instruction::FRem:
5133     return selectFRem(I);
5134   case Instruction::GetElementPtr:
5135     return selectGetElementPtr(I);
5136   case Instruction::AtomicCmpXchg:
5137     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5138   }
5139
5140   // Silence warnings.
5141   (void)&CC_AArch64_DarwinPCS_VarArg;
5142   (void)&CC_AArch64_Win64_VarArg;
5143
5144   // fall-back to target-independent instruction selection.
5145   return selectOperator(I, I->getOpcode());
5146 }
5147
5148 namespace llvm {
5149
5150 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5151                                         const TargetLibraryInfo *LibInfo) {
5152   return new AArch64FastISel(FuncInfo, LibInfo);
5153 }
5154
5155 } // end namespace llvm