contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "Utils/ARMBaseInfo.h"
  19 #include "llvm/ADT/StringSwitch.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/CodeGen/SelectionDAGISel.h"
  26 #include "llvm/CodeGen/TargetLowering.h"
  27 #include "llvm/IR/CallingConv.h"
  28 #include "llvm/IR/Constants.h"
  29 #include "llvm/IR/DerivedTypes.h"
  30 #include "llvm/IR/Function.h"
  31 #include "llvm/IR/Intrinsics.h"
  32 #include "llvm/IR/LLVMContext.h"
  33 #include "llvm/Support/CommandLine.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/ErrorHandling.h"
  36 #include "llvm/Target/TargetOptions.h"
  37
  38 using namespace llvm;
  39
  40 #define DEBUG_TYPE "arm-isel"
  41
  42 static cl::opt<bool>
  43 DisableShifterOp("disable-shifter-op", cl::Hidden,
  44   cl::desc("Disable isel of shifter-op"),
  45   cl::init(false));
  46
  47 //===--------------------------------------------------------------------===//
  48 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  49 /// instructions for SelectionDAG operations.
  50 ///
  51 namespace {
  52
  53 class ARMDAGToDAGISel : public SelectionDAGISel {
  54   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  55   /// make the right decision when generating code for different targets.
  56   const ARMSubtarget *Subtarget;
  57
  58 public:
  59   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  60       : SelectionDAGISel(tm, OptLevel) {}
  61
  62   bool runOnMachineFunction(MachineFunction &MF) override {
  63     // Reset the subtarget each time through.
  64     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  65     SelectionDAGISel::runOnMachineFunction(MF);
  66     return true;
  67   }
  68
  69   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  70
  71   void PreprocessISelDAG() override;
  72
  73   /// getI32Imm - Return a target constant of type i32 with the specified
  74   /// value.
  75   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  76     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  77   }
  78
  79   void Select(SDNode *N) override;
  80
  81   bool hasNoVMLxHazardUse(SDNode *N) const;
  82   bool isShifterOpProfitable(const SDValue &Shift,
  83                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  84   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  85                                SDValue &B, SDValue &C,
  86                                bool CheckProfitability = true);
  87   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  88                                SDValue &B, bool CheckProfitability = true);
  89   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  90                                     SDValue &B, SDValue &C) {
  91     // Don't apply the profitability check
  92     return SelectRegShifterOperand(N, A, B, C, false);
  93   }
  94   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  95                                     SDValue &B) {
  96     // Don't apply the profitability check
  97     return SelectImmShifterOperand(N, A, B, false);
  98   }
  99
 100   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
 101
 102   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 103   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 104
 105   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 106     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 107     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 108     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 109     return true;
 110   }
 111
 112   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 113                              SDValue &Offset, SDValue &Opc);
 114   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 115                              SDValue &Offset, SDValue &Opc);
 116   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 117                              SDValue &Offset, SDValue &Opc);
 118   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 119   bool SelectAddrMode3(SDValue N, SDValue &Base,
 120                        SDValue &Offset, SDValue &Opc);
 121   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 122                              SDValue &Offset, SDValue &Opc);
 123   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 124                          int Lwb, int Upb, bool FP16);
 125   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
 126   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
 127   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 128   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 129
 130   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 131
 132   // Thumb Addressing Modes:
 133   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 134   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 135                                 SDValue &OffImm);
 136   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 137                                  SDValue &OffImm);
 138   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 139                                  SDValue &OffImm);
 140   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 141                                  SDValue &OffImm);
 142   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 143
 144   // Thumb 2 Addressing Modes:
 145   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 146   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 147                             SDValue &OffImm);
 148   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 149                                  SDValue &OffImm);
 150   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 151                              SDValue &OffReg, SDValue &ShImm);
 152   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 153
 154   inline bool is_so_imm(unsigned Imm) const {
 155     return ARM_AM::getSOImmVal(Imm) != -1;
 156   }
 157
 158   inline bool is_so_imm_not(unsigned Imm) const {
 159     return ARM_AM::getSOImmVal(~Imm) != -1;
 160   }
 161
 162   inline bool is_t2_so_imm(unsigned Imm) const {
 163     return ARM_AM::getT2SOImmVal(Imm) != -1;
 164   }
 165
 166   inline bool is_t2_so_imm_not(unsigned Imm) const {
 167     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 168   }
 169
 170   // Include the pieces autogenerated from the target description.
 171 #include "ARMGenDAGISel.inc"
 172
 173 private:
 174   void transferMemOperands(SDNode *Src, SDNode *Dst);
 175
 176   /// Indexed (pre/post inc/dec) load matching code for ARM.
 177   bool tryARMIndexedLoad(SDNode *N);
 178   bool tryT1IndexedLoad(SDNode *N);
 179   bool tryT2IndexedLoad(SDNode *N);
 180
 181   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 182   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 183   /// loads of D registers and even subregs and odd subregs of Q registers.
 184   /// For NumVecs <= 2, QOpcodes1 is not used.
 185   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 186                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 187                  const uint16_t *QOpcodes1);
 188
 189   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 190   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 191   /// stores of D registers and even subregs and odd subregs of Q registers.
 192   /// For NumVecs <= 2, QOpcodes1 is not used.
 193   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 194                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 195                  const uint16_t *QOpcodes1);
 196
 197   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 198   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 199   /// load/store of D registers and Q registers.
 200   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 201                        unsigned NumVecs, const uint16_t *DOpcodes,
 202                        const uint16_t *QOpcodes);
 203
 204   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 205   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 206   /// for loading D registers.
 207   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
 208                     unsigned NumVecs, const uint16_t *DOpcodes,
 209                     const uint16_t *QOpcodes0 = nullptr,
 210                     const uint16_t *QOpcodes1 = nullptr);
 211
 212   /// Try to select SBFX/UBFX instructions for ARM.
 213   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 214
 215   // Select special operations if node forms integer ABS pattern
 216   bool tryABSOp(SDNode *N);
 217
 218   bool tryReadRegister(SDNode *N);
 219   bool tryWriteRegister(SDNode *N);
 220
 221   bool tryInlineAsm(SDNode *N);
 222
 223   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 224
 225   void SelectCMP_SWAP(SDNode *N);
 226
 227   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 228   /// inline asm expressions.
 229   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 230                                     std::vector<SDValue> &OutOps) override;
 231
 232   // Form pairs of consecutive R, S, D, or Q registers.
 233   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 234   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 235   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 236   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 237
 238   // Form sequences of 4 consecutive S, D, or Q registers.
 239   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 240   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 241   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 242
 243   // Get the alignment operand for a NEON VLD or VST instruction.
 244   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 245                         bool is64BitVector);
 246
 247   /// Returns the number of instructions required to materialize the given
 248   /// constant in a register, or 3 if a literal pool load is needed.
 249   unsigned ConstantMaterializationCost(unsigned Val) const;
 250
 251   /// Checks if N is a multiplication by a constant where we can extract out a
 252   /// power of two from the constant so that it can be used in a shift, but only
 253   /// if it simplifies the materialization of the constant. Returns true if it
 254   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 255   /// out and to NewMulConst the new constant to be multiplied by.
 256   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 257                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 258
 259   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 260   /// selected when N would have been selected.
 261   void replaceDAGValue(const SDValue &N, SDValue M);
 262 };
 263 }
 264
 265 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 266 /// operand. If so Imm will receive the 32-bit value.
 267 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 268   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 269     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 270     return true;
 271   }
 272   return false;
 273 }
 274
 275 // isInt32Immediate - This method tests to see if a constant operand.
 276 // If so Imm will receive the 32 bit value.
 277 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 278   return isInt32Immediate(N.getNode(), Imm);
 279 }
 280
 281 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 282 // opcode and that it has a immediate integer right operand.
 283 // If so Imm will receive the 32 bit value.
 284 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 285   return N->getOpcode() == Opc &&
 286          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 287 }
 288
 289 /// Check whether a particular node is a constant value representable as
 290 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 291 ///
 292 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 293 static bool isScaledConstantInRange(SDValue Node, int Scale,
 294                                     int RangeMin, int RangeMax,
 295                                     int &ScaledConstant) {
 296   assert(Scale > 0 && "Invalid scale!");
 297
 298   // Check that this is a constant.
 299   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 300   if (!C)
 301     return false;
 302
 303   ScaledConstant = (int) C->getZExtValue();
 304   if ((ScaledConstant % Scale) != 0)
 305     return false;
 306
 307   ScaledConstant /= Scale;
 308   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 309 }
 310
 311 void ARMDAGToDAGISel::PreprocessISelDAG() {
 312   if (!Subtarget->hasV6T2Ops())
 313     return;
 314
 315   bool isThumb2 = Subtarget->isThumb();
 316   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 317        E = CurDAG->allnodes_end(); I != E; ) {
 318     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 319
 320     if (N->getOpcode() != ISD::ADD)
 321       continue;
 322
 323     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 324     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 325     // trailing zeros, e.g. 1020.
 326     // Transform the expression to
 327     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 328     // of trailing zeros of c2. The left shift would be folded as an shifter
 329     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 330     // node (UBFX).
 331
 332     SDValue N0 = N->getOperand(0);
 333     SDValue N1 = N->getOperand(1);
 334     unsigned And_imm = 0;
 335     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 336       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 337         std::swap(N0, N1);
 338     }
 339     if (!And_imm)
 340       continue;
 341
 342     // Check if the AND mask is an immediate of the form: 000.....1111111100
 343     unsigned TZ = countTrailingZeros(And_imm);
 344     if (TZ != 1 && TZ != 2)
 345       // Be conservative here. Shifter operands aren't always free. e.g. On
 346       // Swift, left shifter operand of 1 / 2 for free but others are not.
 347       // e.g.
 348       //  ubfx   r3, r1, #16, #8
 349       //  ldr.w  r3, [r0, r3, lsl #2]
 350       // vs.
 351       //  mov.w  r9, #1020
 352       //  and.w  r2, r9, r1, lsr #14
 353       //  ldr    r2, [r0, r2]
 354       continue;
 355     And_imm >>= TZ;
 356     if (And_imm & (And_imm + 1))
 357       continue;
 358
 359     // Look for (and (srl X, c1), c2).
 360     SDValue Srl = N1.getOperand(0);
 361     unsigned Srl_imm = 0;
 362     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 363         (Srl_imm <= 2))
 364       continue;
 365
 366     // Make sure first operand is not a shifter operand which would prevent
 367     // folding of the left shift.
 368     SDValue CPTmp0;
 369     SDValue CPTmp1;
 370     SDValue CPTmp2;
 371     if (isThumb2) {
 372       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 373         continue;
 374     } else {
 375       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 376           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 377         continue;
 378     }
 379
 380     // Now make the transformation.
 381     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 382                           Srl.getOperand(0),
 383                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 384                                               MVT::i32));
 385     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 386                          Srl,
 387                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 388     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 389                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 390     CurDAG->UpdateNodeOperands(N, N0, N1);
 391   }
 392 }
 393
 394 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 395 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 396 /// least on current ARM implementations) which should be avoidded.
 397 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 398   if (OptLevel == CodeGenOpt::None)
 399     return true;
 400
 401   if (!Subtarget->hasVMLxHazards())
 402     return true;
 403
 404   if (!N->hasOneUse())
 405     return false;
 406
 407   SDNode *Use = *N->use_begin();
 408   if (Use->getOpcode() == ISD::CopyToReg)
 409     return true;
 410   if (Use->isMachineOpcode()) {
 411     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 412         CurDAG->getSubtarget().getInstrInfo());
 413
 414     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 415     if (MCID.mayStore())
 416       return true;
 417     unsigned Opcode = MCID.getOpcode();
 418     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 419       return true;
 420     // vmlx feeding into another vmlx. We actually want to unfold
 421     // the use later in the MLxExpansion pass. e.g.
 422     // vmla
 423     // vmla (stall 8 cycles)
 424     //
 425     // vmul (5 cycles)
 426     // vadd (5 cycles)
 427     // vmla
 428     // This adds up to about 18 - 19 cycles.
 429     //
 430     // vmla
 431     // vmul (stall 4 cycles)
 432     // vadd adds up to about 14 cycles.
 433     return TII->isFpMLxInstruction(Opcode);
 434   }
 435
 436   return false;
 437 }
 438
 439 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 440                                             ARM_AM::ShiftOpc ShOpcVal,
 441                                             unsigned ShAmt) {
 442   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 443     return true;
 444   if (Shift.hasOneUse())
 445     return true;
 446   // R << 2 is free.
 447   return ShOpcVal == ARM_AM::lsl &&
 448          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 449 }
 450
 451 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 452   if (Subtarget->isThumb()) {
 453     if (Val <= 255) return 1;                               // MOV
 454     if (Subtarget->hasV6T2Ops() &&
 455         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 456       return 1; // MOVW
 457     if (Val <= 510) return 2;                               // MOV + ADDi8
 458     if (~Val <= 255) return 2;                              // MOV + MVN
 459     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 460   } else {
 461     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 462     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 463     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 464     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 465   }
 466   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 467   return 3; // Literal pool load
 468 }
 469
 470 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 471                                              unsigned MaxShift,
 472                                              unsigned &PowerOfTwo,
 473                                              SDValue &NewMulConst) const {
 474   assert(N.getOpcode() == ISD::MUL);
 475   assert(MaxShift > 0);
 476
 477   // If the multiply is used in more than one place then changing the constant
 478   // will make other uses incorrect, so don't.
 479   if (!N.hasOneUse()) return false;
 480   // Check if the multiply is by a constant
 481   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 482   if (!MulConst) return false;
 483   // If the constant is used in more than one place then modifying it will mean
 484   // we need to materialize two constants instead of one, which is a bad idea.
 485   if (!MulConst->hasOneUse()) return false;
 486   unsigned MulConstVal = MulConst->getZExtValue();
 487   if (MulConstVal == 0) return false;
 488
 489   // Find the largest power of 2 that MulConstVal is a multiple of
 490   PowerOfTwo = MaxShift;
 491   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 492     --PowerOfTwo;
 493     if (PowerOfTwo == 0) return false;
 494   }
 495
 496   // Only optimise if the new cost is better
 497   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 498   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 499   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 500   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 501   return NewCost < OldCost;
 502 }
 503
 504 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 505   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 506   ReplaceUses(N, M);
 507 }
 508
 509 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 510                                               SDValue &BaseReg,
 511                                               SDValue &Opc,
 512                                               bool CheckProfitability) {
 513   if (DisableShifterOp)
 514     return false;
 515
 516   // If N is a multiply-by-constant and it's profitable to extract a shift and
 517   // use it in a shifted operand do so.
 518   if (N.getOpcode() == ISD::MUL) {
 519     unsigned PowerOfTwo = 0;
 520     SDValue NewMulConst;
 521     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 522       HandleSDNode Handle(N);
 523       SDLoc Loc(N);
 524       replaceDAGValue(N.getOperand(1), NewMulConst);
 525       BaseReg = Handle.getValue();
 526       Opc = CurDAG->getTargetConstant(
 527           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 528       return true;
 529     }
 530   }
 531
 532   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 533
 534   // Don't match base register only case. That is matched to a separate
 535   // lower complexity pattern with explicit register operand.
 536   if (ShOpcVal == ARM_AM::no_shift) return false;
 537
 538   BaseReg = N.getOperand(0);
 539   unsigned ShImmVal = 0;
 540   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 541   if (!RHS) return false;
 542   ShImmVal = RHS->getZExtValue() & 31;
 543   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 544                                   SDLoc(N), MVT::i32);
 545   return true;
 546 }
 547
 548 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 549                                               SDValue &BaseReg,
 550                                               SDValue &ShReg,
 551                                               SDValue &Opc,
 552                                               bool CheckProfitability) {
 553   if (DisableShifterOp)
 554     return false;
 555
 556   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 557
 558   // Don't match base register only case. That is matched to a separate
 559   // lower complexity pattern with explicit register operand.
 560   if (ShOpcVal == ARM_AM::no_shift) return false;
 561
 562   BaseReg = N.getOperand(0);
 563   unsigned ShImmVal = 0;
 564   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 565   if (RHS) return false;
 566
 567   ShReg = N.getOperand(1);
 568   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 569     return false;
 570   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 571                                   SDLoc(N), MVT::i32);
 572   return true;
 573 }
 574
 575 // Determine whether an ISD::OR's operands are suitable to turn the operation
 576 // into an addition, which often has more compact encodings.
 577 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
 578   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
 579   Out = N;
 580   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
 581 }
 582
 583
 584 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 585                                           SDValue &Base,
 586                                           SDValue &OffImm) {
 587   // Match simple R + imm12 operands.
 588
 589   // Base only.
 590   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 591       !CurDAG->isBaseWithConstantOffset(N)) {
 592     if (N.getOpcode() == ISD::FrameIndex) {
 593       // Match frame index.
 594       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 595       Base = CurDAG->getTargetFrameIndex(
 596           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 597       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 598       return true;
 599     }
 600
 601     if (N.getOpcode() == ARMISD::Wrapper &&
 602         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 603         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 604         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 605       Base = N.getOperand(0);
 606     } else
 607       Base = N;
 608     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 609     return true;
 610   }
 611
 612   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 613     int RHSC = (int)RHS->getSExtValue();
 614     if (N.getOpcode() == ISD::SUB)
 615       RHSC = -RHSC;
 616
 617     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 618       Base   = N.getOperand(0);
 619       if (Base.getOpcode() == ISD::FrameIndex) {
 620         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 621         Base = CurDAG->getTargetFrameIndex(
 622             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 623       }
 624       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 625       return true;
 626     }
 627   }
 628
 629   // Base only.
 630   Base = N;
 631   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 632   return true;
 633 }
 634
 635
 636
 637 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 638                                       SDValue &Opc) {
 639   if (N.getOpcode() == ISD::MUL &&
 640       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 641     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 642       // X * [3,5,9] -> X + X * [2,4,8] etc.
 643       int RHSC = (int)RHS->getZExtValue();
 644       if (RHSC & 1) {
 645         RHSC = RHSC & ~1;
 646         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 647         if (RHSC < 0) {
 648           AddSub = ARM_AM::sub;
 649           RHSC = - RHSC;
 650         }
 651         if (isPowerOf2_32(RHSC)) {
 652           unsigned ShAmt = Log2_32(RHSC);
 653           Base = Offset = N.getOperand(0);
 654           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 655                                                             ARM_AM::lsl),
 656                                           SDLoc(N), MVT::i32);
 657           return true;
 658         }
 659       }
 660     }
 661   }
 662
 663   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 664       // ISD::OR that is equivalent to an ISD::ADD.
 665       !CurDAG->isBaseWithConstantOffset(N))
 666     return false;
 667
 668   // Leave simple R +/- imm12 operands for LDRi12
 669   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 670     int RHSC;
 671     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 672                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 673       return false;
 674   }
 675
 676   // Otherwise this is R +/- [possibly shifted] R.
 677   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 678   ARM_AM::ShiftOpc ShOpcVal =
 679     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 680   unsigned ShAmt = 0;
 681
 682   Base   = N.getOperand(0);
 683   Offset = N.getOperand(1);
 684
 685   if (ShOpcVal != ARM_AM::no_shift) {
 686     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 687     // it.
 688     if (ConstantSDNode *Sh =
 689            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 690       ShAmt = Sh->getZExtValue();
 691       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 692         Offset = N.getOperand(1).getOperand(0);
 693       else {
 694         ShAmt = 0;
 695         ShOpcVal = ARM_AM::no_shift;
 696       }
 697     } else {
 698       ShOpcVal = ARM_AM::no_shift;
 699     }
 700   }
 701
 702   // Try matching (R shl C) + (R).
 703   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 704       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 705         N.getOperand(0).hasOneUse())) {
 706     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 707     if (ShOpcVal != ARM_AM::no_shift) {
 708       // Check to see if the RHS of the shift is a constant, if not, we can't
 709       // fold it.
 710       if (ConstantSDNode *Sh =
 711           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 712         ShAmt = Sh->getZExtValue();
 713         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 714           Offset = N.getOperand(0).getOperand(0);
 715           Base = N.getOperand(1);
 716         } else {
 717           ShAmt = 0;
 718           ShOpcVal = ARM_AM::no_shift;
 719         }
 720       } else {
 721         ShOpcVal = ARM_AM::no_shift;
 722       }
 723     }
 724   }
 725
 726   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 727   // and use it in a shifted operand do so.
 728   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 729     unsigned PowerOfTwo = 0;
 730     SDValue NewMulConst;
 731     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 732       HandleSDNode Handle(Offset);
 733       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 734       Offset = Handle.getValue();
 735       ShAmt = PowerOfTwo;
 736       ShOpcVal = ARM_AM::lsl;
 737     }
 738   }
 739
 740   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 741                                   SDLoc(N), MVT::i32);
 742   return true;
 743 }
 744
 745 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 746                                             SDValue &Offset, SDValue &Opc) {
 747   unsigned Opcode = Op->getOpcode();
 748   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 749     ? cast<LoadSDNode>(Op)->getAddressingMode()
 750     : cast<StoreSDNode>(Op)->getAddressingMode();
 751   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 752     ? ARM_AM::add : ARM_AM::sub;
 753   int Val;
 754   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 755     return false;
 756
 757   Offset = N;
 758   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 759   unsigned ShAmt = 0;
 760   if (ShOpcVal != ARM_AM::no_shift) {
 761     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 762     // it.
 763     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 764       ShAmt = Sh->getZExtValue();
 765       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 766         Offset = N.getOperand(0);
 767       else {
 768         ShAmt = 0;
 769         ShOpcVal = ARM_AM::no_shift;
 770       }
 771     } else {
 772       ShOpcVal = ARM_AM::no_shift;
 773     }
 774   }
 775
 776   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 777                                   SDLoc(N), MVT::i32);
 778   return true;
 779 }
 780
 781 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 782                                             SDValue &Offset, SDValue &Opc) {
 783   unsigned Opcode = Op->getOpcode();
 784   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 785     ? cast<LoadSDNode>(Op)->getAddressingMode()
 786     : cast<StoreSDNode>(Op)->getAddressingMode();
 787   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 788     ? ARM_AM::add : ARM_AM::sub;
 789   int Val;
 790   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 791     if (AddSub == ARM_AM::sub) Val *= -1;
 792     Offset = CurDAG->getRegister(0, MVT::i32);
 793     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 794     return true;
 795   }
 796
 797   return false;
 798 }
 799
 800
 801 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 802                                             SDValue &Offset, SDValue &Opc) {
 803   unsigned Opcode = Op->getOpcode();
 804   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 805     ? cast<LoadSDNode>(Op)->getAddressingMode()
 806     : cast<StoreSDNode>(Op)->getAddressingMode();
 807   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 808     ? ARM_AM::add : ARM_AM::sub;
 809   int Val;
 810   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 811     Offset = CurDAG->getRegister(0, MVT::i32);
 812     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 813                                                       ARM_AM::no_shift),
 814                                     SDLoc(Op), MVT::i32);
 815     return true;
 816   }
 817
 818   return false;
 819 }
 820
 821 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 822   Base = N;
 823   return true;
 824 }
 825
 826 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 827                                       SDValue &Base, SDValue &Offset,
 828                                       SDValue &Opc) {
 829   if (N.getOpcode() == ISD::SUB) {
 830     // X - C  is canonicalize to X + -C, no need to handle it here.
 831     Base = N.getOperand(0);
 832     Offset = N.getOperand(1);
 833     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 834                                     MVT::i32);
 835     return true;
 836   }
 837
 838   if (!CurDAG->isBaseWithConstantOffset(N)) {
 839     Base = N;
 840     if (N.getOpcode() == ISD::FrameIndex) {
 841       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 842       Base = CurDAG->getTargetFrameIndex(
 843           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 844     }
 845     Offset = CurDAG->getRegister(0, MVT::i32);
 846     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 847                                     MVT::i32);
 848     return true;
 849   }
 850
 851   // If the RHS is +/- imm8, fold into addr mode.
 852   int RHSC;
 853   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 854                               -256 + 1, 256, RHSC)) { // 8 bits.
 855     Base = N.getOperand(0);
 856     if (Base.getOpcode() == ISD::FrameIndex) {
 857       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 858       Base = CurDAG->getTargetFrameIndex(
 859           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 860     }
 861     Offset = CurDAG->getRegister(0, MVT::i32);
 862
 863     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 864     if (RHSC < 0) {
 865       AddSub = ARM_AM::sub;
 866       RHSC = -RHSC;
 867     }
 868     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 869                                     MVT::i32);
 870     return true;
 871   }
 872
 873   Base = N.getOperand(0);
 874   Offset = N.getOperand(1);
 875   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 876                                   MVT::i32);
 877   return true;
 878 }
 879
 880 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 881                                             SDValue &Offset, SDValue &Opc) {
 882   unsigned Opcode = Op->getOpcode();
 883   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 884     ? cast<LoadSDNode>(Op)->getAddressingMode()
 885     : cast<StoreSDNode>(Op)->getAddressingMode();
 886   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 887     ? ARM_AM::add : ARM_AM::sub;
 888   int Val;
 889   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 890     Offset = CurDAG->getRegister(0, MVT::i32);
 891     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 892                                     MVT::i32);
 893     return true;
 894   }
 895
 896   Offset = N;
 897   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 898                                   MVT::i32);
 899   return true;
 900 }
 901
 902 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
 903                                         int Lwb, int Upb, bool FP16) {
 904   if (!CurDAG->isBaseWithConstantOffset(N)) {
 905     Base = N;
 906     if (N.getOpcode() == ISD::FrameIndex) {
 907       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 908       Base = CurDAG->getTargetFrameIndex(
 909           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 910     } else if (N.getOpcode() == ARMISD::Wrapper &&
 911                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 912                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 913                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 914       Base = N.getOperand(0);
 915     }
 916     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 917                                        SDLoc(N), MVT::i32);
 918     return true;
 919   }
 920
 921   // If the RHS is +/- imm8, fold into addr mode.
 922   int RHSC;
 923   const int Scale = FP16 ? 2 : 4;
 924
 925   if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
 926     Base = N.getOperand(0);
 927     if (Base.getOpcode() == ISD::FrameIndex) {
 928       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 929       Base = CurDAG->getTargetFrameIndex(
 930           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 931     }
 932
 933     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 934     if (RHSC < 0) {
 935       AddSub = ARM_AM::sub;
 936       RHSC = -RHSC;
 937     }
 938
 939     if (FP16)
 940       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
 941                                          SDLoc(N), MVT::i32);
 942     else
 943       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 944                                          SDLoc(N), MVT::i32);
 945
 946     return true;
 947   }
 948
 949   Base = N;
 950
 951   if (FP16)
 952     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
 953                                        SDLoc(N), MVT::i32);
 954   else
 955     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 956                                        SDLoc(N), MVT::i32);
 957
 958   return true;
 959 }
 960
 961 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 962                                       SDValue &Base, SDValue &Offset) {
 963   int Lwb = -256 + 1;
 964   int Upb = 256;
 965   return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
 966 }
 967
 968 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
 969                                           SDValue &Base, SDValue &Offset) {
 970   int Lwb = -512 + 1;
 971   int Upb = 512;
 972   return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
 973 }
 974
 975 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 976                                       SDValue &Align) {
 977   Addr = N;
 978
 979   unsigned Alignment = 0;
 980
 981   MemSDNode *MemN = cast<MemSDNode>(Parent);
 982
 983   if (isa<LSBaseSDNode>(MemN) ||
 984       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 985         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 986        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 987     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 988     // The maximum alignment is equal to the memory size being referenced.
 989     unsigned MMOAlign = MemN->getAlignment();
 990     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 991     if (MMOAlign >= MemSize && MemSize > 1)
 992       Alignment = MemSize;
 993   } else {
 994     // All other uses of addrmode6 are for intrinsics.  For now just record
 995     // the raw alignment value; it will be refined later based on the legal
 996     // alignment operands for the intrinsic.
 997     Alignment = MemN->getAlignment();
 998   }
 999
1000   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1001   return true;
1002 }
1003
1004 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1005                                             SDValue &Offset) {
1006   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1007   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1008   if (AM != ISD::POST_INC)
1009     return false;
1010   Offset = N;
1011   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1012     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1013       Offset = CurDAG->getRegister(0, MVT::i32);
1014   }
1015   return true;
1016 }
1017
1018 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1019                                        SDValue &Offset, SDValue &Label) {
1020   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1021     Offset = N.getOperand(0);
1022     SDValue N1 = N.getOperand(1);
1023     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1024                                       SDLoc(N), MVT::i32);
1025     return true;
1026   }
1027
1028   return false;
1029 }
1030
1031
1032 //===----------------------------------------------------------------------===//
1033 //                         Thumb Addressing Modes
1034 //===----------------------------------------------------------------------===//
1035
1036 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1037                                             SDValue &Base, SDValue &Offset){
1038   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1039     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1040     if (!NC || !NC->isNullValue())
1041       return false;
1042
1043     Base = Offset = N;
1044     return true;
1045   }
1046
1047   Base = N.getOperand(0);
1048   Offset = N.getOperand(1);
1049   return true;
1050 }
1051
1052 bool
1053 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1054                                           SDValue &Base, SDValue &OffImm) {
1055   if (!CurDAG->isBaseWithConstantOffset(N)) {
1056     if (N.getOpcode() == ISD::ADD) {
1057       return false; // We want to select register offset instead
1058     } else if (N.getOpcode() == ARMISD::Wrapper &&
1059         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1060         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1061         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1062         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1063       Base = N.getOperand(0);
1064     } else {
1065       Base = N;
1066     }
1067
1068     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1069     return true;
1070   }
1071
1072   // If the RHS is + imm5 * scale, fold into addr mode.
1073   int RHSC;
1074   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1075     Base = N.getOperand(0);
1076     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1077     return true;
1078   }
1079
1080   // Offset is too large, so use register offset instead.
1081   return false;
1082 }
1083
1084 bool
1085 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1086                                            SDValue &OffImm) {
1087   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1088 }
1089
1090 bool
1091 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1092                                            SDValue &OffImm) {
1093   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1094 }
1095
1096 bool
1097 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1098                                            SDValue &OffImm) {
1099   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1100 }
1101
1102 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1103                                             SDValue &Base, SDValue &OffImm) {
1104   if (N.getOpcode() == ISD::FrameIndex) {
1105     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1106     // Only multiples of 4 are allowed for the offset, so the frame object
1107     // alignment must be at least 4.
1108     MachineFrameInfo &MFI = MF->getFrameInfo();
1109     if (MFI.getObjectAlignment(FI) < 4)
1110       MFI.setObjectAlignment(FI, 4);
1111     Base = CurDAG->getTargetFrameIndex(
1112         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1113     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1114     return true;
1115   }
1116
1117   if (!CurDAG->isBaseWithConstantOffset(N))
1118     return false;
1119
1120   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1121   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1122       (LHSR && LHSR->getReg() == ARM::SP)) {
1123     // If the RHS is + imm8 * scale, fold into addr mode.
1124     int RHSC;
1125     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1126       Base = N.getOperand(0);
1127       if (Base.getOpcode() == ISD::FrameIndex) {
1128         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1129         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1130         // indexed by the LHS must be 4-byte aligned.
1131         MachineFrameInfo &MFI = MF->getFrameInfo();
1132         if (MFI.getObjectAlignment(FI) < 4)
1133           MFI.setObjectAlignment(FI, 4);
1134         Base = CurDAG->getTargetFrameIndex(
1135             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1136       }
1137       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1138       return true;
1139     }
1140   }
1141
1142   return false;
1143 }
1144
1145
1146 //===----------------------------------------------------------------------===//
1147 //                        Thumb 2 Addressing Modes
1148 //===----------------------------------------------------------------------===//
1149
1150
1151 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1152                                             SDValue &Base, SDValue &OffImm) {
1153   // Match simple R + imm12 operands.
1154
1155   // Base only.
1156   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1157       !CurDAG->isBaseWithConstantOffset(N)) {
1158     if (N.getOpcode() == ISD::FrameIndex) {
1159       // Match frame index.
1160       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1161       Base = CurDAG->getTargetFrameIndex(
1162           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1163       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1164       return true;
1165     }
1166
1167     if (N.getOpcode() == ARMISD::Wrapper &&
1168         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1169         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1170         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1171       Base = N.getOperand(0);
1172       if (Base.getOpcode() == ISD::TargetConstantPool)
1173         return false;  // We want to select t2LDRpci instead.
1174     } else
1175       Base = N;
1176     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1177     return true;
1178   }
1179
1180   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1181     if (SelectT2AddrModeImm8(N, Base, OffImm))
1182       // Let t2LDRi8 handle (R - imm8).
1183       return false;
1184
1185     int RHSC = (int)RHS->getZExtValue();
1186     if (N.getOpcode() == ISD::SUB)
1187       RHSC = -RHSC;
1188
1189     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1190       Base   = N.getOperand(0);
1191       if (Base.getOpcode() == ISD::FrameIndex) {
1192         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1193         Base = CurDAG->getTargetFrameIndex(
1194             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1195       }
1196       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1197       return true;
1198     }
1199   }
1200
1201   // Base only.
1202   Base = N;
1203   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1204   return true;
1205 }
1206
1207 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1208                                            SDValue &Base, SDValue &OffImm) {
1209   // Match simple R - imm8 operands.
1210   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1211       !CurDAG->isBaseWithConstantOffset(N))
1212     return false;
1213
1214   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1215     int RHSC = (int)RHS->getSExtValue();
1216     if (N.getOpcode() == ISD::SUB)
1217       RHSC = -RHSC;
1218
1219     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1220       Base = N.getOperand(0);
1221       if (Base.getOpcode() == ISD::FrameIndex) {
1222         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1223         Base = CurDAG->getTargetFrameIndex(
1224             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1225       }
1226       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1227       return true;
1228     }
1229   }
1230
1231   return false;
1232 }
1233
1234 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1235                                                  SDValue &OffImm){
1236   unsigned Opcode = Op->getOpcode();
1237   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1238     ? cast<LoadSDNode>(Op)->getAddressingMode()
1239     : cast<StoreSDNode>(Op)->getAddressingMode();
1240   int RHSC;
1241   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1242     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1243       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1244       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1245     return true;
1246   }
1247
1248   return false;
1249 }
1250
1251 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1252                                             SDValue &Base,
1253                                             SDValue &OffReg, SDValue &ShImm) {
1254   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1255   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1256     return false;
1257
1258   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1259   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1260     int RHSC = (int)RHS->getZExtValue();
1261     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1262       return false;
1263     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1264       return false;
1265   }
1266
1267   // Look for (R + R) or (R + (R << [1,2,3])).
1268   unsigned ShAmt = 0;
1269   Base   = N.getOperand(0);
1270   OffReg = N.getOperand(1);
1271
1272   // Swap if it is ((R << c) + R).
1273   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1274   if (ShOpcVal != ARM_AM::lsl) {
1275     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1276     if (ShOpcVal == ARM_AM::lsl)
1277       std::swap(Base, OffReg);
1278   }
1279
1280   if (ShOpcVal == ARM_AM::lsl) {
1281     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1282     // it.
1283     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1284       ShAmt = Sh->getZExtValue();
1285       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1286         OffReg = OffReg.getOperand(0);
1287       else {
1288         ShAmt = 0;
1289       }
1290     }
1291   }
1292
1293   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1294   // and use it in a shifted operand do so.
1295   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1296     unsigned PowerOfTwo = 0;
1297     SDValue NewMulConst;
1298     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1299       HandleSDNode Handle(OffReg);
1300       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1301       OffReg = Handle.getValue();
1302       ShAmt = PowerOfTwo;
1303     }
1304   }
1305
1306   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1307
1308   return true;
1309 }
1310
1311 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1312                                                 SDValue &OffImm) {
1313   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1314   // instructions.
1315   Base = N;
1316   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1317
1318   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1319     return true;
1320
1321   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1322   if (!RHS)
1323     return true;
1324
1325   uint32_t RHSC = (int)RHS->getZExtValue();
1326   if (RHSC > 1020 || RHSC % 4 != 0)
1327     return true;
1328
1329   Base = N.getOperand(0);
1330   if (Base.getOpcode() == ISD::FrameIndex) {
1331     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1332     Base = CurDAG->getTargetFrameIndex(
1333         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1334   }
1335
1336   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1337   return true;
1338 }
1339
1340 //===--------------------------------------------------------------------===//
1341
1342 /// getAL - Returns a ARMCC::AL immediate node.
1343 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1344   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1345 }
1346
1347 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1348   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1349   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1350   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1351 }
1352
1353 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1354   LoadSDNode *LD = cast<LoadSDNode>(N);
1355   ISD::MemIndexedMode AM = LD->getAddressingMode();
1356   if (AM == ISD::UNINDEXED)
1357     return false;
1358
1359   EVT LoadedVT = LD->getMemoryVT();
1360   SDValue Offset, AMOpc;
1361   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1362   unsigned Opcode = 0;
1363   bool Match = false;
1364   if (LoadedVT == MVT::i32 && isPre &&
1365       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1366     Opcode = ARM::LDR_PRE_IMM;
1367     Match = true;
1368   } else if (LoadedVT == MVT::i32 && !isPre &&
1369       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1370     Opcode = ARM::LDR_POST_IMM;
1371     Match = true;
1372   } else if (LoadedVT == MVT::i32 &&
1373       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1374     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1375     Match = true;
1376
1377   } else if (LoadedVT == MVT::i16 &&
1378              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1379     Match = true;
1380     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1381       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1382       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1383   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1384     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1385       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1386         Match = true;
1387         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1388       }
1389     } else {
1390       if (isPre &&
1391           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1392         Match = true;
1393         Opcode = ARM::LDRB_PRE_IMM;
1394       } else if (!isPre &&
1395                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1396         Match = true;
1397         Opcode = ARM::LDRB_POST_IMM;
1398       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1399         Match = true;
1400         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1401       }
1402     }
1403   }
1404
1405   if (Match) {
1406     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1407       SDValue Chain = LD->getChain();
1408       SDValue Base = LD->getBasePtr();
1409       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1410                        CurDAG->getRegister(0, MVT::i32), Chain };
1411       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1412                                            MVT::Other, Ops);
1413       transferMemOperands(N, New);
1414       ReplaceNode(N, New);
1415       return true;
1416     } else {
1417       SDValue Chain = LD->getChain();
1418       SDValue Base = LD->getBasePtr();
1419       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1420                        CurDAG->getRegister(0, MVT::i32), Chain };
1421       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1422                                            MVT::Other, Ops);
1423       transferMemOperands(N, New);
1424       ReplaceNode(N, New);
1425       return true;
1426     }
1427   }
1428
1429   return false;
1430 }
1431
1432 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1433   LoadSDNode *LD = cast<LoadSDNode>(N);
1434   EVT LoadedVT = LD->getMemoryVT();
1435   ISD::MemIndexedMode AM = LD->getAddressingMode();
1436   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1437       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1438     return false;
1439
1440   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1441   if (!COffs || COffs->getZExtValue() != 4)
1442     return false;
1443
1444   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1445   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1446   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1447   // ISel.
1448   SDValue Chain = LD->getChain();
1449   SDValue Base = LD->getBasePtr();
1450   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1451                    CurDAG->getRegister(0, MVT::i32), Chain };
1452   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1453                                        MVT::i32, MVT::Other, Ops);
1454   transferMemOperands(N, New);
1455   ReplaceNode(N, New);
1456   return true;
1457 }
1458
1459 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1460   LoadSDNode *LD = cast<LoadSDNode>(N);
1461   ISD::MemIndexedMode AM = LD->getAddressingMode();
1462   if (AM == ISD::UNINDEXED)
1463     return false;
1464
1465   EVT LoadedVT = LD->getMemoryVT();
1466   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1467   SDValue Offset;
1468   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1469   unsigned Opcode = 0;
1470   bool Match = false;
1471   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1472     switch (LoadedVT.getSimpleVT().SimpleTy) {
1473     case MVT::i32:
1474       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1475       break;
1476     case MVT::i16:
1477       if (isSExtLd)
1478         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1479       else
1480         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1481       break;
1482     case MVT::i8:
1483     case MVT::i1:
1484       if (isSExtLd)
1485         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1486       else
1487         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1488       break;
1489     default:
1490       return false;
1491     }
1492     Match = true;
1493   }
1494
1495   if (Match) {
1496     SDValue Chain = LD->getChain();
1497     SDValue Base = LD->getBasePtr();
1498     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1499                      CurDAG->getRegister(0, MVT::i32), Chain };
1500     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1501                                          MVT::Other, Ops);
1502     transferMemOperands(N, New);
1503     ReplaceNode(N, New);
1504     return true;
1505   }
1506
1507   return false;
1508 }
1509
1510 /// Form a GPRPair pseudo register from a pair of GPR regs.
1511 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1512   SDLoc dl(V0.getNode());
1513   SDValue RegClass =
1514     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1515   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1516   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1517   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1518   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1519 }
1520
1521 /// Form a D register from a pair of S registers.
1522 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1523   SDLoc dl(V0.getNode());
1524   SDValue RegClass =
1525     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1526   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1527   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1528   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1529   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1530 }
1531
1532 /// Form a quad register from a pair of D registers.
1533 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1534   SDLoc dl(V0.getNode());
1535   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1536                                                MVT::i32);
1537   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1538   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1539   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1540   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1541 }
1542
1543 /// Form 4 consecutive D registers from a pair of Q registers.
1544 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1545   SDLoc dl(V0.getNode());
1546   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1547                                                MVT::i32);
1548   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1549   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1550   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1551   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1552 }
1553
1554 /// Form 4 consecutive S registers.
1555 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1556                                    SDValue V2, SDValue V3) {
1557   SDLoc dl(V0.getNode());
1558   SDValue RegClass =
1559     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1560   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1561   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1562   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1563   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1564   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1565                                     V2, SubReg2, V3, SubReg3 };
1566   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1567 }
1568
1569 /// Form 4 consecutive D registers.
1570 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1571                                    SDValue V2, SDValue V3) {
1572   SDLoc dl(V0.getNode());
1573   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1574                                                MVT::i32);
1575   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1576   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1577   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1578   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1579   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1580                                     V2, SubReg2, V3, SubReg3 };
1581   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1582 }
1583
1584 /// Form 4 consecutive Q registers.
1585 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1586                                    SDValue V2, SDValue V3) {
1587   SDLoc dl(V0.getNode());
1588   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1589                                                MVT::i32);
1590   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1591   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1592   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1593   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1594   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1595                                     V2, SubReg2, V3, SubReg3 };
1596   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1597 }
1598
1599 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1600 /// of a NEON VLD or VST instruction.  The supported values depend on the
1601 /// number of registers being loaded.
1602 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1603                                        unsigned NumVecs, bool is64BitVector) {
1604   unsigned NumRegs = NumVecs;
1605   if (!is64BitVector && NumVecs < 3)
1606     NumRegs *= 2;
1607
1608   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1609   if (Alignment >= 32 && NumRegs == 4)
1610     Alignment = 32;
1611   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1612     Alignment = 16;
1613   else if (Alignment >= 8)
1614     Alignment = 8;
1615   else
1616     Alignment = 0;
1617
1618   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1619 }
1620
1621 static bool isVLDfixed(unsigned Opc)
1622 {
1623   switch (Opc) {
1624   default: return false;
1625   case ARM::VLD1d8wb_fixed : return true;
1626   case ARM::VLD1d16wb_fixed : return true;
1627   case ARM::VLD1d64Qwb_fixed : return true;
1628   case ARM::VLD1d32wb_fixed : return true;
1629   case ARM::VLD1d64wb_fixed : return true;
1630   case ARM::VLD1d64TPseudoWB_fixed : return true;
1631   case ARM::VLD1d64QPseudoWB_fixed : return true;
1632   case ARM::VLD1q8wb_fixed : return true;
1633   case ARM::VLD1q16wb_fixed : return true;
1634   case ARM::VLD1q32wb_fixed : return true;
1635   case ARM::VLD1q64wb_fixed : return true;
1636   case ARM::VLD1DUPd8wb_fixed : return true;
1637   case ARM::VLD1DUPd16wb_fixed : return true;
1638   case ARM::VLD1DUPd32wb_fixed : return true;
1639   case ARM::VLD1DUPq8wb_fixed : return true;
1640   case ARM::VLD1DUPq16wb_fixed : return true;
1641   case ARM::VLD1DUPq32wb_fixed : return true;
1642   case ARM::VLD2d8wb_fixed : return true;
1643   case ARM::VLD2d16wb_fixed : return true;
1644   case ARM::VLD2d32wb_fixed : return true;
1645   case ARM::VLD2q8PseudoWB_fixed : return true;
1646   case ARM::VLD2q16PseudoWB_fixed : return true;
1647   case ARM::VLD2q32PseudoWB_fixed : return true;
1648   case ARM::VLD2DUPd8wb_fixed : return true;
1649   case ARM::VLD2DUPd16wb_fixed : return true;
1650   case ARM::VLD2DUPd32wb_fixed : return true;
1651   }
1652 }
1653
1654 static bool isVSTfixed(unsigned Opc)
1655 {
1656   switch (Opc) {
1657   default: return false;
1658   case ARM::VST1d8wb_fixed : return true;
1659   case ARM::VST1d16wb_fixed : return true;
1660   case ARM::VST1d32wb_fixed : return true;
1661   case ARM::VST1d64wb_fixed : return true;
1662   case ARM::VST1q8wb_fixed : return true;
1663   case ARM::VST1q16wb_fixed : return true;
1664   case ARM::VST1q32wb_fixed : return true;
1665   case ARM::VST1q64wb_fixed : return true;
1666   case ARM::VST1d64TPseudoWB_fixed : return true;
1667   case ARM::VST1d64QPseudoWB_fixed : return true;
1668   case ARM::VST2d8wb_fixed : return true;
1669   case ARM::VST2d16wb_fixed : return true;
1670   case ARM::VST2d32wb_fixed : return true;
1671   case ARM::VST2q8PseudoWB_fixed : return true;
1672   case ARM::VST2q16PseudoWB_fixed : return true;
1673   case ARM::VST2q32PseudoWB_fixed : return true;
1674   }
1675 }
1676
1677 // Get the register stride update opcode of a VLD/VST instruction that
1678 // is otherwise equivalent to the given fixed stride updating instruction.
1679 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1680   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1681     && "Incorrect fixed stride updating instruction.");
1682   switch (Opc) {
1683   default: break;
1684   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1685   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1686   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1687   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1688   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1689   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1690   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1691   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1692   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1693   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1694   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1695   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1696   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1697   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1698   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1699   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1700   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1701   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1702
1703   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1704   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1705   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1706   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1707   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1708   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1709   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1710   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1711   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1712   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1713
1714   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1715   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1716   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1717   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1718   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1719   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1720
1721   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1722   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1723   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1724   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1725   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1726   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1727
1728   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1729   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1730   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1731   }
1732   return Opc; // If not one we handle, return it unchanged.
1733 }
1734
1735 /// Returns true if the given increment is a Constant known to be equal to the
1736 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1737 /// be used.
1738 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1739   auto C = dyn_cast<ConstantSDNode>(Inc);
1740   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1741 }
1742
1743 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1744                                 const uint16_t *DOpcodes,
1745                                 const uint16_t *QOpcodes0,
1746                                 const uint16_t *QOpcodes1) {
1747   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1748   SDLoc dl(N);
1749
1750   SDValue MemAddr, Align;
1751   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1752                                    // nodes are not intrinsics.
1753   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1754   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1755     return;
1756
1757   SDValue Chain = N->getOperand(0);
1758   EVT VT = N->getValueType(0);
1759   bool is64BitVector = VT.is64BitVector();
1760   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1761
1762   unsigned OpcodeIndex;
1763   switch (VT.getSimpleVT().SimpleTy) {
1764   default: llvm_unreachable("unhandled vld type");
1765     // Double-register operations:
1766   case MVT::v8i8:  OpcodeIndex = 0; break;
1767   case MVT::v4i16: OpcodeIndex = 1; break;
1768   case MVT::v2f32:
1769   case MVT::v2i32: OpcodeIndex = 2; break;
1770   case MVT::v1i64: OpcodeIndex = 3; break;
1771     // Quad-register operations:
1772   case MVT::v16i8: OpcodeIndex = 0; break;
1773   case MVT::v8i16: OpcodeIndex = 1; break;
1774   case MVT::v4f32:
1775   case MVT::v4i32: OpcodeIndex = 2; break;
1776   case MVT::v2f64:
1777   case MVT::v2i64: OpcodeIndex = 3; break;
1778   }
1779
1780   EVT ResTy;
1781   if (NumVecs == 1)
1782     ResTy = VT;
1783   else {
1784     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1785     if (!is64BitVector)
1786       ResTyElts *= 2;
1787     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1788   }
1789   std::vector<EVT> ResTys;
1790   ResTys.push_back(ResTy);
1791   if (isUpdating)
1792     ResTys.push_back(MVT::i32);
1793   ResTys.push_back(MVT::Other);
1794
1795   SDValue Pred = getAL(CurDAG, dl);
1796   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1797   SDNode *VLd;
1798   SmallVector<SDValue, 7> Ops;
1799
1800   // Double registers and VLD1/VLD2 quad registers are directly supported.
1801   if (is64BitVector || NumVecs <= 2) {
1802     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1803                     QOpcodes0[OpcodeIndex]);
1804     Ops.push_back(MemAddr);
1805     Ops.push_back(Align);
1806     if (isUpdating) {
1807       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1808       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1809       if (!IsImmUpdate) {
1810         // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1811         // check for the opcode rather than the number of vector elements.
1812         if (isVLDfixed(Opc))
1813           Opc = getVLDSTRegisterUpdateOpcode(Opc);
1814         Ops.push_back(Inc);
1815       // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
1816       // the operands if not such an opcode.
1817       } else if (!isVLDfixed(Opc))
1818         Ops.push_back(Reg0);
1819     }
1820     Ops.push_back(Pred);
1821     Ops.push_back(Reg0);
1822     Ops.push_back(Chain);
1823     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1824
1825   } else {
1826     // Otherwise, quad registers are loaded with two separate instructions,
1827     // where one loads the even registers and the other loads the odd registers.
1828     EVT AddrTy = MemAddr.getValueType();
1829
1830     // Load the even subregs.  This is always an updating load, so that it
1831     // provides the address to the second load for the odd subregs.
1832     SDValue ImplDef =
1833       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1834     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1835     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1836                                           ResTy, AddrTy, MVT::Other, OpsA);
1837     Chain = SDValue(VLdA, 2);
1838
1839     // Load the odd subregs.
1840     Ops.push_back(SDValue(VLdA, 1));
1841     Ops.push_back(Align);
1842     if (isUpdating) {
1843       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1844       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1845              "only constant post-increment update allowed for VLD3/4");
1846       (void)Inc;
1847       Ops.push_back(Reg0);
1848     }
1849     Ops.push_back(SDValue(VLdA, 0));
1850     Ops.push_back(Pred);
1851     Ops.push_back(Reg0);
1852     Ops.push_back(Chain);
1853     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1854   }
1855
1856   // Transfer memoperands.
1857   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1858   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1859   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1860
1861   if (NumVecs == 1) {
1862     ReplaceNode(N, VLd);
1863     return;
1864   }
1865
1866   // Extract out the subregisters.
1867   SDValue SuperReg = SDValue(VLd, 0);
1868   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1869                     ARM::qsub_3 == ARM::qsub_0 + 3,
1870                 "Unexpected subreg numbering");
1871   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1872   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1873     ReplaceUses(SDValue(N, Vec),
1874                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1875   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1876   if (isUpdating)
1877     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1878   CurDAG->RemoveDeadNode(N);
1879 }
1880
1881 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1882                                 const uint16_t *DOpcodes,
1883                                 const uint16_t *QOpcodes0,
1884                                 const uint16_t *QOpcodes1) {
1885   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1886   SDLoc dl(N);
1887
1888   SDValue MemAddr, Align;
1889   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
1890                                    // nodes are not intrinsics.
1891   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
1892   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1893   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1894     return;
1895
1896   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1897   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1898
1899   SDValue Chain = N->getOperand(0);
1900   EVT VT = N->getOperand(Vec0Idx).getValueType();
1901   bool is64BitVector = VT.is64BitVector();
1902   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1903
1904   unsigned OpcodeIndex;
1905   switch (VT.getSimpleVT().SimpleTy) {
1906   default: llvm_unreachable("unhandled vst type");
1907     // Double-register operations:
1908   case MVT::v8i8:  OpcodeIndex = 0; break;
1909   case MVT::v4f16:
1910   case MVT::v4i16: OpcodeIndex = 1; break;
1911   case MVT::v2f32:
1912   case MVT::v2i32: OpcodeIndex = 2; break;
1913   case MVT::v1i64: OpcodeIndex = 3; break;
1914     // Quad-register operations:
1915   case MVT::v16i8: OpcodeIndex = 0; break;
1916   case MVT::v8f16:
1917   case MVT::v8i16: OpcodeIndex = 1; break;
1918   case MVT::v4f32:
1919   case MVT::v4i32: OpcodeIndex = 2; break;
1920   case MVT::v2f64:
1921   case MVT::v2i64: OpcodeIndex = 3; break;
1922   }
1923
1924   std::vector<EVT> ResTys;
1925   if (isUpdating)
1926     ResTys.push_back(MVT::i32);
1927   ResTys.push_back(MVT::Other);
1928
1929   SDValue Pred = getAL(CurDAG, dl);
1930   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1931   SmallVector<SDValue, 7> Ops;
1932
1933   // Double registers and VST1/VST2 quad registers are directly supported.
1934   if (is64BitVector || NumVecs <= 2) {
1935     SDValue SrcReg;
1936     if (NumVecs == 1) {
1937       SrcReg = N->getOperand(Vec0Idx);
1938     } else if (is64BitVector) {
1939       // Form a REG_SEQUENCE to force register allocation.
1940       SDValue V0 = N->getOperand(Vec0Idx + 0);
1941       SDValue V1 = N->getOperand(Vec0Idx + 1);
1942       if (NumVecs == 2)
1943         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1944       else {
1945         SDValue V2 = N->getOperand(Vec0Idx + 2);
1946         // If it's a vst3, form a quad D-register and leave the last part as
1947         // an undef.
1948         SDValue V3 = (NumVecs == 3)
1949           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
1950           : N->getOperand(Vec0Idx + 3);
1951         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1952       }
1953     } else {
1954       // Form a QQ register.
1955       SDValue Q0 = N->getOperand(Vec0Idx);
1956       SDValue Q1 = N->getOperand(Vec0Idx + 1);
1957       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1958     }
1959
1960     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1961                     QOpcodes0[OpcodeIndex]);
1962     Ops.push_back(MemAddr);
1963     Ops.push_back(Align);
1964     if (isUpdating) {
1965       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1966       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1967       if (!IsImmUpdate) {
1968         // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
1969         // check for the opcode rather than the number of vector elements.
1970         if (isVSTfixed(Opc))
1971           Opc = getVLDSTRegisterUpdateOpcode(Opc);
1972         Ops.push_back(Inc);
1973       }
1974       // VST1/VST2 fixed increment does not need Reg0 so only include it in
1975       // the operands if not such an opcode.
1976       else if (!isVSTfixed(Opc))
1977         Ops.push_back(Reg0);
1978     }
1979     Ops.push_back(SrcReg);
1980     Ops.push_back(Pred);
1981     Ops.push_back(Reg0);
1982     Ops.push_back(Chain);
1983     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1984
1985     // Transfer memoperands.
1986     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
1987
1988     ReplaceNode(N, VSt);
1989     return;
1990   }
1991
1992   // Otherwise, quad registers are stored with two separate instructions,
1993   // where one stores the even registers and the other stores the odd registers.
1994
1995   // Form the QQQQ REG_SEQUENCE.
1996   SDValue V0 = N->getOperand(Vec0Idx + 0);
1997   SDValue V1 = N->getOperand(Vec0Idx + 1);
1998   SDValue V2 = N->getOperand(Vec0Idx + 2);
1999   SDValue V3 = (NumVecs == 3)
2000     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2001     : N->getOperand(Vec0Idx + 3);
2002   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2003
2004   // Store the even D registers.  This is always an updating store, so that it
2005   // provides the address to the second store for the odd subregs.
2006   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2007   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2008                                         MemAddr.getValueType(),
2009                                         MVT::Other, OpsA);
2010   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2011   Chain = SDValue(VStA, 1);
2012
2013   // Store the odd D registers.
2014   Ops.push_back(SDValue(VStA, 0));
2015   Ops.push_back(Align);
2016   if (isUpdating) {
2017     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2018     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2019            "only constant post-increment update allowed for VST3/4");
2020     (void)Inc;
2021     Ops.push_back(Reg0);
2022   }
2023   Ops.push_back(RegSeq);
2024   Ops.push_back(Pred);
2025   Ops.push_back(Reg0);
2026   Ops.push_back(Chain);
2027   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2028                                         Ops);
2029   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2030   ReplaceNode(N, VStB);
2031 }
2032
2033 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2034                                       unsigned NumVecs,
2035                                       const uint16_t *DOpcodes,
2036                                       const uint16_t *QOpcodes) {
2037   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2038   SDLoc dl(N);
2039
2040   SDValue MemAddr, Align;
2041   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2042                                    // nodes are not intrinsics.
2043   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2044   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2045   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2046     return;
2047
2048   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2049   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2050
2051   SDValue Chain = N->getOperand(0);
2052   unsigned Lane =
2053     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2054   EVT VT = N->getOperand(Vec0Idx).getValueType();
2055   bool is64BitVector = VT.is64BitVector();
2056
2057   unsigned Alignment = 0;
2058   if (NumVecs != 3) {
2059     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2060     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2061     if (Alignment > NumBytes)
2062       Alignment = NumBytes;
2063     if (Alignment < 8 && Alignment < NumBytes)
2064       Alignment = 0;
2065     // Alignment must be a power of two; make sure of that.
2066     Alignment = (Alignment & -Alignment);
2067     if (Alignment == 1)
2068       Alignment = 0;
2069   }
2070   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2071
2072   unsigned OpcodeIndex;
2073   switch (VT.getSimpleVT().SimpleTy) {
2074   default: llvm_unreachable("unhandled vld/vst lane type");
2075     // Double-register operations:
2076   case MVT::v8i8:  OpcodeIndex = 0; break;
2077   case MVT::v4i16: OpcodeIndex = 1; break;
2078   case MVT::v2f32:
2079   case MVT::v2i32: OpcodeIndex = 2; break;
2080     // Quad-register operations:
2081   case MVT::v8i16: OpcodeIndex = 0; break;
2082   case MVT::v4f32:
2083   case MVT::v4i32: OpcodeIndex = 1; break;
2084   }
2085
2086   std::vector<EVT> ResTys;
2087   if (IsLoad) {
2088     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2089     if (!is64BitVector)
2090       ResTyElts *= 2;
2091     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2092                                       MVT::i64, ResTyElts));
2093   }
2094   if (isUpdating)
2095     ResTys.push_back(MVT::i32);
2096   ResTys.push_back(MVT::Other);
2097
2098   SDValue Pred = getAL(CurDAG, dl);
2099   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2100
2101   SmallVector<SDValue, 8> Ops;
2102   Ops.push_back(MemAddr);
2103   Ops.push_back(Align);
2104   if (isUpdating) {
2105     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2106     bool IsImmUpdate =
2107         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2108     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2109   }
2110
2111   SDValue SuperReg;
2112   SDValue V0 = N->getOperand(Vec0Idx + 0);
2113   SDValue V1 = N->getOperand(Vec0Idx + 1);
2114   if (NumVecs == 2) {
2115     if (is64BitVector)
2116       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2117     else
2118       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2119   } else {
2120     SDValue V2 = N->getOperand(Vec0Idx + 2);
2121     SDValue V3 = (NumVecs == 3)
2122       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2123       : N->getOperand(Vec0Idx + 3);
2124     if (is64BitVector)
2125       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2126     else
2127       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2128   }
2129   Ops.push_back(SuperReg);
2130   Ops.push_back(getI32Imm(Lane, dl));
2131   Ops.push_back(Pred);
2132   Ops.push_back(Reg0);
2133   Ops.push_back(Chain);
2134
2135   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2136                                   QOpcodes[OpcodeIndex]);
2137   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2138   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2139   if (!IsLoad) {
2140     ReplaceNode(N, VLdLn);
2141     return;
2142   }
2143
2144   // Extract the subregisters.
2145   SuperReg = SDValue(VLdLn, 0);
2146   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2147                     ARM::qsub_3 == ARM::qsub_0 + 3,
2148                 "Unexpected subreg numbering");
2149   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2150   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2151     ReplaceUses(SDValue(N, Vec),
2152                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2153   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2154   if (isUpdating)
2155     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2156   CurDAG->RemoveDeadNode(N);
2157 }
2158
2159 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2160                                    bool isUpdating, unsigned NumVecs,
2161                                    const uint16_t *DOpcodes,
2162                                    const uint16_t *QOpcodes0,
2163                                    const uint16_t *QOpcodes1) {
2164   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2165   SDLoc dl(N);
2166
2167   SDValue MemAddr, Align;
2168   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2169   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2170     return;
2171
2172   SDValue Chain = N->getOperand(0);
2173   EVT VT = N->getValueType(0);
2174   bool is64BitVector = VT.is64BitVector();
2175
2176   unsigned Alignment = 0;
2177   if (NumVecs != 3) {
2178     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2179     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2180     if (Alignment > NumBytes)
2181       Alignment = NumBytes;
2182     if (Alignment < 8 && Alignment < NumBytes)
2183       Alignment = 0;
2184     // Alignment must be a power of two; make sure of that.
2185     Alignment = (Alignment & -Alignment);
2186     if (Alignment == 1)
2187       Alignment = 0;
2188   }
2189   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2190
2191   unsigned OpcodeIndex;
2192   switch (VT.getSimpleVT().SimpleTy) {
2193   default: llvm_unreachable("unhandled vld-dup type");
2194   case MVT::v8i8:
2195   case MVT::v16i8: OpcodeIndex = 0; break;
2196   case MVT::v4i16:
2197   case MVT::v8i16: OpcodeIndex = 1; break;
2198   case MVT::v2f32:
2199   case MVT::v2i32:
2200   case MVT::v4f32:
2201   case MVT::v4i32: OpcodeIndex = 2; break;
2202   case MVT::v1f64:
2203   case MVT::v1i64: OpcodeIndex = 3; break;
2204   }
2205
2206   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2207   if (!is64BitVector)
2208     ResTyElts *= 2;
2209   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2210
2211   std::vector<EVT> ResTys;
2212   ResTys.push_back(ResTy);
2213   if (isUpdating)
2214     ResTys.push_back(MVT::i32);
2215   ResTys.push_back(MVT::Other);
2216
2217   SDValue Pred = getAL(CurDAG, dl);
2218   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2219
2220   SDNode *VLdDup;
2221   if (is64BitVector || NumVecs == 1) {
2222     SmallVector<SDValue, 6> Ops;
2223     Ops.push_back(MemAddr);
2224     Ops.push_back(Align);
2225     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2226                                    QOpcodes0[OpcodeIndex];
2227     if (isUpdating) {
2228       // fixed-stride update instructions don't have an explicit writeback
2229       // operand. It's implicit in the opcode itself.
2230       SDValue Inc = N->getOperand(2);
2231       bool IsImmUpdate =
2232           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2233       if (NumVecs <= 2 && !IsImmUpdate)
2234         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2235       if (!IsImmUpdate)
2236         Ops.push_back(Inc);
2237       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2238       else if (NumVecs > 2)
2239         Ops.push_back(Reg0);
2240     }
2241     Ops.push_back(Pred);
2242     Ops.push_back(Reg0);
2243     Ops.push_back(Chain);
2244     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2245   } else if (NumVecs == 2) {
2246     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2247     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2248                                           dl, ResTys, OpsA);
2249
2250     Chain = SDValue(VLdA, 1);
2251     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2252     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2253   } else {
2254     SDValue ImplDef =
2255       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2256     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2257     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2258                                           dl, ResTys, OpsA);
2259
2260     SDValue SuperReg = SDValue(VLdA, 0);
2261     Chain = SDValue(VLdA, 1);
2262     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2263     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2264   }
2265
2266   // Transfer memoperands.
2267   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2268   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2269   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2270
2271   // Extract the subregisters.
2272   if (NumVecs == 1) {
2273     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2274   } else {
2275     SDValue SuperReg = SDValue(VLdDup, 0);
2276     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2277     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2278     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2279       ReplaceUses(SDValue(N, Vec),
2280                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2281     }
2282   }
2283   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2284   if (isUpdating)
2285     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2286   CurDAG->RemoveDeadNode(N);
2287 }
2288
2289 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2290   if (!Subtarget->hasV6T2Ops())
2291     return false;
2292
2293   unsigned Opc = isSigned
2294     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2295     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2296   SDLoc dl(N);
2297
2298   // For unsigned extracts, check for a shift right and mask
2299   unsigned And_imm = 0;
2300   if (N->getOpcode() == ISD::AND) {
2301     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2302
2303       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2304       if (And_imm & (And_imm + 1))
2305         return false;
2306
2307       unsigned Srl_imm = 0;
2308       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2309                                 Srl_imm)) {
2310         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2311
2312         // Note: The width operand is encoded as width-1.
2313         unsigned Width = countTrailingOnes(And_imm) - 1;
2314         unsigned LSB = Srl_imm;
2315
2316         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2317
2318         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2319           // It's cheaper to use a right shift to extract the top bits.
2320           if (Subtarget->isThumb()) {
2321             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2322             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2323                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2324                               getAL(CurDAG, dl), Reg0, Reg0 };
2325             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2326             return true;
2327           }
2328
2329           // ARM models shift instructions as MOVsi with shifter operand.
2330           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2331           SDValue ShOpc =
2332             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2333                                       MVT::i32);
2334           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2335                             getAL(CurDAG, dl), Reg0, Reg0 };
2336           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2337           return true;
2338         }
2339
2340         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2341         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2342                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2343                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2344                           getAL(CurDAG, dl), Reg0 };
2345         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2346         return true;
2347       }
2348     }
2349     return false;
2350   }
2351
2352   // Otherwise, we're looking for a shift of a shift
2353   unsigned Shl_imm = 0;
2354   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2355     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2356     unsigned Srl_imm = 0;
2357     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2358       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2359       // Note: The width operand is encoded as width-1.
2360       unsigned Width = 32 - Srl_imm - 1;
2361       int LSB = Srl_imm - Shl_imm;
2362       if (LSB < 0)
2363         return false;
2364       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2365       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2366       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2367                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2368                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2369                         getAL(CurDAG, dl), Reg0 };
2370       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2371       return true;
2372     }
2373   }
2374
2375   // Or we are looking for a shift of an and, with a mask operand
2376   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2377       isShiftedMask_32(And_imm)) {
2378     unsigned Srl_imm = 0;
2379     unsigned LSB = countTrailingZeros(And_imm);
2380     // Shift must be the same as the ands lsb
2381     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2382       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2383       unsigned MSB = 31 - countLeadingZeros(And_imm);
2384       // Note: The width operand is encoded as width-1.
2385       unsigned Width = MSB - LSB;
2386       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2387       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2388       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2389                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2390                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2391                         getAL(CurDAG, dl), Reg0 };
2392       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2393       return true;
2394     }
2395   }
2396
2397   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2398     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2399     unsigned LSB = 0;
2400     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2401         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2402       return false;
2403
2404     if (LSB + Width > 32)
2405       return false;
2406
2407     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2408     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2409     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2410                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2411                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2412                       getAL(CurDAG, dl), Reg0 };
2413     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2414     return true;
2415   }
2416
2417   return false;
2418 }
2419
2420 /// Target-specific DAG combining for ISD::XOR.
2421 /// Target-independent combining lowers SELECT_CC nodes of the form
2422 /// select_cc setg[ge] X,  0,  X, -X
2423 /// select_cc setgt    X, -1,  X, -X
2424 /// select_cc setl[te] X,  0, -X,  X
2425 /// select_cc setlt    X,  1, -X,  X
2426 /// which represent Integer ABS into:
2427 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2428 /// ARM instruction selection detects the latter and matches it to
2429 /// ARM::ABS or ARM::t2ABS machine node.
2430 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2431   SDValue XORSrc0 = N->getOperand(0);
2432   SDValue XORSrc1 = N->getOperand(1);
2433   EVT VT = N->getValueType(0);
2434
2435   if (Subtarget->isThumb1Only())
2436     return false;
2437
2438   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2439     return false;
2440
2441   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2442   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2443   SDValue SRASrc0 = XORSrc1.getOperand(0);
2444   SDValue SRASrc1 = XORSrc1.getOperand(1);
2445   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2446   EVT XType = SRASrc0.getValueType();
2447   unsigned Size = XType.getSizeInBits() - 1;
2448
2449   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2450       XType.isInteger() && SRAConstant != nullptr &&
2451       Size == SRAConstant->getZExtValue()) {
2452     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2453     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2454     return true;
2455   }
2456
2457   return false;
2458 }
2459
2460 /// We've got special pseudo-instructions for these
2461 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2462   unsigned Opcode;
2463   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2464   if (MemTy == MVT::i8)
2465     Opcode = ARM::CMP_SWAP_8;
2466   else if (MemTy == MVT::i16)
2467     Opcode = ARM::CMP_SWAP_16;
2468   else if (MemTy == MVT::i32)
2469     Opcode = ARM::CMP_SWAP_32;
2470   else
2471     llvm_unreachable("Unknown AtomicCmpSwap type");
2472
2473   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2474                    N->getOperand(0)};
2475   SDNode *CmpSwap = CurDAG->getMachineNode(
2476       Opcode, SDLoc(N),
2477       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2478
2479   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2480   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2481   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2482
2483   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2484   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2485   CurDAG->RemoveDeadNode(N);
2486 }
2487
2488 static Optional<std::pair<unsigned, unsigned>>
2489 getContiguousRangeOfSetBits(const APInt &A) {
2490   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2491   unsigned LastOne = A.countTrailingZeros();
2492   if (A.countPopulation() != (FirstOne - LastOne + 1))
2493     return Optional<std::pair<unsigned,unsigned>>();
2494   return std::make_pair(FirstOne, LastOne);
2495 }
2496
2497 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2498   assert(N->getOpcode() == ARMISD::CMPZ);
2499   SwitchEQNEToPLMI = false;
2500
2501   if (!Subtarget->isThumb())
2502     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2503     // LSR don't exist as standalone instructions - they need the barrel shifter.
2504     return;
2505
2506   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2507   SDValue And = N->getOperand(0);
2508   if (!And->hasOneUse())
2509     return;
2510
2511   SDValue Zero = N->getOperand(1);
2512   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2513       And->getOpcode() != ISD::AND)
2514     return;
2515   SDValue X = And.getOperand(0);
2516   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2517
2518   if (!C)
2519     return;
2520   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2521   if (!Range)
2522     return;
2523
2524   // There are several ways to lower this:
2525   SDNode *NewN;
2526   SDLoc dl(N);
2527
2528   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2529     if (Subtarget->isThumb2()) {
2530       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2531       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2532                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2533                         CurDAG->getRegister(0, MVT::i32) };
2534       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2535     } else {
2536       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2537                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2538                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2539       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2540     }
2541   };
2542
2543   if (Range->second == 0) {
2544     //  1. Mask includes the LSB -> Simply shift the top N bits off
2545     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2546     ReplaceNode(And.getNode(), NewN);
2547   } else if (Range->first == 31) {
2548     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2549     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2550     ReplaceNode(And.getNode(), NewN);
2551   } else if (Range->first == Range->second) {
2552     //  3. Only one bit is set. We can shift this into the sign bit and use a
2553     //     PL/MI comparison.
2554     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2555     ReplaceNode(And.getNode(), NewN);
2556
2557     SwitchEQNEToPLMI = true;
2558   } else if (!Subtarget->hasV6T2Ops()) {
2559     //  4. Do a double shift to clear bottom and top bits, but only in
2560     //     thumb-1 mode as in thumb-2 we can use UBFX.
2561     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2562     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2563                      Range->second + (31 - Range->first));
2564     ReplaceNode(And.getNode(), NewN);
2565   }
2566
2567 }
2568
2569 void ARMDAGToDAGISel::Select(SDNode *N) {
2570   SDLoc dl(N);
2571
2572   if (N->isMachineOpcode()) {
2573     N->setNodeId(-1);
2574     return;   // Already selected.
2575   }
2576
2577   switch (N->getOpcode()) {
2578   default: break;
2579   case ISD::WRITE_REGISTER:
2580     if (tryWriteRegister(N))
2581       return;
2582     break;
2583   case ISD::READ_REGISTER:
2584     if (tryReadRegister(N))
2585       return;
2586     break;
2587   case ISD::INLINEASM:
2588     if (tryInlineAsm(N))
2589       return;
2590     break;
2591   case ISD::XOR:
2592     // Select special operations if XOR node forms integer ABS pattern
2593     if (tryABSOp(N))
2594       return;
2595     // Other cases are autogenerated.
2596     break;
2597   case ISD::Constant: {
2598     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2599     // If we can't materialize the constant we need to use a literal pool
2600     if (ConstantMaterializationCost(Val) > 2) {
2601       SDValue CPIdx = CurDAG->getTargetConstantPool(
2602           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2603           TLI->getPointerTy(CurDAG->getDataLayout()));
2604
2605       SDNode *ResNode;
2606       if (Subtarget->isThumb()) {
2607         SDValue Ops[] = {
2608           CPIdx,
2609           getAL(CurDAG, dl),
2610           CurDAG->getRegister(0, MVT::i32),
2611           CurDAG->getEntryNode()
2612         };
2613         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2614                                          Ops);
2615       } else {
2616         SDValue Ops[] = {
2617           CPIdx,
2618           CurDAG->getTargetConstant(0, dl, MVT::i32),
2619           getAL(CurDAG, dl),
2620           CurDAG->getRegister(0, MVT::i32),
2621           CurDAG->getEntryNode()
2622         };
2623         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2624                                          Ops);
2625       }
2626       // Annotate the Node with memory operand information so that MachineInstr
2627       // queries work properly. This e.g. gives the register allocation the
2628       // required information for rematerialization.
2629       MachineFunction& MF = CurDAG->getMachineFunction();
2630       MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
2631       MemOp[0] = MF.getMachineMemOperand(
2632           MachinePointerInfo::getConstantPool(MF),
2633           MachineMemOperand::MOLoad, 4, 4);
2634
2635       cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
2636
2637       ReplaceNode(N, ResNode);
2638       return;
2639     }
2640
2641     // Other cases are autogenerated.
2642     break;
2643   }
2644   case ISD::FrameIndex: {
2645     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2646     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2647     SDValue TFI = CurDAG->getTargetFrameIndex(
2648         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2649     if (Subtarget->isThumb1Only()) {
2650       // Set the alignment of the frame object to 4, to avoid having to generate
2651       // more than one ADD
2652       MachineFrameInfo &MFI = MF->getFrameInfo();
2653       if (MFI.getObjectAlignment(FI) < 4)
2654         MFI.setObjectAlignment(FI, 4);
2655       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2656                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2657       return;
2658     } else {
2659       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2660                       ARM::t2ADDri : ARM::ADDri);
2661       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2662                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2663                         CurDAG->getRegister(0, MVT::i32) };
2664       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2665       return;
2666     }
2667   }
2668   case ISD::SRL:
2669     if (tryV6T2BitfieldExtractOp(N, false))
2670       return;
2671     break;
2672   case ISD::SIGN_EXTEND_INREG:
2673   case ISD::SRA:
2674     if (tryV6T2BitfieldExtractOp(N, true))
2675       return;
2676     break;
2677   case ISD::MUL:
2678     if (Subtarget->isThumb1Only())
2679       break;
2680     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2681       unsigned RHSV = C->getZExtValue();
2682       if (!RHSV) break;
2683       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2684         unsigned ShImm = Log2_32(RHSV-1);
2685         if (ShImm >= 32)
2686           break;
2687         SDValue V = N->getOperand(0);
2688         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2689         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2690         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2691         if (Subtarget->isThumb()) {
2692           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2693           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2694           return;
2695         } else {
2696           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2697                             Reg0 };
2698           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2699           return;
2700         }
2701       }
2702       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2703         unsigned ShImm = Log2_32(RHSV+1);
2704         if (ShImm >= 32)
2705           break;
2706         SDValue V = N->getOperand(0);
2707         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2708         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2709         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2710         if (Subtarget->isThumb()) {
2711           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2712           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2713           return;
2714         } else {
2715           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2716                             Reg0 };
2717           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2718           return;
2719         }
2720       }
2721     }
2722     break;
2723   case ISD::AND: {
2724     // Check for unsigned bitfield extract
2725     if (tryV6T2BitfieldExtractOp(N, false))
2726       return;
2727
2728     // If an immediate is used in an AND node, it is possible that the immediate
2729     // can be more optimally materialized when negated. If this is the case we
2730     // can negate the immediate and use a BIC instead.
2731     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2732     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2733       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2734
2735       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2736       // immediate can be negated and fit in the immediate operand of
2737       // a t2BIC, don't do any manual transform here as this can be
2738       // handled by the generic ISel machinery.
2739       bool PreferImmediateEncoding =
2740         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2741       if (!PreferImmediateEncoding &&
2742           ConstantMaterializationCost(Imm) >
2743               ConstantMaterializationCost(~Imm)) {
2744         // The current immediate costs more to materialize than a negated
2745         // immediate, so negate the immediate and use a BIC.
2746         SDValue NewImm =
2747           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2748         // If the new constant didn't exist before, reposition it in the topological
2749         // ordering so it is just before N. Otherwise, don't touch its location.
2750         if (NewImm->getNodeId() == -1)
2751           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2752
2753         if (!Subtarget->hasThumb2()) {
2754           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2755                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2756                            CurDAG->getRegister(0, MVT::i32)};
2757           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2758           return;
2759         } else {
2760           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2761                            CurDAG->getRegister(0, MVT::i32),
2762                            CurDAG->getRegister(0, MVT::i32)};
2763           ReplaceNode(N,
2764                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2765           return;
2766         }
2767       }
2768     }
2769
2770     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2771     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2772     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2773     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2774     // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
2775     EVT VT = N->getValueType(0);
2776     if (VT != MVT::i32)
2777       break;
2778     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2779       ? ARM::t2MOVTi16
2780       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2781     if (!Opc)
2782       break;
2783     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2784     N1C = dyn_cast<ConstantSDNode>(N1);
2785     if (!N1C)
2786       break;
2787     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2788       SDValue N2 = N0.getOperand(1);
2789       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2790       if (!N2C)
2791         break;
2792       unsigned N1CVal = N1C->getZExtValue();
2793       unsigned N2CVal = N2C->getZExtValue();
2794       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2795           (N1CVal & 0xffffU) == 0xffffU &&
2796           (N2CVal & 0xffffU) == 0x0U) {
2797         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2798                                                   dl, MVT::i32);
2799         SDValue Ops[] = { N0.getOperand(0), Imm16,
2800                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2801         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2802         return;
2803       }
2804     }
2805
2806     break;
2807   }
2808   case ARMISD::UMAAL: {
2809     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2810     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2811                       N->getOperand(2), N->getOperand(3),
2812                       getAL(CurDAG, dl),
2813                       CurDAG->getRegister(0, MVT::i32) };
2814     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2815     return;
2816   }
2817   case ARMISD::UMLAL:{
2818     if (Subtarget->isThumb()) {
2819       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2820                         N->getOperand(3), getAL(CurDAG, dl),
2821                         CurDAG->getRegister(0, MVT::i32)};
2822       ReplaceNode(
2823           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2824       return;
2825     }else{
2826       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2827                         N->getOperand(3), getAL(CurDAG, dl),
2828                         CurDAG->getRegister(0, MVT::i32),
2829                         CurDAG->getRegister(0, MVT::i32) };
2830       ReplaceNode(N, CurDAG->getMachineNode(
2831                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2832                          MVT::i32, MVT::i32, Ops));
2833       return;
2834     }
2835   }
2836   case ARMISD::SMLAL:{
2837     if (Subtarget->isThumb()) {
2838       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2839                         N->getOperand(3), getAL(CurDAG, dl),
2840                         CurDAG->getRegister(0, MVT::i32)};
2841       ReplaceNode(
2842           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2843       return;
2844     }else{
2845       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2846                         N->getOperand(3), getAL(CurDAG, dl),
2847                         CurDAG->getRegister(0, MVT::i32),
2848                         CurDAG->getRegister(0, MVT::i32) };
2849       ReplaceNode(N, CurDAG->getMachineNode(
2850                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2851                          MVT::i32, MVT::i32, Ops));
2852       return;
2853     }
2854   }
2855   case ARMISD::SUBE: {
2856     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2857       break;
2858     // Look for a pattern to match SMMLS
2859     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2860     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2861         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2862         !SDValue(N, 1).use_empty())
2863       break;
2864
2865     if (Subtarget->isThumb())
2866       assert(Subtarget->hasThumb2() &&
2867              "This pattern should not be generated for Thumb");
2868
2869     SDValue SmulLoHi = N->getOperand(1);
2870     SDValue Subc = N->getOperand(2);
2871     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2872
2873     if (!Zero || Zero->getZExtValue() != 0 ||
2874         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2875         N->getOperand(1) != SmulLoHi.getValue(1) ||
2876         N->getOperand(2) != Subc.getValue(1))
2877       break;
2878
2879     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2880     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2881                       N->getOperand(0), getAL(CurDAG, dl),
2882                       CurDAG->getRegister(0, MVT::i32) };
2883     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2884     return;
2885   }
2886   case ISD::LOAD: {
2887     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2888       if (tryT2IndexedLoad(N))
2889         return;
2890     } else if (Subtarget->isThumb()) {
2891       if (tryT1IndexedLoad(N))
2892         return;
2893     } else if (tryARMIndexedLoad(N))
2894       return;
2895     // Other cases are autogenerated.
2896     break;
2897   }
2898   case ARMISD::BRCOND: {
2899     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2900     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2901     // Pattern complexity = 6  cost = 1  size = 0
2902
2903     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2904     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2905     // Pattern complexity = 6  cost = 1  size = 0
2906
2907     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2908     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2909     // Pattern complexity = 6  cost = 1  size = 0
2910
2911     unsigned Opc = Subtarget->isThumb() ?
2912       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2913     SDValue Chain = N->getOperand(0);
2914     SDValue N1 = N->getOperand(1);
2915     SDValue N2 = N->getOperand(2);
2916     SDValue N3 = N->getOperand(3);
2917     SDValue InFlag = N->getOperand(4);
2918     assert(N1.getOpcode() == ISD::BasicBlock);
2919     assert(N2.getOpcode() == ISD::Constant);
2920     assert(N3.getOpcode() == ISD::Register);
2921
2922     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2923
2924     if (InFlag.getOpcode() == ARMISD::CMPZ) {
2925       bool SwitchEQNEToPLMI;
2926       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2927       InFlag = N->getOperand(4);
2928
2929       if (SwitchEQNEToPLMI) {
2930         switch ((ARMCC::CondCodes)CC) {
2931         default: llvm_unreachable("CMPZ must be either NE or EQ!");
2932         case ARMCC::NE:
2933           CC = (unsigned)ARMCC::MI;
2934           break;
2935         case ARMCC::EQ:
2936           CC = (unsigned)ARMCC::PL;
2937           break;
2938         }
2939       }
2940     }
2941
2942     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
2943     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2944     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2945                                              MVT::Glue, Ops);
2946     Chain = SDValue(ResNode, 0);
2947     if (N->getNumValues() == 2) {
2948       InFlag = SDValue(ResNode, 1);
2949       ReplaceUses(SDValue(N, 1), InFlag);
2950     }
2951     ReplaceUses(SDValue(N, 0),
2952                 SDValue(Chain.getNode(), Chain.getResNo()));
2953     CurDAG->RemoveDeadNode(N);
2954     return;
2955   }
2956
2957   case ARMISD::CMPZ: {
2958     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
2959     //   This allows us to avoid materializing the expensive negative constant.
2960     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
2961     //   for its glue output.
2962     SDValue X = N->getOperand(0);
2963     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
2964     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
2965       int64_t Addend = -C->getSExtValue();
2966
2967       SDNode *Add = nullptr;
2968       // ADDS can be better than CMN if the immediate fits in a
2969       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
2970       // Outside that range we can just use a CMN which is 32-bit but has a
2971       // 12-bit immediate range.
2972       if (Addend < 1<<8) {
2973         if (Subtarget->isThumb2()) {
2974           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
2975                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2976                             CurDAG->getRegister(0, MVT::i32) };
2977           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
2978         } else {
2979           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
2980           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
2981                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
2982                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2983           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2984         }
2985       }
2986       if (Add) {
2987         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
2988         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
2989       }
2990     }
2991     // Other cases are autogenerated.
2992     break;
2993   }
2994
2995   case ARMISD::CMOV: {
2996     SDValue InFlag = N->getOperand(4);
2997
2998     if (InFlag.getOpcode() == ARMISD::CMPZ) {
2999       bool SwitchEQNEToPLMI;
3000       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3001
3002       if (SwitchEQNEToPLMI) {
3003         SDValue ARMcc = N->getOperand(2);
3004         ARMCC::CondCodes CC =
3005           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3006
3007         switch (CC) {
3008         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3009         case ARMCC::NE:
3010           CC = ARMCC::MI;
3011           break;
3012         case ARMCC::EQ:
3013           CC = ARMCC::PL;
3014           break;
3015         }
3016         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3017         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3018                          N->getOperand(3), N->getOperand(4)};
3019         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3020       }
3021
3022     }
3023     // Other cases are autogenerated.
3024     break;
3025   }
3026
3027   case ARMISD::VZIP: {
3028     unsigned Opc = 0;
3029     EVT VT = N->getValueType(0);
3030     switch (VT.getSimpleVT().SimpleTy) {
3031     default: return;
3032     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3033     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3034     case MVT::v2f32:
3035     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3036     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3037     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3038     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3039     case MVT::v4f32:
3040     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3041     }
3042     SDValue Pred = getAL(CurDAG, dl);
3043     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3044     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3045     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3046     return;
3047   }
3048   case ARMISD::VUZP: {
3049     unsigned Opc = 0;
3050     EVT VT = N->getValueType(0);
3051     switch (VT.getSimpleVT().SimpleTy) {
3052     default: return;
3053     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3054     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3055     case MVT::v2f32:
3056     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3057     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3058     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3059     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3060     case MVT::v4f32:
3061     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3062     }
3063     SDValue Pred = getAL(CurDAG, dl);
3064     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3065     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3066     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3067     return;
3068   }
3069   case ARMISD::VTRN: {
3070     unsigned Opc = 0;
3071     EVT VT = N->getValueType(0);
3072     switch (VT.getSimpleVT().SimpleTy) {
3073     default: return;
3074     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3075     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3076     case MVT::v2f32:
3077     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3078     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3079     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3080     case MVT::v4f32:
3081     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3082     }
3083     SDValue Pred = getAL(CurDAG, dl);
3084     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3085     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3086     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3087     return;
3088   }
3089   case ARMISD::BUILD_VECTOR: {
3090     EVT VecVT = N->getValueType(0);
3091     EVT EltVT = VecVT.getVectorElementType();
3092     unsigned NumElts = VecVT.getVectorNumElements();
3093     if (EltVT == MVT::f64) {
3094       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3095       ReplaceNode(
3096           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3097       return;
3098     }
3099     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3100     if (NumElts == 2) {
3101       ReplaceNode(
3102           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3103       return;
3104     }
3105     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3106     ReplaceNode(N,
3107                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3108                                     N->getOperand(2), N->getOperand(3)));
3109     return;
3110   }
3111
3112   case ARMISD::VLD1DUP: {
3113     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3114                                          ARM::VLD1DUPd32 };
3115     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3116                                          ARM::VLD1DUPq32 };
3117     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3118     return;
3119   }
3120
3121   case ARMISD::VLD2DUP: {
3122     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3123                                         ARM::VLD2DUPd32 };
3124     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3125     return;
3126   }
3127
3128   case ARMISD::VLD3DUP: {
3129     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3130                                         ARM::VLD3DUPd16Pseudo,
3131                                         ARM::VLD3DUPd32Pseudo };
3132     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3133     return;
3134   }
3135
3136   case ARMISD::VLD4DUP: {
3137     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3138                                         ARM::VLD4DUPd16Pseudo,
3139                                         ARM::VLD4DUPd32Pseudo };
3140     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3141     return;
3142   }
3143
3144   case ARMISD::VLD1DUP_UPD: {
3145     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3146                                          ARM::VLD1DUPd16wb_fixed,
3147                                          ARM::VLD1DUPd32wb_fixed };
3148     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3149                                          ARM::VLD1DUPq16wb_fixed,
3150                                          ARM::VLD1DUPq32wb_fixed };
3151     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3152     return;
3153   }
3154
3155   case ARMISD::VLD2DUP_UPD: {
3156     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3157                                         ARM::VLD2DUPd16wb_fixed,
3158                                         ARM::VLD2DUPd32wb_fixed };
3159     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3160     return;
3161   }
3162
3163   case ARMISD::VLD3DUP_UPD: {
3164     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3165                                         ARM::VLD3DUPd16Pseudo_UPD,
3166                                         ARM::VLD3DUPd32Pseudo_UPD };
3167     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3168     return;
3169   }
3170
3171   case ARMISD::VLD4DUP_UPD: {
3172     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3173                                         ARM::VLD4DUPd16Pseudo_UPD,
3174                                         ARM::VLD4DUPd32Pseudo_UPD };
3175     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3176     return;
3177   }
3178
3179   case ARMISD::VLD1_UPD: {
3180     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3181                                          ARM::VLD1d16wb_fixed,
3182                                          ARM::VLD1d32wb_fixed,
3183                                          ARM::VLD1d64wb_fixed };
3184     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3185                                          ARM::VLD1q16wb_fixed,
3186                                          ARM::VLD1q32wb_fixed,
3187                                          ARM::VLD1q64wb_fixed };
3188     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3189     return;
3190   }
3191
3192   case ARMISD::VLD2_UPD: {
3193     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3194                                          ARM::VLD2d16wb_fixed,
3195                                          ARM::VLD2d32wb_fixed,
3196                                          ARM::VLD1q64wb_fixed};
3197     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3198                                          ARM::VLD2q16PseudoWB_fixed,
3199                                          ARM::VLD2q32PseudoWB_fixed };
3200     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3201     return;
3202   }
3203
3204   case ARMISD::VLD3_UPD: {
3205     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3206                                          ARM::VLD3d16Pseudo_UPD,
3207                                          ARM::VLD3d32Pseudo_UPD,
3208                                          ARM::VLD1d64TPseudoWB_fixed};
3209     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3210                                           ARM::VLD3q16Pseudo_UPD,
3211                                           ARM::VLD3q32Pseudo_UPD };
3212     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3213                                           ARM::VLD3q16oddPseudo_UPD,
3214                                           ARM::VLD3q32oddPseudo_UPD };
3215     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3216     return;
3217   }
3218
3219   case ARMISD::VLD4_UPD: {
3220     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3221                                          ARM::VLD4d16Pseudo_UPD,
3222                                          ARM::VLD4d32Pseudo_UPD,
3223                                          ARM::VLD1d64QPseudoWB_fixed};
3224     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3225                                           ARM::VLD4q16Pseudo_UPD,
3226                                           ARM::VLD4q32Pseudo_UPD };
3227     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3228                                           ARM::VLD4q16oddPseudo_UPD,
3229                                           ARM::VLD4q32oddPseudo_UPD };
3230     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3231     return;
3232   }
3233
3234   case ARMISD::VLD2LN_UPD: {
3235     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3236                                          ARM::VLD2LNd16Pseudo_UPD,
3237                                          ARM::VLD2LNd32Pseudo_UPD };
3238     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3239                                          ARM::VLD2LNq32Pseudo_UPD };
3240     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3241     return;
3242   }
3243
3244   case ARMISD::VLD3LN_UPD: {
3245     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3246                                          ARM::VLD3LNd16Pseudo_UPD,
3247                                          ARM::VLD3LNd32Pseudo_UPD };
3248     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3249                                          ARM::VLD3LNq32Pseudo_UPD };
3250     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3251     return;
3252   }
3253
3254   case ARMISD::VLD4LN_UPD: {
3255     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3256                                          ARM::VLD4LNd16Pseudo_UPD,
3257                                          ARM::VLD4LNd32Pseudo_UPD };
3258     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3259                                          ARM::VLD4LNq32Pseudo_UPD };
3260     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3261     return;
3262   }
3263
3264   case ARMISD::VST1_UPD: {
3265     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3266                                          ARM::VST1d16wb_fixed,
3267                                          ARM::VST1d32wb_fixed,
3268                                          ARM::VST1d64wb_fixed };
3269     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3270                                          ARM::VST1q16wb_fixed,
3271                                          ARM::VST1q32wb_fixed,
3272                                          ARM::VST1q64wb_fixed };
3273     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3274     return;
3275   }
3276
3277   case ARMISD::VST2_UPD: {
3278     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3279                                          ARM::VST2d16wb_fixed,
3280                                          ARM::VST2d32wb_fixed,
3281                                          ARM::VST1q64wb_fixed};
3282     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3283                                          ARM::VST2q16PseudoWB_fixed,
3284                                          ARM::VST2q32PseudoWB_fixed };
3285     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3286     return;
3287   }
3288
3289   case ARMISD::VST3_UPD: {
3290     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3291                                          ARM::VST3d16Pseudo_UPD,
3292                                          ARM::VST3d32Pseudo_UPD,
3293                                          ARM::VST1d64TPseudoWB_fixed};
3294     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3295                                           ARM::VST3q16Pseudo_UPD,
3296                                           ARM::VST3q32Pseudo_UPD };
3297     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3298                                           ARM::VST3q16oddPseudo_UPD,
3299                                           ARM::VST3q32oddPseudo_UPD };
3300     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3301     return;
3302   }
3303
3304   case ARMISD::VST4_UPD: {
3305     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3306                                          ARM::VST4d16Pseudo_UPD,
3307                                          ARM::VST4d32Pseudo_UPD,
3308                                          ARM::VST1d64QPseudoWB_fixed};
3309     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3310                                           ARM::VST4q16Pseudo_UPD,
3311                                           ARM::VST4q32Pseudo_UPD };
3312     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3313                                           ARM::VST4q16oddPseudo_UPD,
3314                                           ARM::VST4q32oddPseudo_UPD };
3315     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3316     return;
3317   }
3318
3319   case ARMISD::VST2LN_UPD: {
3320     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3321                                          ARM::VST2LNd16Pseudo_UPD,
3322                                          ARM::VST2LNd32Pseudo_UPD };
3323     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3324                                          ARM::VST2LNq32Pseudo_UPD };
3325     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3326     return;
3327   }
3328
3329   case ARMISD::VST3LN_UPD: {
3330     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3331                                          ARM::VST3LNd16Pseudo_UPD,
3332                                          ARM::VST3LNd32Pseudo_UPD };
3333     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3334                                          ARM::VST3LNq32Pseudo_UPD };
3335     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3336     return;
3337   }
3338
3339   case ARMISD::VST4LN_UPD: {
3340     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3341                                          ARM::VST4LNd16Pseudo_UPD,
3342                                          ARM::VST4LNd32Pseudo_UPD };
3343     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3344                                          ARM::VST4LNq32Pseudo_UPD };
3345     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3346     return;
3347   }
3348
3349   case ISD::INTRINSIC_VOID:
3350   case ISD::INTRINSIC_W_CHAIN: {
3351     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3352     switch (IntNo) {
3353     default:
3354       break;
3355
3356     case Intrinsic::arm_mrrc:
3357     case Intrinsic::arm_mrrc2: {
3358       SDLoc dl(N);
3359       SDValue Chain = N->getOperand(0);
3360       unsigned Opc;
3361
3362       if (Subtarget->isThumb())
3363         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3364       else
3365         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3366
3367       SmallVector<SDValue, 5> Ops;
3368       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3369       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3370       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3371
3372       // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the
3373       // encoded instruction are always '1111'. Assembly language may still specify AL
3374       // as a predicate for mrrc2, but that makes no difference to the encoded instruction.
3375       if (Opc != ARM::MRRC2) {
3376         Ops.push_back(getAL(CurDAG, dl));
3377         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3378       }
3379
3380       Ops.push_back(Chain);
3381
3382       // Writes to two registers.
3383       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3384
3385       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3386       return;
3387     }
3388     case Intrinsic::arm_ldaexd:
3389     case Intrinsic::arm_ldrexd: {
3390       SDLoc dl(N);
3391       SDValue Chain = N->getOperand(0);
3392       SDValue MemAddr = N->getOperand(2);
3393       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3394
3395       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3396       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3397                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3398
3399       // arm_ldrexd returns an i64 value in {i32, i32}
3400       std::vector<EVT> ResTys;
3401       if (isThumb) {
3402         ResTys.push_back(MVT::i32);
3403         ResTys.push_back(MVT::i32);
3404       } else
3405         ResTys.push_back(MVT::Untyped);
3406       ResTys.push_back(MVT::Other);
3407
3408       // Place arguments in the right order.
3409       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3410                        CurDAG->getRegister(0, MVT::i32), Chain};
3411       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3412       // Transfer memoperands.
3413       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3414       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3415       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3416
3417       // Remap uses.
3418       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3419       if (!SDValue(N, 0).use_empty()) {
3420         SDValue Result;
3421         if (isThumb)
3422           Result = SDValue(Ld, 0);
3423         else {
3424           SDValue SubRegIdx =
3425             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3426           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3427               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3428           Result = SDValue(ResNode,0);
3429         }
3430         ReplaceUses(SDValue(N, 0), Result);
3431       }
3432       if (!SDValue(N, 1).use_empty()) {
3433         SDValue Result;
3434         if (isThumb)
3435           Result = SDValue(Ld, 1);
3436         else {
3437           SDValue SubRegIdx =
3438             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3439           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3440               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3441           Result = SDValue(ResNode,0);
3442         }
3443         ReplaceUses(SDValue(N, 1), Result);
3444       }
3445       ReplaceUses(SDValue(N, 2), OutChain);
3446       CurDAG->RemoveDeadNode(N);
3447       return;
3448     }
3449     case Intrinsic::arm_stlexd:
3450     case Intrinsic::arm_strexd: {
3451       SDLoc dl(N);
3452       SDValue Chain = N->getOperand(0);
3453       SDValue Val0 = N->getOperand(2);
3454       SDValue Val1 = N->getOperand(3);
3455       SDValue MemAddr = N->getOperand(4);
3456
3457       // Store exclusive double returns an i32 value, which is the return status
3458       // of the issued store.
3459       const EVT ResTys[] = {MVT::i32, MVT::Other};
3460
3461       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3462       // Place arguments in the right order.
3463       SmallVector<SDValue, 7> Ops;
3464       if (isThumb) {
3465         Ops.push_back(Val0);
3466         Ops.push_back(Val1);
3467       } else
3468         // arm_strexd uses GPRPair.
3469         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3470       Ops.push_back(MemAddr);
3471       Ops.push_back(getAL(CurDAG, dl));
3472       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3473       Ops.push_back(Chain);
3474
3475       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3476       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3477                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3478
3479       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3480       // Transfer memoperands.
3481       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3482       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3483       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3484
3485       ReplaceNode(N, St);
3486       return;
3487     }
3488
3489     case Intrinsic::arm_neon_vld1: {
3490       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3491                                            ARM::VLD1d32, ARM::VLD1d64 };
3492       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3493                                            ARM::VLD1q32, ARM::VLD1q64};
3494       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3495       return;
3496     }
3497
3498     case Intrinsic::arm_neon_vld1x2: {
3499       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3500                                            ARM::VLD1q32, ARM::VLD1q64 };
3501       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3502                                            ARM::VLD1d16QPseudo,
3503                                            ARM::VLD1d32QPseudo,
3504                                            ARM::VLD1d64QPseudo };
3505       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3506       return;
3507     }
3508
3509     case Intrinsic::arm_neon_vld1x3: {
3510       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3511                                            ARM::VLD1d16TPseudo,
3512                                            ARM::VLD1d32TPseudo,
3513                                            ARM::VLD1d64TPseudo };
3514       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3515                                             ARM::VLD1q16LowTPseudo_UPD,
3516                                             ARM::VLD1q32LowTPseudo_UPD,
3517                                             ARM::VLD1q64LowTPseudo_UPD };
3518       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3519                                             ARM::VLD1q16HighTPseudo,
3520                                             ARM::VLD1q32HighTPseudo,
3521                                             ARM::VLD1q64HighTPseudo };
3522       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3523       return;
3524     }
3525
3526     case Intrinsic::arm_neon_vld1x4: {
3527       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3528                                            ARM::VLD1d16QPseudo,
3529                                            ARM::VLD1d32QPseudo,
3530                                            ARM::VLD1d64QPseudo };
3531       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3532                                             ARM::VLD1q16LowQPseudo_UPD,
3533                                             ARM::VLD1q32LowQPseudo_UPD,
3534                                             ARM::VLD1q64LowQPseudo_UPD };
3535       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3536                                             ARM::VLD1q16HighQPseudo,
3537                                             ARM::VLD1q32HighQPseudo,
3538                                             ARM::VLD1q64HighQPseudo };
3539       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3540       return;
3541     }
3542
3543     case Intrinsic::arm_neon_vld2: {
3544       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3545                                            ARM::VLD2d32, ARM::VLD1q64 };
3546       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3547                                            ARM::VLD2q32Pseudo };
3548       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3549       return;
3550     }
3551
3552     case Intrinsic::arm_neon_vld3: {
3553       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3554                                            ARM::VLD3d16Pseudo,
3555                                            ARM::VLD3d32Pseudo,
3556                                            ARM::VLD1d64TPseudo };
3557       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3558                                             ARM::VLD3q16Pseudo_UPD,
3559                                             ARM::VLD3q32Pseudo_UPD };
3560       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3561                                             ARM::VLD3q16oddPseudo,
3562                                             ARM::VLD3q32oddPseudo };
3563       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3564       return;
3565     }
3566
3567     case Intrinsic::arm_neon_vld4: {
3568       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3569                                            ARM::VLD4d16Pseudo,
3570                                            ARM::VLD4d32Pseudo,
3571                                            ARM::VLD1d64QPseudo };
3572       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3573                                             ARM::VLD4q16Pseudo_UPD,
3574                                             ARM::VLD4q32Pseudo_UPD };
3575       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3576                                             ARM::VLD4q16oddPseudo,
3577                                             ARM::VLD4q32oddPseudo };
3578       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3579       return;
3580     }
3581
3582     case Intrinsic::arm_neon_vld2dup: {
3583       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3584                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3585       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3586                                             ARM::VLD2DUPq16EvenPseudo,
3587                                             ARM::VLD2DUPq32EvenPseudo };
3588       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3589                                             ARM::VLD2DUPq16OddPseudo,
3590                                             ARM::VLD2DUPq32OddPseudo };
3591       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3592                    DOpcodes, QOpcodes0, QOpcodes1);
3593       return;
3594     }
3595
3596     case Intrinsic::arm_neon_vld3dup: {
3597       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3598                                            ARM::VLD3DUPd16Pseudo,
3599                                            ARM::VLD3DUPd32Pseudo,
3600                                            ARM::VLD1d64TPseudo };
3601       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3602                                             ARM::VLD3DUPq16EvenPseudo,
3603                                             ARM::VLD3DUPq32EvenPseudo };
3604       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3605                                             ARM::VLD3DUPq16OddPseudo,
3606                                             ARM::VLD3DUPq32OddPseudo };
3607       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3608                    DOpcodes, QOpcodes0, QOpcodes1);
3609       return;
3610     }
3611
3612     case Intrinsic::arm_neon_vld4dup: {
3613       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3614                                            ARM::VLD4DUPd16Pseudo,
3615                                            ARM::VLD4DUPd32Pseudo,
3616                                            ARM::VLD1d64QPseudo };
3617       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3618                                             ARM::VLD4DUPq16EvenPseudo,
3619                                             ARM::VLD4DUPq32EvenPseudo };
3620       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3621                                             ARM::VLD4DUPq16OddPseudo,
3622                                             ARM::VLD4DUPq32OddPseudo };
3623       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3624                    DOpcodes, QOpcodes0, QOpcodes1);
3625       return;
3626     }
3627
3628     case Intrinsic::arm_neon_vld2lane: {
3629       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3630                                            ARM::VLD2LNd16Pseudo,
3631                                            ARM::VLD2LNd32Pseudo };
3632       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3633                                            ARM::VLD2LNq32Pseudo };
3634       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3635       return;
3636     }
3637
3638     case Intrinsic::arm_neon_vld3lane: {
3639       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3640                                            ARM::VLD3LNd16Pseudo,
3641                                            ARM::VLD3LNd32Pseudo };
3642       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3643                                            ARM::VLD3LNq32Pseudo };
3644       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3645       return;
3646     }
3647
3648     case Intrinsic::arm_neon_vld4lane: {
3649       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3650                                            ARM::VLD4LNd16Pseudo,
3651                                            ARM::VLD4LNd32Pseudo };
3652       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3653                                            ARM::VLD4LNq32Pseudo };
3654       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3655       return;
3656     }
3657
3658     case Intrinsic::arm_neon_vst1: {
3659       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3660                                            ARM::VST1d32, ARM::VST1d64 };
3661       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3662                                            ARM::VST1q32, ARM::VST1q64 };
3663       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3664       return;
3665     }
3666
3667     case Intrinsic::arm_neon_vst1x2: {
3668       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3669                                            ARM::VST1q32, ARM::VST1q64 };
3670       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3671                                            ARM::VST1d16QPseudo,
3672                                            ARM::VST1d32QPseudo,
3673                                            ARM::VST1d64QPseudo };
3674       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3675       return;
3676     }
3677
3678     case Intrinsic::arm_neon_vst1x3: {
3679       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3680                                            ARM::VST1d16TPseudo,
3681                                            ARM::VST1d32TPseudo,
3682                                            ARM::VST1d64TPseudo };
3683       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3684                                             ARM::VST1q16LowTPseudo_UPD,
3685                                             ARM::VST1q32LowTPseudo_UPD,
3686                                             ARM::VST1q64LowTPseudo_UPD };
3687       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3688                                             ARM::VST1q16HighTPseudo,
3689                                             ARM::VST1q32HighTPseudo,
3690                                             ARM::VST1q64HighTPseudo };
3691       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3692       return;
3693     }
3694
3695     case Intrinsic::arm_neon_vst1x4: {
3696       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3697                                            ARM::VST1d16QPseudo,
3698                                            ARM::VST1d32QPseudo,
3699                                            ARM::VST1d64QPseudo };
3700       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3701                                             ARM::VST1q16LowQPseudo_UPD,
3702                                             ARM::VST1q32LowQPseudo_UPD,
3703                                             ARM::VST1q64LowQPseudo_UPD };
3704       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3705                                             ARM::VST1q16HighQPseudo,
3706                                             ARM::VST1q32HighQPseudo,
3707                                             ARM::VST1q64HighQPseudo };
3708       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3709       return;
3710     }
3711
3712     case Intrinsic::arm_neon_vst2: {
3713       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3714                                            ARM::VST2d32, ARM::VST1q64 };
3715       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3716                                            ARM::VST2q32Pseudo };
3717       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3718       return;
3719     }
3720
3721     case Intrinsic::arm_neon_vst3: {
3722       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3723                                            ARM::VST3d16Pseudo,
3724                                            ARM::VST3d32Pseudo,
3725                                            ARM::VST1d64TPseudo };
3726       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3727                                             ARM::VST3q16Pseudo_UPD,
3728                                             ARM::VST3q32Pseudo_UPD };
3729       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3730                                             ARM::VST3q16oddPseudo,
3731                                             ARM::VST3q32oddPseudo };
3732       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3733       return;
3734     }
3735
3736     case Intrinsic::arm_neon_vst4: {
3737       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3738                                            ARM::VST4d16Pseudo,
3739                                            ARM::VST4d32Pseudo,
3740                                            ARM::VST1d64QPseudo };
3741       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3742                                             ARM::VST4q16Pseudo_UPD,
3743                                             ARM::VST4q32Pseudo_UPD };
3744       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3745                                             ARM::VST4q16oddPseudo,
3746                                             ARM::VST4q32oddPseudo };
3747       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3748       return;
3749     }
3750
3751     case Intrinsic::arm_neon_vst2lane: {
3752       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3753                                            ARM::VST2LNd16Pseudo,
3754                                            ARM::VST2LNd32Pseudo };
3755       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3756                                            ARM::VST2LNq32Pseudo };
3757       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3758       return;
3759     }
3760
3761     case Intrinsic::arm_neon_vst3lane: {
3762       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3763                                            ARM::VST3LNd16Pseudo,
3764                                            ARM::VST3LNd32Pseudo };
3765       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3766                                            ARM::VST3LNq32Pseudo };
3767       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3768       return;
3769     }
3770
3771     case Intrinsic::arm_neon_vst4lane: {
3772       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3773                                            ARM::VST4LNd16Pseudo,
3774                                            ARM::VST4LNd32Pseudo };
3775       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3776                                            ARM::VST4LNq32Pseudo };
3777       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3778       return;
3779     }
3780     }
3781     break;
3782   }
3783
3784   case ISD::ATOMIC_CMP_SWAP:
3785     SelectCMP_SWAP(N);
3786     return;
3787   }
3788
3789   SelectCode(N);
3790 }
3791
3792 // Inspect a register string of the form
3793 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3794 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3795 // and obtain the integer operands from them, adding these operands to the
3796 // provided vector.
3797 static void getIntOperandsFromRegisterString(StringRef RegString,
3798                                              SelectionDAG *CurDAG,
3799                                              const SDLoc &DL,
3800                                              std::vector<SDValue> &Ops) {
3801   SmallVector<StringRef, 5> Fields;
3802   RegString.split(Fields, ':');
3803
3804   if (Fields.size() > 1) {
3805     bool AllIntFields = true;
3806
3807     for (StringRef Field : Fields) {
3808       // Need to trim out leading 'cp' characters and get the integer field.
3809       unsigned IntField;
3810       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3811       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3812     }
3813
3814     assert(AllIntFields &&
3815             "Unexpected non-integer value in special register string.");
3816   }
3817 }
3818
3819 // Maps a Banked Register string to its mask value. The mask value returned is
3820 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3821 // mask operand, which expresses which register is to be used, e.g. r8, and in
3822 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3823 // was invalid.
3824 static inline int getBankedRegisterMask(StringRef RegString) {
3825   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3826   if (!TheReg)
3827      return -1;
3828   return TheReg->Encoding;
3829 }
3830
3831 // The flags here are common to those allowed for apsr in the A class cores and
3832 // those allowed for the special registers in the M class cores. Returns a
3833 // value representing which flags were present, -1 if invalid.
3834 static inline int getMClassFlagsMask(StringRef Flags) {
3835   return StringSwitch<int>(Flags)
3836           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3837                          // correct when flags are not permitted
3838           .Case("g", 0x1)
3839           .Case("nzcvq", 0x2)
3840           .Case("nzcvqg", 0x3)
3841           .Default(-1);
3842 }
3843
3844 // Maps MClass special registers string to its value for use in the
3845 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3846 // Returns -1 to signify that the string was invalid.
3847 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3848   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3849   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3850   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3851     return -1;
3852   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
3853 }
3854
3855 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3856   // The mask operand contains the special register (R Bit) in bit 4, whether
3857   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3858   // bits 3-0 contains the fields to be accessed in the special register, set by
3859   // the flags provided with the register.
3860   int Mask = 0;
3861   if (Reg == "apsr") {
3862     // The flags permitted for apsr are the same flags that are allowed in
3863     // M class registers. We get the flag value and then shift the flags into
3864     // the correct place to combine with the mask.
3865     Mask = getMClassFlagsMask(Flags);
3866     if (Mask == -1)
3867       return -1;
3868     return Mask << 2;
3869   }
3870
3871   if (Reg != "cpsr" && Reg != "spsr") {
3872     return -1;
3873   }
3874
3875   // This is the same as if the flags were "fc"
3876   if (Flags.empty() || Flags == "all")
3877     return Mask | 0x9;
3878
3879   // Inspect the supplied flags string and set the bits in the mask for
3880   // the relevant and valid flags allowed for cpsr and spsr.
3881   for (char Flag : Flags) {
3882     int FlagVal;
3883     switch (Flag) {
3884       case 'c':
3885         FlagVal = 0x1;
3886         break;
3887       case 'x':
3888         FlagVal = 0x2;
3889         break;
3890       case 's':
3891         FlagVal = 0x4;
3892         break;
3893       case 'f':
3894         FlagVal = 0x8;
3895         break;
3896       default:
3897         FlagVal = 0;
3898     }
3899
3900     // This avoids allowing strings where the same flag bit appears twice.
3901     if (!FlagVal || (Mask & FlagVal))
3902       return -1;
3903     Mask |= FlagVal;
3904   }
3905
3906   // If the register is spsr then we need to set the R bit.
3907   if (Reg == "spsr")
3908     Mask |= 0x10;
3909
3910   return Mask;
3911 }
3912
3913 // Lower the read_register intrinsic to ARM specific DAG nodes
3914 // using the supplied metadata string to select the instruction node to use
3915 // and the registers/masks to construct as operands for the node.
3916 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3917   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3918   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3919   bool IsThumb2 = Subtarget->isThumb2();
3920   SDLoc DL(N);
3921
3922   std::vector<SDValue> Ops;
3923   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3924
3925   if (!Ops.empty()) {
3926     // If the special register string was constructed of fields (as defined
3927     // in the ACLE) then need to lower to MRC node (32 bit) or
3928     // MRRC node(64 bit), we can make the distinction based on the number of
3929     // operands we have.
3930     unsigned Opcode;
3931     SmallVector<EVT, 3> ResTypes;
3932     if (Ops.size() == 5){
3933       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3934       ResTypes.append({ MVT::i32, MVT::Other });
3935     } else {
3936       assert(Ops.size() == 3 &&
3937               "Invalid number of fields in special register string.");
3938       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3939       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3940     }
3941
3942     Ops.push_back(getAL(CurDAG, DL));
3943     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3944     Ops.push_back(N->getOperand(0));
3945     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3946     return true;
3947   }
3948
3949   std::string SpecialReg = RegString->getString().lower();
3950
3951   int BankedReg = getBankedRegisterMask(SpecialReg);
3952   if (BankedReg != -1) {
3953     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3954             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3955             N->getOperand(0) };
3956     ReplaceNode(
3957         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3958                                   DL, MVT::i32, MVT::Other, Ops));
3959     return true;
3960   }
3961
3962   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3963   // corresponding to the register that is being read from. So we switch on the
3964   // string to find which opcode we need to use.
3965   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3966                     .Case("fpscr", ARM::VMRS)
3967                     .Case("fpexc", ARM::VMRS_FPEXC)
3968                     .Case("fpsid", ARM::VMRS_FPSID)
3969                     .Case("mvfr0", ARM::VMRS_MVFR0)
3970                     .Case("mvfr1", ARM::VMRS_MVFR1)
3971                     .Case("mvfr2", ARM::VMRS_MVFR2)
3972                     .Case("fpinst", ARM::VMRS_FPINST)
3973                     .Case("fpinst2", ARM::VMRS_FPINST2)
3974                     .Default(0);
3975
3976   // If an opcode was found then we can lower the read to a VFP instruction.
3977   if (Opcode) {
3978     if (!Subtarget->hasVFP2())
3979       return false;
3980     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
3981       return false;
3982
3983     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3984             N->getOperand(0) };
3985     ReplaceNode(N,
3986                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
3987     return true;
3988   }
3989
3990   // If the target is M Class then need to validate that the register string
3991   // is an acceptable value, so check that a mask can be constructed from the
3992   // string.
3993   if (Subtarget->isMClass()) {
3994     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
3995     if (SYSmValue == -1)
3996       return false;
3997
3998     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3999                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4000                       N->getOperand(0) };
4001     ReplaceNode(
4002         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4003     return true;
4004   }
4005
4006   // Here we know the target is not M Class so we need to check if it is one
4007   // of the remaining possible values which are apsr, cpsr or spsr.
4008   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4009     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4010             N->getOperand(0) };
4011     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4012                                           DL, MVT::i32, MVT::Other, Ops));
4013     return true;
4014   }
4015
4016   if (SpecialReg == "spsr") {
4017     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4018             N->getOperand(0) };
4019     ReplaceNode(
4020         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4021                                   MVT::i32, MVT::Other, Ops));
4022     return true;
4023   }
4024
4025   return false;
4026 }
4027
4028 // Lower the write_register intrinsic to ARM specific DAG nodes
4029 // using the supplied metadata string to select the instruction node to use
4030 // and the registers/masks to use in the nodes
4031 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4032   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4033   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4034   bool IsThumb2 = Subtarget->isThumb2();
4035   SDLoc DL(N);
4036
4037   std::vector<SDValue> Ops;
4038   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4039
4040   if (!Ops.empty()) {
4041     // If the special register string was constructed of fields (as defined
4042     // in the ACLE) then need to lower to MCR node (32 bit) or
4043     // MCRR node(64 bit), we can make the distinction based on the number of
4044     // operands we have.
4045     unsigned Opcode;
4046     if (Ops.size() == 5) {
4047       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4048       Ops.insert(Ops.begin()+2, N->getOperand(2));
4049     } else {
4050       assert(Ops.size() == 3 &&
4051               "Invalid number of fields in special register string.");
4052       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4053       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4054       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4055     }
4056
4057     Ops.push_back(getAL(CurDAG, DL));
4058     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4059     Ops.push_back(N->getOperand(0));
4060
4061     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4062     return true;
4063   }
4064
4065   std::string SpecialReg = RegString->getString().lower();
4066   int BankedReg = getBankedRegisterMask(SpecialReg);
4067   if (BankedReg != -1) {
4068     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4069             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4070             N->getOperand(0) };
4071     ReplaceNode(
4072         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4073                                   DL, MVT::Other, Ops));
4074     return true;
4075   }
4076
4077   // The VFP registers are written to by creating SelectionDAG nodes with
4078   // opcodes corresponding to the register that is being written. So we switch
4079   // on the string to find which opcode we need to use.
4080   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4081                     .Case("fpscr", ARM::VMSR)
4082                     .Case("fpexc", ARM::VMSR_FPEXC)
4083                     .Case("fpsid", ARM::VMSR_FPSID)
4084                     .Case("fpinst", ARM::VMSR_FPINST)
4085                     .Case("fpinst2", ARM::VMSR_FPINST2)
4086                     .Default(0);
4087
4088   if (Opcode) {
4089     if (!Subtarget->hasVFP2())
4090       return false;
4091     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4092             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4093     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4094     return true;
4095   }
4096
4097   std::pair<StringRef, StringRef> Fields;
4098   Fields = StringRef(SpecialReg).rsplit('_');
4099   std::string Reg = Fields.first.str();
4100   StringRef Flags = Fields.second;
4101
4102   // If the target was M Class then need to validate the special register value
4103   // and retrieve the mask for use in the instruction node.
4104   if (Subtarget->isMClass()) {
4105     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4106     if (SYSmValue == -1)
4107       return false;
4108
4109     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4110                       N->getOperand(2), getAL(CurDAG, DL),
4111                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4112     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4113     return true;
4114   }
4115
4116   // We then check to see if a valid mask can be constructed for one of the
4117   // register string values permitted for the A and R class cores. These values
4118   // are apsr, spsr and cpsr; these are also valid on older cores.
4119   int Mask = getARClassRegisterMask(Reg, Flags);
4120   if (Mask != -1) {
4121     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4122             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4123             N->getOperand(0) };
4124     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4125                                           DL, MVT::Other, Ops));
4126     return true;
4127   }
4128
4129   return false;
4130 }
4131
4132 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4133   std::vector<SDValue> AsmNodeOperands;
4134   unsigned Flag, Kind;
4135   bool Changed = false;
4136   unsigned NumOps = N->getNumOperands();
4137
4138   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4139   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4140   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4141   // respectively. Since there is no constraint to explicitly specify a
4142   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4143   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4144   // them into a GPRPair.
4145
4146   SDLoc dl(N);
4147   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4148                                    : SDValue(nullptr,0);
4149
4150   SmallVector<bool, 8> OpChanged;
4151   // Glue node will be appended late.
4152   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4153     SDValue op = N->getOperand(i);
4154     AsmNodeOperands.push_back(op);
4155
4156     if (i < InlineAsm::Op_FirstOperand)
4157       continue;
4158
4159     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4160       Flag = C->getZExtValue();
4161       Kind = InlineAsm::getKind(Flag);
4162     }
4163     else
4164       continue;
4165
4166     // Immediate operands to inline asm in the SelectionDAG are modeled with
4167     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4168     // the second is a constant with the value of the immediate. If we get here
4169     // and we have a Kind_Imm, skip the next operand, and continue.
4170     if (Kind == InlineAsm::Kind_Imm) {
4171       SDValue op = N->getOperand(++i);
4172       AsmNodeOperands.push_back(op);
4173       continue;
4174     }
4175
4176     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4177     if (NumRegs)
4178       OpChanged.push_back(false);
4179
4180     unsigned DefIdx = 0;
4181     bool IsTiedToChangedOp = false;
4182     // If it's a use that is tied with a previous def, it has no
4183     // reg class constraint.
4184     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4185       IsTiedToChangedOp = OpChanged[DefIdx];
4186
4187     // Memory operands to inline asm in the SelectionDAG are modeled with two
4188     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4189     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4190     // it doesn't get misinterpreted), and continue. We do this here because
4191     // it's important to update the OpChanged array correctly before moving on.
4192     if (Kind == InlineAsm::Kind_Mem) {
4193       SDValue op = N->getOperand(++i);
4194       AsmNodeOperands.push_back(op);
4195       continue;
4196     }
4197
4198     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4199         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4200       continue;
4201
4202     unsigned RC;
4203     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4204     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4205         || NumRegs != 2)
4206       continue;
4207
4208     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4209     SDValue V0 = N->getOperand(i+1);
4210     SDValue V1 = N->getOperand(i+2);
4211     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4212     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4213     SDValue PairedReg;
4214     MachineRegisterInfo &MRI = MF->getRegInfo();
4215
4216     if (Kind == InlineAsm::Kind_RegDef ||
4217         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4218       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4219       // the original GPRs.
4220
4221       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4222       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4223       SDValue Chain = SDValue(N,0);
4224
4225       SDNode *GU = N->getGluedUser();
4226       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4227                                                Chain.getValue(1));
4228
4229       // Extract values from a GPRPair reg and copy to the original GPR reg.
4230       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4231                                                     RegCopy);
4232       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4233                                                     RegCopy);
4234       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4235                                         RegCopy.getValue(1));
4236       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4237
4238       // Update the original glue user.
4239       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4240       Ops.push_back(T1.getValue(1));
4241       CurDAG->UpdateNodeOperands(GU, Ops);
4242     }
4243     else {
4244       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4245       // GPRPair and then pass the GPRPair to the inline asm.
4246       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4247
4248       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4249       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4250                                           Chain.getValue(1));
4251       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4252                                           T0.getValue(1));
4253       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4254
4255       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4256       // i32 VRs of inline asm with it.
4257       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4258       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4259       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4260
4261       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4262       Glue = Chain.getValue(1);
4263     }
4264
4265     Changed = true;
4266
4267     if(PairedReg.getNode()) {
4268       OpChanged[OpChanged.size() -1 ] = true;
4269       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4270       if (IsTiedToChangedOp)
4271         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4272       else
4273         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4274       // Replace the current flag.
4275       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4276           Flag, dl, MVT::i32);
4277       // Add the new register node and skip the original two GPRs.
4278       AsmNodeOperands.push_back(PairedReg);
4279       // Skip the next two GPRs.
4280       i += 2;
4281     }
4282   }
4283
4284   if (Glue.getNode())
4285     AsmNodeOperands.push_back(Glue);
4286   if (!Changed)
4287     return false;
4288
4289   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4290       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4291   New->setNodeId(-1);
4292   ReplaceNode(N, New.getNode());
4293   return true;
4294 }
4295
4296
4297 bool ARMDAGToDAGISel::
4298 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4299                              std::vector<SDValue> &OutOps) {
4300   switch(ConstraintID) {
4301   default:
4302     llvm_unreachable("Unexpected asm memory constraint");
4303   case InlineAsm::Constraint_i:
4304     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4305     //        be an immediate and not a memory constraint.
4306     LLVM_FALLTHROUGH;
4307   case InlineAsm::Constraint_m:
4308   case InlineAsm::Constraint_o:
4309   case InlineAsm::Constraint_Q:
4310   case InlineAsm::Constraint_Um:
4311   case InlineAsm::Constraint_Un:
4312   case InlineAsm::Constraint_Uq:
4313   case InlineAsm::Constraint_Us:
4314   case InlineAsm::Constraint_Ut:
4315   case InlineAsm::Constraint_Uv:
4316   case InlineAsm::Constraint_Uy:
4317     // Require the address to be in a register.  That is safe for all ARM
4318     // variants and it is hard to do anything much smarter without knowing
4319     // how the operand is used.
4320     OutOps.push_back(Op);
4321     return false;
4322   }
4323   return true;
4324 }
4325
4326 /// createARMISelDag - This pass converts a legalized DAG into a
4327 /// ARM-specific DAG, ready for instruction scheduling.
4328 ///
4329 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4330                                      CodeGenOpt::Level OptLevel) {
4331   return new ARMDAGToDAGISel(TM, OptLevel);
4332 }