contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Target/TargetLowering.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 enum AddrMode2Type {
  53   AM2_BASE, // Simple AM2 (+-imm12)
  54   AM2_SHOP  // Shifter-op AM2
  55 };
  56
  57 class ARMDAGToDAGISel : public SelectionDAGISel {
  58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  59   /// make the right decision when generating code for different targets.
  60   const ARMSubtarget *Subtarget;
  61
  62 public:
  63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  64       : SelectionDAGISel(tm, OptLevel) {}
  65
  66   bool runOnMachineFunction(MachineFunction &MF) override {
  67     // Reset the subtarget each time through.
  68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  69     SelectionDAGISel::runOnMachineFunction(MF);
  70     return true;
  71   }
  72
  73   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  74
  75   void PreprocessISelDAG() override;
  76
  77   /// getI32Imm - Return a target constant of type i32 with the specified
  78   /// value.
  79   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  80     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  81   }
  82
  83   void Select(SDNode *N) override;
  84
  85   bool hasNoVMLxHazardUse(SDNode *N) const;
  86   bool isShifterOpProfitable(const SDValue &Shift,
  87                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  88   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  89                                SDValue &B, SDValue &C,
  90                                bool CheckProfitability = true);
  91   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  92                                SDValue &B, bool CheckProfitability = true);
  93   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B, SDValue &C) {
  95     // Don't apply the profitability check
  96     return SelectRegShifterOperand(N, A, B, C, false);
  97   }
  98   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  99                                     SDValue &B) {
 100     // Don't apply the profitability check
 101     return SelectImmShifterOperand(N, A, B, false);
 102   }
 103
 104   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 105   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 106
 107   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 108                                       SDValue &Offset, SDValue &Opc);
 109   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 110                            SDValue &Opc) {
 111     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 112   }
 113
 114   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 115                            SDValue &Opc) {
 116     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 117   }
 118
 119   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 120                        SDValue &Opc) {
 121     SelectAddrMode2Worker(N, Base, Offset, Opc);
 122 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 123     // This always matches one way or another.
 124     return true;
 125   }
 126
 127   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 128     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 129     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 130     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 131     return true;
 132   }
 133
 134   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 135                              SDValue &Offset, SDValue &Opc);
 136   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 137                              SDValue &Offset, SDValue &Opc);
 138   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 139                              SDValue &Offset, SDValue &Opc);
 140   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 141   bool SelectAddrMode3(SDValue N, SDValue &Base,
 142                        SDValue &Offset, SDValue &Opc);
 143   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode5(SDValue N, SDValue &Base,
 146                        SDValue &Offset);
 147   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 148   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 149
 150   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 151
 152   // Thumb Addressing Modes:
 153   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 154   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 155                                 SDValue &OffImm);
 156   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 157                                  SDValue &OffImm);
 158   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 159                                  SDValue &OffImm);
 160   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 161                                  SDValue &OffImm);
 162   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 163
 164   // Thumb 2 Addressing Modes:
 165   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 166   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 167                             SDValue &OffImm);
 168   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 169                                  SDValue &OffImm);
 170   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 171                              SDValue &OffReg, SDValue &ShImm);
 172   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 173
 174   inline bool is_so_imm(unsigned Imm) const {
 175     return ARM_AM::getSOImmVal(Imm) != -1;
 176   }
 177
 178   inline bool is_so_imm_not(unsigned Imm) const {
 179     return ARM_AM::getSOImmVal(~Imm) != -1;
 180   }
 181
 182   inline bool is_t2_so_imm(unsigned Imm) const {
 183     return ARM_AM::getT2SOImmVal(Imm) != -1;
 184   }
 185
 186   inline bool is_t2_so_imm_not(unsigned Imm) const {
 187     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 188   }
 189
 190   // Include the pieces autogenerated from the target description.
 191 #include "ARMGenDAGISel.inc"
 192
 193 private:
 194   void transferMemOperands(SDNode *Src, SDNode *Dst);
 195
 196   /// Indexed (pre/post inc/dec) load matching code for ARM.
 197   bool tryARMIndexedLoad(SDNode *N);
 198   bool tryT1IndexedLoad(SDNode *N);
 199   bool tryT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 207                  const uint16_t *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 215                  const uint16_t *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 221                        unsigned NumVecs, const uint16_t *DOpcodes,
 222                        const uint16_t *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *DOpcodes,
 229                     const uint16_t *QOpcodes = nullptr);
 230
 231   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 232   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 233   /// generated to force the table registers to be consecutive.
 234   void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 235
 236   /// Try to select SBFX/UBFX instructions for ARM.
 237   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 238
 239   // Select special operations if node forms integer ABS pattern
 240   bool tryABSOp(SDNode *N);
 241
 242   bool tryReadRegister(SDNode *N);
 243   bool tryWriteRegister(SDNode *N);
 244
 245   bool tryInlineAsm(SDNode *N);
 246
 247   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 248
 249   void SelectCMP_SWAP(SDNode *N);
 250
 251   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 252   /// inline asm expressions.
 253   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 254                                     std::vector<SDValue> &OutOps) override;
 255
 256   // Form pairs of consecutive R, S, D, or Q registers.
 257   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 258   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 259   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 260   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 261
 262   // Form sequences of 4 consecutive S, D, or Q registers.
 263   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 264   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 265   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 266
 267   // Get the alignment operand for a NEON VLD or VST instruction.
 268   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 269                         bool is64BitVector);
 270
 271   /// Returns the number of instructions required to materialize the given
 272   /// constant in a register, or 3 if a literal pool load is needed.
 273   unsigned ConstantMaterializationCost(unsigned Val) const;
 274
 275   /// Checks if N is a multiplication by a constant where we can extract out a
 276   /// power of two from the constant so that it can be used in a shift, but only
 277   /// if it simplifies the materialization of the constant. Returns true if it
 278   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 279   /// out and to NewMulConst the new constant to be multiplied by.
 280   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 281                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 282
 283   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 284   /// selected when N would have been selected.
 285   void replaceDAGValue(const SDValue &N, SDValue M);
 286 };
 287 }
 288
 289 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 290 /// operand. If so Imm will receive the 32-bit value.
 291 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 292   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 293     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 294     return true;
 295   }
 296   return false;
 297 }
 298
 299 // isInt32Immediate - This method tests to see if a constant operand.
 300 // If so Imm will receive the 32 bit value.
 301 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 302   return isInt32Immediate(N.getNode(), Imm);
 303 }
 304
 305 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 306 // opcode and that it has a immediate integer right operand.
 307 // If so Imm will receive the 32 bit value.
 308 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 309   return N->getOpcode() == Opc &&
 310          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 311 }
 312
 313 /// \brief Check whether a particular node is a constant value representable as
 314 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 315 ///
 316 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 317 static bool isScaledConstantInRange(SDValue Node, int Scale,
 318                                     int RangeMin, int RangeMax,
 319                                     int &ScaledConstant) {
 320   assert(Scale > 0 && "Invalid scale!");
 321
 322   // Check that this is a constant.
 323   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 324   if (!C)
 325     return false;
 326
 327   ScaledConstant = (int) C->getZExtValue();
 328   if ((ScaledConstant % Scale) != 0)
 329     return false;
 330
 331   ScaledConstant /= Scale;
 332   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 333 }
 334
 335 void ARMDAGToDAGISel::PreprocessISelDAG() {
 336   if (!Subtarget->hasV6T2Ops())
 337     return;
 338
 339   bool isThumb2 = Subtarget->isThumb();
 340   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 341        E = CurDAG->allnodes_end(); I != E; ) {
 342     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 343
 344     if (N->getOpcode() != ISD::ADD)
 345       continue;
 346
 347     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 348     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 349     // trailing zeros, e.g. 1020.
 350     // Transform the expression to
 351     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 352     // of trailing zeros of c2. The left shift would be folded as an shifter
 353     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 354     // node (UBFX).
 355
 356     SDValue N0 = N->getOperand(0);
 357     SDValue N1 = N->getOperand(1);
 358     unsigned And_imm = 0;
 359     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 360       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 361         std::swap(N0, N1);
 362     }
 363     if (!And_imm)
 364       continue;
 365
 366     // Check if the AND mask is an immediate of the form: 000.....1111111100
 367     unsigned TZ = countTrailingZeros(And_imm);
 368     if (TZ != 1 && TZ != 2)
 369       // Be conservative here. Shifter operands aren't always free. e.g. On
 370       // Swift, left shifter operand of 1 / 2 for free but others are not.
 371       // e.g.
 372       //  ubfx   r3, r1, #16, #8
 373       //  ldr.w  r3, [r0, r3, lsl #2]
 374       // vs.
 375       //  mov.w  r9, #1020
 376       //  and.w  r2, r9, r1, lsr #14
 377       //  ldr    r2, [r0, r2]
 378       continue;
 379     And_imm >>= TZ;
 380     if (And_imm & (And_imm + 1))
 381       continue;
 382
 383     // Look for (and (srl X, c1), c2).
 384     SDValue Srl = N1.getOperand(0);
 385     unsigned Srl_imm = 0;
 386     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 387         (Srl_imm <= 2))
 388       continue;
 389
 390     // Make sure first operand is not a shifter operand which would prevent
 391     // folding of the left shift.
 392     SDValue CPTmp0;
 393     SDValue CPTmp1;
 394     SDValue CPTmp2;
 395     if (isThumb2) {
 396       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 397         continue;
 398     } else {
 399       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 400           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 401         continue;
 402     }
 403
 404     // Now make the transformation.
 405     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 406                           Srl.getOperand(0),
 407                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 408                                               MVT::i32));
 409     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 410                          Srl,
 411                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 412     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 413                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 414     CurDAG->UpdateNodeOperands(N, N0, N1);
 415   }
 416 }
 417
 418 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 419 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 420 /// least on current ARM implementations) which should be avoidded.
 421 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 422   if (OptLevel == CodeGenOpt::None)
 423     return true;
 424
 425   if (!Subtarget->hasVMLxHazards())
 426     return true;
 427
 428   if (!N->hasOneUse())
 429     return false;
 430
 431   SDNode *Use = *N->use_begin();
 432   if (Use->getOpcode() == ISD::CopyToReg)
 433     return true;
 434   if (Use->isMachineOpcode()) {
 435     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 436         CurDAG->getSubtarget().getInstrInfo());
 437
 438     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 439     if (MCID.mayStore())
 440       return true;
 441     unsigned Opcode = MCID.getOpcode();
 442     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 443       return true;
 444     // vmlx feeding into another vmlx. We actually want to unfold
 445     // the use later in the MLxExpansion pass. e.g.
 446     // vmla
 447     // vmla (stall 8 cycles)
 448     //
 449     // vmul (5 cycles)
 450     // vadd (5 cycles)
 451     // vmla
 452     // This adds up to about 18 - 19 cycles.
 453     //
 454     // vmla
 455     // vmul (stall 4 cycles)
 456     // vadd adds up to about 14 cycles.
 457     return TII->isFpMLxInstruction(Opcode);
 458   }
 459
 460   return false;
 461 }
 462
 463 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 464                                             ARM_AM::ShiftOpc ShOpcVal,
 465                                             unsigned ShAmt) {
 466   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 467     return true;
 468   if (Shift.hasOneUse())
 469     return true;
 470   // R << 2 is free.
 471   return ShOpcVal == ARM_AM::lsl &&
 472          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 473 }
 474
 475 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 476   if (Subtarget->isThumb()) {
 477     if (Val <= 255) return 1;                               // MOV
 478     if (Subtarget->hasV6T2Ops() &&
 479         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 480       return 1; // MOVW
 481     if (Val <= 510) return 2;                               // MOV + ADDi8
 482     if (~Val <= 255) return 2;                              // MOV + MVN
 483     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 484   } else {
 485     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 486     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 487     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 488     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 489   }
 490   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 491   return 3; // Literal pool load
 492 }
 493
 494 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 495                                              unsigned MaxShift,
 496                                              unsigned &PowerOfTwo,
 497                                              SDValue &NewMulConst) const {
 498   assert(N.getOpcode() == ISD::MUL);
 499   assert(MaxShift > 0);
 500
 501   // If the multiply is used in more than one place then changing the constant
 502   // will make other uses incorrect, so don't.
 503   if (!N.hasOneUse()) return false;
 504   // Check if the multiply is by a constant
 505   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 506   if (!MulConst) return false;
 507   // If the constant is used in more than one place then modifying it will mean
 508   // we need to materialize two constants instead of one, which is a bad idea.
 509   if (!MulConst->hasOneUse()) return false;
 510   unsigned MulConstVal = MulConst->getZExtValue();
 511   if (MulConstVal == 0) return false;
 512
 513   // Find the largest power of 2 that MulConstVal is a multiple of
 514   PowerOfTwo = MaxShift;
 515   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 516     --PowerOfTwo;
 517     if (PowerOfTwo == 0) return false;
 518   }
 519
 520   // Only optimise if the new cost is better
 521   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 522   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 523   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 524   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 525   return NewCost < OldCost;
 526 }
 527
 528 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 529   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 530   CurDAG->ReplaceAllUsesWith(N, M);
 531 }
 532
 533 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 534                                               SDValue &BaseReg,
 535                                               SDValue &Opc,
 536                                               bool CheckProfitability) {
 537   if (DisableShifterOp)
 538     return false;
 539
 540   // If N is a multiply-by-constant and it's profitable to extract a shift and
 541   // use it in a shifted operand do so.
 542   if (N.getOpcode() == ISD::MUL) {
 543     unsigned PowerOfTwo = 0;
 544     SDValue NewMulConst;
 545     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 546       HandleSDNode Handle(N);
 547       replaceDAGValue(N.getOperand(1), NewMulConst);
 548       BaseReg = Handle.getValue();
 549       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
 550                                                           PowerOfTwo),
 551                                       SDLoc(N), MVT::i32);
 552       return true;
 553     }
 554   }
 555
 556   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 557
 558   // Don't match base register only case. That is matched to a separate
 559   // lower complexity pattern with explicit register operand.
 560   if (ShOpcVal == ARM_AM::no_shift) return false;
 561
 562   BaseReg = N.getOperand(0);
 563   unsigned ShImmVal = 0;
 564   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 565   if (!RHS) return false;
 566   ShImmVal = RHS->getZExtValue() & 31;
 567   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 568                                   SDLoc(N), MVT::i32);
 569   return true;
 570 }
 571
 572 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 573                                               SDValue &BaseReg,
 574                                               SDValue &ShReg,
 575                                               SDValue &Opc,
 576                                               bool CheckProfitability) {
 577   if (DisableShifterOp)
 578     return false;
 579
 580   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 581
 582   // Don't match base register only case. That is matched to a separate
 583   // lower complexity pattern with explicit register operand.
 584   if (ShOpcVal == ARM_AM::no_shift) return false;
 585
 586   BaseReg = N.getOperand(0);
 587   unsigned ShImmVal = 0;
 588   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 589   if (RHS) return false;
 590
 591   ShReg = N.getOperand(1);
 592   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 593     return false;
 594   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 595                                   SDLoc(N), MVT::i32);
 596   return true;
 597 }
 598
 599
 600 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 601                                           SDValue &Base,
 602                                           SDValue &OffImm) {
 603   // Match simple R + imm12 operands.
 604
 605   // Base only.
 606   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 607       !CurDAG->isBaseWithConstantOffset(N)) {
 608     if (N.getOpcode() == ISD::FrameIndex) {
 609       // Match frame index.
 610       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 611       Base = CurDAG->getTargetFrameIndex(
 612           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 613       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 614       return true;
 615     }
 616
 617     if (N.getOpcode() == ARMISD::Wrapper &&
 618         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 619         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 620         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 621       Base = N.getOperand(0);
 622     } else
 623       Base = N;
 624     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 625     return true;
 626   }
 627
 628   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 629     int RHSC = (int)RHS->getSExtValue();
 630     if (N.getOpcode() == ISD::SUB)
 631       RHSC = -RHSC;
 632
 633     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 634       Base   = N.getOperand(0);
 635       if (Base.getOpcode() == ISD::FrameIndex) {
 636         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 637         Base = CurDAG->getTargetFrameIndex(
 638             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 639       }
 640       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 641       return true;
 642     }
 643   }
 644
 645   // Base only.
 646   Base = N;
 647   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 648   return true;
 649 }
 650
 651
 652
 653 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 654                                       SDValue &Opc) {
 655   if (N.getOpcode() == ISD::MUL &&
 656       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 657     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 658       // X * [3,5,9] -> X + X * [2,4,8] etc.
 659       int RHSC = (int)RHS->getZExtValue();
 660       if (RHSC & 1) {
 661         RHSC = RHSC & ~1;
 662         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 663         if (RHSC < 0) {
 664           AddSub = ARM_AM::sub;
 665           RHSC = - RHSC;
 666         }
 667         if (isPowerOf2_32(RHSC)) {
 668           unsigned ShAmt = Log2_32(RHSC);
 669           Base = Offset = N.getOperand(0);
 670           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 671                                                             ARM_AM::lsl),
 672                                           SDLoc(N), MVT::i32);
 673           return true;
 674         }
 675       }
 676     }
 677   }
 678
 679   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 680       // ISD::OR that is equivalent to an ISD::ADD.
 681       !CurDAG->isBaseWithConstantOffset(N))
 682     return false;
 683
 684   // Leave simple R +/- imm12 operands for LDRi12
 685   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 686     int RHSC;
 687     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 688                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 689       return false;
 690   }
 691
 692   // Otherwise this is R +/- [possibly shifted] R.
 693   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 694   ARM_AM::ShiftOpc ShOpcVal =
 695     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 696   unsigned ShAmt = 0;
 697
 698   Base   = N.getOperand(0);
 699   Offset = N.getOperand(1);
 700
 701   if (ShOpcVal != ARM_AM::no_shift) {
 702     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 703     // it.
 704     if (ConstantSDNode *Sh =
 705            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 706       ShAmt = Sh->getZExtValue();
 707       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 708         Offset = N.getOperand(1).getOperand(0);
 709       else {
 710         ShAmt = 0;
 711         ShOpcVal = ARM_AM::no_shift;
 712       }
 713     } else {
 714       ShOpcVal = ARM_AM::no_shift;
 715     }
 716   }
 717
 718   // Try matching (R shl C) + (R).
 719   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 720       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 721         N.getOperand(0).hasOneUse())) {
 722     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 723     if (ShOpcVal != ARM_AM::no_shift) {
 724       // Check to see if the RHS of the shift is a constant, if not, we can't
 725       // fold it.
 726       if (ConstantSDNode *Sh =
 727           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 728         ShAmt = Sh->getZExtValue();
 729         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 730           Offset = N.getOperand(0).getOperand(0);
 731           Base = N.getOperand(1);
 732         } else {
 733           ShAmt = 0;
 734           ShOpcVal = ARM_AM::no_shift;
 735         }
 736       } else {
 737         ShOpcVal = ARM_AM::no_shift;
 738       }
 739     }
 740   }
 741
 742   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 743   // and use it in a shifted operand do so.
 744   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 745     unsigned PowerOfTwo = 0;
 746     SDValue NewMulConst;
 747     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 748       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 749       ShAmt = PowerOfTwo;
 750       ShOpcVal = ARM_AM::lsl;
 751     }
 752   }
 753
 754   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 755                                   SDLoc(N), MVT::i32);
 756   return true;
 757 }
 758
 759
 760 //-----
 761
 762 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 763                                                      SDValue &Base,
 764                                                      SDValue &Offset,
 765                                                      SDValue &Opc) {
 766   if (N.getOpcode() == ISD::MUL &&
 767       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 768     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 769       // X * [3,5,9] -> X + X * [2,4,8] etc.
 770       int RHSC = (int)RHS->getZExtValue();
 771       if (RHSC & 1) {
 772         RHSC = RHSC & ~1;
 773         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 774         if (RHSC < 0) {
 775           AddSub = ARM_AM::sub;
 776           RHSC = - RHSC;
 777         }
 778         if (isPowerOf2_32(RHSC)) {
 779           unsigned ShAmt = Log2_32(RHSC);
 780           Base = Offset = N.getOperand(0);
 781           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 782                                                             ARM_AM::lsl),
 783                                           SDLoc(N), MVT::i32);
 784           return AM2_SHOP;
 785         }
 786       }
 787     }
 788   }
 789
 790   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 791       // ISD::OR that is equivalent to an ADD.
 792       !CurDAG->isBaseWithConstantOffset(N)) {
 793     Base = N;
 794     if (N.getOpcode() == ISD::FrameIndex) {
 795       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 796       Base = CurDAG->getTargetFrameIndex(
 797           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 798     } else if (N.getOpcode() == ARMISD::Wrapper &&
 799                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 800                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 801                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 802       Base = N.getOperand(0);
 803     }
 804     Offset = CurDAG->getRegister(0, MVT::i32);
 805     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 806                                                       ARM_AM::no_shift),
 807                                     SDLoc(N), MVT::i32);
 808     return AM2_BASE;
 809   }
 810
 811   // Match simple R +/- imm12 operands.
 812   if (N.getOpcode() != ISD::SUB) {
 813     int RHSC;
 814     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 815                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 816       Base = N.getOperand(0);
 817       if (Base.getOpcode() == ISD::FrameIndex) {
 818         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 819         Base = CurDAG->getTargetFrameIndex(
 820             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 821       }
 822       Offset = CurDAG->getRegister(0, MVT::i32);
 823
 824       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 825       if (RHSC < 0) {
 826         AddSub = ARM_AM::sub;
 827         RHSC = - RHSC;
 828       }
 829       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 830                                                         ARM_AM::no_shift),
 831                                       SDLoc(N), MVT::i32);
 832       return AM2_BASE;
 833     }
 834   }
 835
 836   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 837     // Compute R +/- (R << N) and reuse it.
 838     Base = N;
 839     Offset = CurDAG->getRegister(0, MVT::i32);
 840     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 841                                                       ARM_AM::no_shift),
 842                                     SDLoc(N), MVT::i32);
 843     return AM2_BASE;
 844   }
 845
 846   // Otherwise this is R +/- [possibly shifted] R.
 847   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 848   ARM_AM::ShiftOpc ShOpcVal =
 849     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 850   unsigned ShAmt = 0;
 851
 852   Base   = N.getOperand(0);
 853   Offset = N.getOperand(1);
 854
 855   if (ShOpcVal != ARM_AM::no_shift) {
 856     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 857     // it.
 858     if (ConstantSDNode *Sh =
 859            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 860       ShAmt = Sh->getZExtValue();
 861       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 862         Offset = N.getOperand(1).getOperand(0);
 863       else {
 864         ShAmt = 0;
 865         ShOpcVal = ARM_AM::no_shift;
 866       }
 867     } else {
 868       ShOpcVal = ARM_AM::no_shift;
 869     }
 870   }
 871
 872   // Try matching (R shl C) + (R).
 873   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 874       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 875         N.getOperand(0).hasOneUse())) {
 876     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 877     if (ShOpcVal != ARM_AM::no_shift) {
 878       // Check to see if the RHS of the shift is a constant, if not, we can't
 879       // fold it.
 880       if (ConstantSDNode *Sh =
 881           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 882         ShAmt = Sh->getZExtValue();
 883         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 884           Offset = N.getOperand(0).getOperand(0);
 885           Base = N.getOperand(1);
 886         } else {
 887           ShAmt = 0;
 888           ShOpcVal = ARM_AM::no_shift;
 889         }
 890       } else {
 891         ShOpcVal = ARM_AM::no_shift;
 892       }
 893     }
 894   }
 895
 896   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 897                                   SDLoc(N), MVT::i32);
 898   return AM2_SHOP;
 899 }
 900
 901 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 902                                             SDValue &Offset, SDValue &Opc) {
 903   unsigned Opcode = Op->getOpcode();
 904   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 905     ? cast<LoadSDNode>(Op)->getAddressingMode()
 906     : cast<StoreSDNode>(Op)->getAddressingMode();
 907   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 908     ? ARM_AM::add : ARM_AM::sub;
 909   int Val;
 910   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 911     return false;
 912
 913   Offset = N;
 914   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 915   unsigned ShAmt = 0;
 916   if (ShOpcVal != ARM_AM::no_shift) {
 917     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 918     // it.
 919     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 920       ShAmt = Sh->getZExtValue();
 921       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 922         Offset = N.getOperand(0);
 923       else {
 924         ShAmt = 0;
 925         ShOpcVal = ARM_AM::no_shift;
 926       }
 927     } else {
 928       ShOpcVal = ARM_AM::no_shift;
 929     }
 930   }
 931
 932   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 933                                   SDLoc(N), MVT::i32);
 934   return true;
 935 }
 936
 937 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 938                                             SDValue &Offset, SDValue &Opc) {
 939   unsigned Opcode = Op->getOpcode();
 940   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 941     ? cast<LoadSDNode>(Op)->getAddressingMode()
 942     : cast<StoreSDNode>(Op)->getAddressingMode();
 943   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 944     ? ARM_AM::add : ARM_AM::sub;
 945   int Val;
 946   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 947     if (AddSub == ARM_AM::sub) Val *= -1;
 948     Offset = CurDAG->getRegister(0, MVT::i32);
 949     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 950     return true;
 951   }
 952
 953   return false;
 954 }
 955
 956
 957 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 958                                             SDValue &Offset, SDValue &Opc) {
 959   unsigned Opcode = Op->getOpcode();
 960   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 961     ? cast<LoadSDNode>(Op)->getAddressingMode()
 962     : cast<StoreSDNode>(Op)->getAddressingMode();
 963   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 964     ? ARM_AM::add : ARM_AM::sub;
 965   int Val;
 966   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 967     Offset = CurDAG->getRegister(0, MVT::i32);
 968     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 969                                                       ARM_AM::no_shift),
 970                                     SDLoc(Op), MVT::i32);
 971     return true;
 972   }
 973
 974   return false;
 975 }
 976
 977 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 978   Base = N;
 979   return true;
 980 }
 981
 982 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 983                                       SDValue &Base, SDValue &Offset,
 984                                       SDValue &Opc) {
 985   if (N.getOpcode() == ISD::SUB) {
 986     // X - C  is canonicalize to X + -C, no need to handle it here.
 987     Base = N.getOperand(0);
 988     Offset = N.getOperand(1);
 989     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 990                                     MVT::i32);
 991     return true;
 992   }
 993
 994   if (!CurDAG->isBaseWithConstantOffset(N)) {
 995     Base = N;
 996     if (N.getOpcode() == ISD::FrameIndex) {
 997       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 998       Base = CurDAG->getTargetFrameIndex(
 999           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1000     }
1001     Offset = CurDAG->getRegister(0, MVT::i32);
1002     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1003                                     MVT::i32);
1004     return true;
1005   }
1006
1007   // If the RHS is +/- imm8, fold into addr mode.
1008   int RHSC;
1009   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1010                               -256 + 1, 256, RHSC)) { // 8 bits.
1011     Base = N.getOperand(0);
1012     if (Base.getOpcode() == ISD::FrameIndex) {
1013       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1014       Base = CurDAG->getTargetFrameIndex(
1015           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1016     }
1017     Offset = CurDAG->getRegister(0, MVT::i32);
1018
1019     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1020     if (RHSC < 0) {
1021       AddSub = ARM_AM::sub;
1022       RHSC = -RHSC;
1023     }
1024     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1025                                     MVT::i32);
1026     return true;
1027   }
1028
1029   Base = N.getOperand(0);
1030   Offset = N.getOperand(1);
1031   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1032                                   MVT::i32);
1033   return true;
1034 }
1035
1036 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1037                                             SDValue &Offset, SDValue &Opc) {
1038   unsigned Opcode = Op->getOpcode();
1039   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1040     ? cast<LoadSDNode>(Op)->getAddressingMode()
1041     : cast<StoreSDNode>(Op)->getAddressingMode();
1042   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1043     ? ARM_AM::add : ARM_AM::sub;
1044   int Val;
1045   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1046     Offset = CurDAG->getRegister(0, MVT::i32);
1047     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1048                                     MVT::i32);
1049     return true;
1050   }
1051
1052   Offset = N;
1053   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1054                                   MVT::i32);
1055   return true;
1056 }
1057
1058 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1059                                       SDValue &Base, SDValue &Offset) {
1060   if (!CurDAG->isBaseWithConstantOffset(N)) {
1061     Base = N;
1062     if (N.getOpcode() == ISD::FrameIndex) {
1063       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1064       Base = CurDAG->getTargetFrameIndex(
1065           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1066     } else if (N.getOpcode() == ARMISD::Wrapper &&
1067                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1068                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1069                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1070       Base = N.getOperand(0);
1071     }
1072     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1073                                        SDLoc(N), MVT::i32);
1074     return true;
1075   }
1076
1077   // If the RHS is +/- imm8, fold into addr mode.
1078   int RHSC;
1079   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1080                               -256 + 1, 256, RHSC)) {
1081     Base = N.getOperand(0);
1082     if (Base.getOpcode() == ISD::FrameIndex) {
1083       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1084       Base = CurDAG->getTargetFrameIndex(
1085           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1086     }
1087
1088     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1089     if (RHSC < 0) {
1090       AddSub = ARM_AM::sub;
1091       RHSC = -RHSC;
1092     }
1093     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1094                                        SDLoc(N), MVT::i32);
1095     return true;
1096   }
1097
1098   Base = N;
1099   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1100                                      SDLoc(N), MVT::i32);
1101   return true;
1102 }
1103
1104 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1105                                       SDValue &Align) {
1106   Addr = N;
1107
1108   unsigned Alignment = 0;
1109
1110   MemSDNode *MemN = cast<MemSDNode>(Parent);
1111
1112   if (isa<LSBaseSDNode>(MemN) ||
1113       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1114         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1115        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1116     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1117     // The maximum alignment is equal to the memory size being referenced.
1118     unsigned MMOAlign = MemN->getAlignment();
1119     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1120     if (MMOAlign >= MemSize && MemSize > 1)
1121       Alignment = MemSize;
1122   } else {
1123     // All other uses of addrmode6 are for intrinsics.  For now just record
1124     // the raw alignment value; it will be refined later based on the legal
1125     // alignment operands for the intrinsic.
1126     Alignment = MemN->getAlignment();
1127   }
1128
1129   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1130   return true;
1131 }
1132
1133 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1134                                             SDValue &Offset) {
1135   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1136   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1137   if (AM != ISD::POST_INC)
1138     return false;
1139   Offset = N;
1140   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1141     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1142       Offset = CurDAG->getRegister(0, MVT::i32);
1143   }
1144   return true;
1145 }
1146
1147 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1148                                        SDValue &Offset, SDValue &Label) {
1149   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1150     Offset = N.getOperand(0);
1151     SDValue N1 = N.getOperand(1);
1152     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1153                                       SDLoc(N), MVT::i32);
1154     return true;
1155   }
1156
1157   return false;
1158 }
1159
1160
1161 //===----------------------------------------------------------------------===//
1162 //                         Thumb Addressing Modes
1163 //===----------------------------------------------------------------------===//
1164
1165 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1166                                             SDValue &Base, SDValue &Offset){
1167   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1168     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1169     if (!NC || !NC->isNullValue())
1170       return false;
1171
1172     Base = Offset = N;
1173     return true;
1174   }
1175
1176   Base = N.getOperand(0);
1177   Offset = N.getOperand(1);
1178   return true;
1179 }
1180
1181 bool
1182 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1183                                           SDValue &Base, SDValue &OffImm) {
1184   if (!CurDAG->isBaseWithConstantOffset(N)) {
1185     if (N.getOpcode() == ISD::ADD) {
1186       return false; // We want to select register offset instead
1187     } else if (N.getOpcode() == ARMISD::Wrapper &&
1188         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1189         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1190         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1191         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1192       Base = N.getOperand(0);
1193     } else {
1194       Base = N;
1195     }
1196
1197     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1198     return true;
1199   }
1200
1201   // If the RHS is + imm5 * scale, fold into addr mode.
1202   int RHSC;
1203   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1204     Base = N.getOperand(0);
1205     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1206     return true;
1207   }
1208
1209   // Offset is too large, so use register offset instead.
1210   return false;
1211 }
1212
1213 bool
1214 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1215                                            SDValue &OffImm) {
1216   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1217 }
1218
1219 bool
1220 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1221                                            SDValue &OffImm) {
1222   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1223 }
1224
1225 bool
1226 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1227                                            SDValue &OffImm) {
1228   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1229 }
1230
1231 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1232                                             SDValue &Base, SDValue &OffImm) {
1233   if (N.getOpcode() == ISD::FrameIndex) {
1234     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1235     // Only multiples of 4 are allowed for the offset, so the frame object
1236     // alignment must be at least 4.
1237     MachineFrameInfo &MFI = MF->getFrameInfo();
1238     if (MFI.getObjectAlignment(FI) < 4)
1239       MFI.setObjectAlignment(FI, 4);
1240     Base = CurDAG->getTargetFrameIndex(
1241         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1242     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1243     return true;
1244   }
1245
1246   if (!CurDAG->isBaseWithConstantOffset(N))
1247     return false;
1248
1249   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1250   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1251       (LHSR && LHSR->getReg() == ARM::SP)) {
1252     // If the RHS is + imm8 * scale, fold into addr mode.
1253     int RHSC;
1254     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1255       Base = N.getOperand(0);
1256       if (Base.getOpcode() == ISD::FrameIndex) {
1257         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1258         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1259         // indexed by the LHS must be 4-byte aligned.
1260         MachineFrameInfo &MFI = MF->getFrameInfo();
1261         if (MFI.getObjectAlignment(FI) < 4)
1262           MFI.setObjectAlignment(FI, 4);
1263         Base = CurDAG->getTargetFrameIndex(
1264             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1265       }
1266       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1267       return true;
1268     }
1269   }
1270
1271   return false;
1272 }
1273
1274
1275 //===----------------------------------------------------------------------===//
1276 //                        Thumb 2 Addressing Modes
1277 //===----------------------------------------------------------------------===//
1278
1279
1280 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1281                                             SDValue &Base, SDValue &OffImm) {
1282   // Match simple R + imm12 operands.
1283
1284   // Base only.
1285   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1286       !CurDAG->isBaseWithConstantOffset(N)) {
1287     if (N.getOpcode() == ISD::FrameIndex) {
1288       // Match frame index.
1289       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1290       Base = CurDAG->getTargetFrameIndex(
1291           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1292       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1293       return true;
1294     }
1295
1296     if (N.getOpcode() == ARMISD::Wrapper &&
1297         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1298         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1299         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1300       Base = N.getOperand(0);
1301       if (Base.getOpcode() == ISD::TargetConstantPool)
1302         return false;  // We want to select t2LDRpci instead.
1303     } else
1304       Base = N;
1305     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1306     return true;
1307   }
1308
1309   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1310     if (SelectT2AddrModeImm8(N, Base, OffImm))
1311       // Let t2LDRi8 handle (R - imm8).
1312       return false;
1313
1314     int RHSC = (int)RHS->getZExtValue();
1315     if (N.getOpcode() == ISD::SUB)
1316       RHSC = -RHSC;
1317
1318     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1319       Base   = N.getOperand(0);
1320       if (Base.getOpcode() == ISD::FrameIndex) {
1321         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1322         Base = CurDAG->getTargetFrameIndex(
1323             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1324       }
1325       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1326       return true;
1327     }
1328   }
1329
1330   // Base only.
1331   Base = N;
1332   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1333   return true;
1334 }
1335
1336 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1337                                            SDValue &Base, SDValue &OffImm) {
1338   // Match simple R - imm8 operands.
1339   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1340       !CurDAG->isBaseWithConstantOffset(N))
1341     return false;
1342
1343   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1344     int RHSC = (int)RHS->getSExtValue();
1345     if (N.getOpcode() == ISD::SUB)
1346       RHSC = -RHSC;
1347
1348     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1349       Base = N.getOperand(0);
1350       if (Base.getOpcode() == ISD::FrameIndex) {
1351         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1352         Base = CurDAG->getTargetFrameIndex(
1353             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1354       }
1355       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1356       return true;
1357     }
1358   }
1359
1360   return false;
1361 }
1362
1363 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1364                                                  SDValue &OffImm){
1365   unsigned Opcode = Op->getOpcode();
1366   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1367     ? cast<LoadSDNode>(Op)->getAddressingMode()
1368     : cast<StoreSDNode>(Op)->getAddressingMode();
1369   int RHSC;
1370   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1371     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1372       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1373       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1374     return true;
1375   }
1376
1377   return false;
1378 }
1379
1380 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1381                                             SDValue &Base,
1382                                             SDValue &OffReg, SDValue &ShImm) {
1383   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1384   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1385     return false;
1386
1387   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1388   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1389     int RHSC = (int)RHS->getZExtValue();
1390     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1391       return false;
1392     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1393       return false;
1394   }
1395
1396   // Look for (R + R) or (R + (R << [1,2,3])).
1397   unsigned ShAmt = 0;
1398   Base   = N.getOperand(0);
1399   OffReg = N.getOperand(1);
1400
1401   // Swap if it is ((R << c) + R).
1402   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1403   if (ShOpcVal != ARM_AM::lsl) {
1404     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1405     if (ShOpcVal == ARM_AM::lsl)
1406       std::swap(Base, OffReg);
1407   }
1408
1409   if (ShOpcVal == ARM_AM::lsl) {
1410     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1411     // it.
1412     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1413       ShAmt = Sh->getZExtValue();
1414       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1415         OffReg = OffReg.getOperand(0);
1416       else {
1417         ShAmt = 0;
1418       }
1419     }
1420   }
1421
1422   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1423   // and use it in a shifted operand do so.
1424   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1425     unsigned PowerOfTwo = 0;
1426     SDValue NewMulConst;
1427     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1428       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1429       ShAmt = PowerOfTwo;
1430     }
1431   }
1432
1433   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1434
1435   return true;
1436 }
1437
1438 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1439                                                 SDValue &OffImm) {
1440   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1441   // instructions.
1442   Base = N;
1443   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1444
1445   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1446     return true;
1447
1448   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1449   if (!RHS)
1450     return true;
1451
1452   uint32_t RHSC = (int)RHS->getZExtValue();
1453   if (RHSC > 1020 || RHSC % 4 != 0)
1454     return true;
1455
1456   Base = N.getOperand(0);
1457   if (Base.getOpcode() == ISD::FrameIndex) {
1458     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1459     Base = CurDAG->getTargetFrameIndex(
1460         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1461   }
1462
1463   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1464   return true;
1465 }
1466
1467 //===--------------------------------------------------------------------===//
1468
1469 /// getAL - Returns a ARMCC::AL immediate node.
1470 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1471   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1472 }
1473
1474 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1475   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1476   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1477   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1478 }
1479
1480 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1481   LoadSDNode *LD = cast<LoadSDNode>(N);
1482   ISD::MemIndexedMode AM = LD->getAddressingMode();
1483   if (AM == ISD::UNINDEXED)
1484     return false;
1485
1486   EVT LoadedVT = LD->getMemoryVT();
1487   SDValue Offset, AMOpc;
1488   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1489   unsigned Opcode = 0;
1490   bool Match = false;
1491   if (LoadedVT == MVT::i32 && isPre &&
1492       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1493     Opcode = ARM::LDR_PRE_IMM;
1494     Match = true;
1495   } else if (LoadedVT == MVT::i32 && !isPre &&
1496       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1497     Opcode = ARM::LDR_POST_IMM;
1498     Match = true;
1499   } else if (LoadedVT == MVT::i32 &&
1500       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1501     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1502     Match = true;
1503
1504   } else if (LoadedVT == MVT::i16 &&
1505              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1506     Match = true;
1507     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1508       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1509       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1510   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1511     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1512       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1513         Match = true;
1514         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1515       }
1516     } else {
1517       if (isPre &&
1518           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1519         Match = true;
1520         Opcode = ARM::LDRB_PRE_IMM;
1521       } else if (!isPre &&
1522                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1523         Match = true;
1524         Opcode = ARM::LDRB_POST_IMM;
1525       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1526         Match = true;
1527         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1528       }
1529     }
1530   }
1531
1532   if (Match) {
1533     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1534       SDValue Chain = LD->getChain();
1535       SDValue Base = LD->getBasePtr();
1536       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1537                        CurDAG->getRegister(0, MVT::i32), Chain };
1538       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1539                                            MVT::Other, Ops);
1540       transferMemOperands(N, New);
1541       ReplaceNode(N, New);
1542       return true;
1543     } else {
1544       SDValue Chain = LD->getChain();
1545       SDValue Base = LD->getBasePtr();
1546       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1547                        CurDAG->getRegister(0, MVT::i32), Chain };
1548       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1549                                            MVT::Other, Ops);
1550       transferMemOperands(N, New);
1551       ReplaceNode(N, New);
1552       return true;
1553     }
1554   }
1555
1556   return false;
1557 }
1558
1559 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1560   LoadSDNode *LD = cast<LoadSDNode>(N);
1561   EVT LoadedVT = LD->getMemoryVT();
1562   ISD::MemIndexedMode AM = LD->getAddressingMode();
1563   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1564       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1565     return false;
1566
1567   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1568   if (!COffs || COffs->getZExtValue() != 4)
1569     return false;
1570
1571   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1572   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1573   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1574   // ISel.
1575   SDValue Chain = LD->getChain();
1576   SDValue Base = LD->getBasePtr();
1577   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1578                    CurDAG->getRegister(0, MVT::i32), Chain };
1579   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1580                                        MVT::i32, MVT::Other, Ops);
1581   transferMemOperands(N, New);
1582   ReplaceNode(N, New);
1583   return true;
1584 }
1585
1586 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1587   LoadSDNode *LD = cast<LoadSDNode>(N);
1588   ISD::MemIndexedMode AM = LD->getAddressingMode();
1589   if (AM == ISD::UNINDEXED)
1590     return false;
1591
1592   EVT LoadedVT = LD->getMemoryVT();
1593   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1594   SDValue Offset;
1595   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1596   unsigned Opcode = 0;
1597   bool Match = false;
1598   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1599     switch (LoadedVT.getSimpleVT().SimpleTy) {
1600     case MVT::i32:
1601       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1602       break;
1603     case MVT::i16:
1604       if (isSExtLd)
1605         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1606       else
1607         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1608       break;
1609     case MVT::i8:
1610     case MVT::i1:
1611       if (isSExtLd)
1612         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1613       else
1614         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1615       break;
1616     default:
1617       return false;
1618     }
1619     Match = true;
1620   }
1621
1622   if (Match) {
1623     SDValue Chain = LD->getChain();
1624     SDValue Base = LD->getBasePtr();
1625     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1626                      CurDAG->getRegister(0, MVT::i32), Chain };
1627     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1628                                          MVT::Other, Ops);
1629     transferMemOperands(N, New);
1630     ReplaceNode(N, New);
1631     return true;
1632   }
1633
1634   return false;
1635 }
1636
1637 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1638 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1639   SDLoc dl(V0.getNode());
1640   SDValue RegClass =
1641     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1642   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1643   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1644   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1645   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1646 }
1647
1648 /// \brief Form a D register from a pair of S registers.
1649 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1650   SDLoc dl(V0.getNode());
1651   SDValue RegClass =
1652     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1653   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1654   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1655   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1656   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1657 }
1658
1659 /// \brief Form a quad register from a pair of D registers.
1660 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1661   SDLoc dl(V0.getNode());
1662   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1663                                                MVT::i32);
1664   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1665   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1666   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1667   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1668 }
1669
1670 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1671 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1672   SDLoc dl(V0.getNode());
1673   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1674                                                MVT::i32);
1675   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1676   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1677   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1678   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1679 }
1680
1681 /// \brief Form 4 consecutive S registers.
1682 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1683                                    SDValue V2, SDValue V3) {
1684   SDLoc dl(V0.getNode());
1685   SDValue RegClass =
1686     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1687   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1688   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1689   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1690   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1691   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1692                                     V2, SubReg2, V3, SubReg3 };
1693   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1694 }
1695
1696 /// \brief Form 4 consecutive D registers.
1697 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1698                                    SDValue V2, SDValue V3) {
1699   SDLoc dl(V0.getNode());
1700   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1701                                                MVT::i32);
1702   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1703   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1704   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1705   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1706   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1707                                     V2, SubReg2, V3, SubReg3 };
1708   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1709 }
1710
1711 /// \brief Form 4 consecutive Q registers.
1712 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1713                                    SDValue V2, SDValue V3) {
1714   SDLoc dl(V0.getNode());
1715   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1716                                                MVT::i32);
1717   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1718   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1719   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1720   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1721   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1722                                     V2, SubReg2, V3, SubReg3 };
1723   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1724 }
1725
1726 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1727 /// of a NEON VLD or VST instruction.  The supported values depend on the
1728 /// number of registers being loaded.
1729 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1730                                        unsigned NumVecs, bool is64BitVector) {
1731   unsigned NumRegs = NumVecs;
1732   if (!is64BitVector && NumVecs < 3)
1733     NumRegs *= 2;
1734
1735   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1736   if (Alignment >= 32 && NumRegs == 4)
1737     Alignment = 32;
1738   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1739     Alignment = 16;
1740   else if (Alignment >= 8)
1741     Alignment = 8;
1742   else
1743     Alignment = 0;
1744
1745   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1746 }
1747
1748 static bool isVLDfixed(unsigned Opc)
1749 {
1750   switch (Opc) {
1751   default: return false;
1752   case ARM::VLD1d8wb_fixed : return true;
1753   case ARM::VLD1d16wb_fixed : return true;
1754   case ARM::VLD1d64Qwb_fixed : return true;
1755   case ARM::VLD1d32wb_fixed : return true;
1756   case ARM::VLD1d64wb_fixed : return true;
1757   case ARM::VLD1d64TPseudoWB_fixed : return true;
1758   case ARM::VLD1d64QPseudoWB_fixed : return true;
1759   case ARM::VLD1q8wb_fixed : return true;
1760   case ARM::VLD1q16wb_fixed : return true;
1761   case ARM::VLD1q32wb_fixed : return true;
1762   case ARM::VLD1q64wb_fixed : return true;
1763   case ARM::VLD1DUPd8wb_fixed : return true;
1764   case ARM::VLD1DUPd16wb_fixed : return true;
1765   case ARM::VLD1DUPd32wb_fixed : return true;
1766   case ARM::VLD1DUPq8wb_fixed : return true;
1767   case ARM::VLD1DUPq16wb_fixed : return true;
1768   case ARM::VLD1DUPq32wb_fixed : return true;
1769   case ARM::VLD2d8wb_fixed : return true;
1770   case ARM::VLD2d16wb_fixed : return true;
1771   case ARM::VLD2d32wb_fixed : return true;
1772   case ARM::VLD2q8PseudoWB_fixed : return true;
1773   case ARM::VLD2q16PseudoWB_fixed : return true;
1774   case ARM::VLD2q32PseudoWB_fixed : return true;
1775   case ARM::VLD2DUPd8wb_fixed : return true;
1776   case ARM::VLD2DUPd16wb_fixed : return true;
1777   case ARM::VLD2DUPd32wb_fixed : return true;
1778   }
1779 }
1780
1781 static bool isVSTfixed(unsigned Opc)
1782 {
1783   switch (Opc) {
1784   default: return false;
1785   case ARM::VST1d8wb_fixed : return true;
1786   case ARM::VST1d16wb_fixed : return true;
1787   case ARM::VST1d32wb_fixed : return true;
1788   case ARM::VST1d64wb_fixed : return true;
1789   case ARM::VST1q8wb_fixed : return true;
1790   case ARM::VST1q16wb_fixed : return true;
1791   case ARM::VST1q32wb_fixed : return true;
1792   case ARM::VST1q64wb_fixed : return true;
1793   case ARM::VST1d64TPseudoWB_fixed : return true;
1794   case ARM::VST1d64QPseudoWB_fixed : return true;
1795   case ARM::VST2d8wb_fixed : return true;
1796   case ARM::VST2d16wb_fixed : return true;
1797   case ARM::VST2d32wb_fixed : return true;
1798   case ARM::VST2q8PseudoWB_fixed : return true;
1799   case ARM::VST2q16PseudoWB_fixed : return true;
1800   case ARM::VST2q32PseudoWB_fixed : return true;
1801   }
1802 }
1803
1804 // Get the register stride update opcode of a VLD/VST instruction that
1805 // is otherwise equivalent to the given fixed stride updating instruction.
1806 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1807   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1808     && "Incorrect fixed stride updating instruction.");
1809   switch (Opc) {
1810   default: break;
1811   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1812   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1813   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1814   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1815   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1816   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1817   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1818   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1819   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1820   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1821   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1822   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1823   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1824   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1825   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1826   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1827   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1828   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1829
1830   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1831   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1832   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1833   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1834   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1835   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1836   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1837   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1838   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1839   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1840
1841   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1842   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1843   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1844   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1845   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1846   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1847
1848   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1849   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1850   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1851   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1852   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1853   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1854
1855   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1856   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1857   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1858   }
1859   return Opc; // If not one we handle, return it unchanged.
1860 }
1861
1862 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1863                                 const uint16_t *DOpcodes,
1864                                 const uint16_t *QOpcodes0,
1865                                 const uint16_t *QOpcodes1) {
1866   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1867   SDLoc dl(N);
1868
1869   SDValue MemAddr, Align;
1870   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1871   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1872     return;
1873
1874   SDValue Chain = N->getOperand(0);
1875   EVT VT = N->getValueType(0);
1876   bool is64BitVector = VT.is64BitVector();
1877   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1878
1879   unsigned OpcodeIndex;
1880   switch (VT.getSimpleVT().SimpleTy) {
1881   default: llvm_unreachable("unhandled vld type");
1882     // Double-register operations:
1883   case MVT::v8i8:  OpcodeIndex = 0; break;
1884   case MVT::v4i16: OpcodeIndex = 1; break;
1885   case MVT::v2f32:
1886   case MVT::v2i32: OpcodeIndex = 2; break;
1887   case MVT::v1i64: OpcodeIndex = 3; break;
1888     // Quad-register operations:
1889   case MVT::v16i8: OpcodeIndex = 0; break;
1890   case MVT::v8i16: OpcodeIndex = 1; break;
1891   case MVT::v4f32:
1892   case MVT::v4i32: OpcodeIndex = 2; break;
1893   case MVT::v2f64:
1894   case MVT::v2i64: OpcodeIndex = 3;
1895     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1896     break;
1897   }
1898
1899   EVT ResTy;
1900   if (NumVecs == 1)
1901     ResTy = VT;
1902   else {
1903     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1904     if (!is64BitVector)
1905       ResTyElts *= 2;
1906     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1907   }
1908   std::vector<EVT> ResTys;
1909   ResTys.push_back(ResTy);
1910   if (isUpdating)
1911     ResTys.push_back(MVT::i32);
1912   ResTys.push_back(MVT::Other);
1913
1914   SDValue Pred = getAL(CurDAG, dl);
1915   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1916   SDNode *VLd;
1917   SmallVector<SDValue, 7> Ops;
1918
1919   // Double registers and VLD1/VLD2 quad registers are directly supported.
1920   if (is64BitVector || NumVecs <= 2) {
1921     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1922                     QOpcodes0[OpcodeIndex]);
1923     Ops.push_back(MemAddr);
1924     Ops.push_back(Align);
1925     if (isUpdating) {
1926       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1927       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1928       // case entirely when the rest are updated to that form, too.
1929       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1930         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1931       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1932       // check for that explicitly too. Horribly hacky, but temporary.
1933       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1934           !isa<ConstantSDNode>(Inc.getNode()))
1935         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1936     }
1937     Ops.push_back(Pred);
1938     Ops.push_back(Reg0);
1939     Ops.push_back(Chain);
1940     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1941
1942   } else {
1943     // Otherwise, quad registers are loaded with two separate instructions,
1944     // where one loads the even registers and the other loads the odd registers.
1945     EVT AddrTy = MemAddr.getValueType();
1946
1947     // Load the even subregs.  This is always an updating load, so that it
1948     // provides the address to the second load for the odd subregs.
1949     SDValue ImplDef =
1950       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1951     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1952     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1953                                           ResTy, AddrTy, MVT::Other, OpsA);
1954     Chain = SDValue(VLdA, 2);
1955
1956     // Load the odd subregs.
1957     Ops.push_back(SDValue(VLdA, 1));
1958     Ops.push_back(Align);
1959     if (isUpdating) {
1960       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1961       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1962              "only constant post-increment update allowed for VLD3/4");
1963       (void)Inc;
1964       Ops.push_back(Reg0);
1965     }
1966     Ops.push_back(SDValue(VLdA, 0));
1967     Ops.push_back(Pred);
1968     Ops.push_back(Reg0);
1969     Ops.push_back(Chain);
1970     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1971   }
1972
1973   // Transfer memoperands.
1974   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1975   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1976   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1977
1978   if (NumVecs == 1) {
1979     ReplaceNode(N, VLd);
1980     return;
1981   }
1982
1983   // Extract out the subregisters.
1984   SDValue SuperReg = SDValue(VLd, 0);
1985   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1986                     ARM::qsub_3 == ARM::qsub_0 + 3,
1987                 "Unexpected subreg numbering");
1988   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1989   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1990     ReplaceUses(SDValue(N, Vec),
1991                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1992   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1993   if (isUpdating)
1994     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1995   CurDAG->RemoveDeadNode(N);
1996 }
1997
1998 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1999                                 const uint16_t *DOpcodes,
2000                                 const uint16_t *QOpcodes0,
2001                                 const uint16_t *QOpcodes1) {
2002   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2003   SDLoc dl(N);
2004
2005   SDValue MemAddr, Align;
2006   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2007   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2008   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2009     return;
2010
2011   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2012   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2013
2014   SDValue Chain = N->getOperand(0);
2015   EVT VT = N->getOperand(Vec0Idx).getValueType();
2016   bool is64BitVector = VT.is64BitVector();
2017   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2018
2019   unsigned OpcodeIndex;
2020   switch (VT.getSimpleVT().SimpleTy) {
2021   default: llvm_unreachable("unhandled vst type");
2022     // Double-register operations:
2023   case MVT::v8i8:  OpcodeIndex = 0; break;
2024   case MVT::v4i16: OpcodeIndex = 1; break;
2025   case MVT::v2f32:
2026   case MVT::v2i32: OpcodeIndex = 2; break;
2027   case MVT::v1i64: OpcodeIndex = 3; break;
2028     // Quad-register operations:
2029   case MVT::v16i8: OpcodeIndex = 0; break;
2030   case MVT::v8i16: OpcodeIndex = 1; break;
2031   case MVT::v4f32:
2032   case MVT::v4i32: OpcodeIndex = 2; break;
2033   case MVT::v2f64:
2034   case MVT::v2i64: OpcodeIndex = 3;
2035     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
2036     break;
2037   }
2038
2039   std::vector<EVT> ResTys;
2040   if (isUpdating)
2041     ResTys.push_back(MVT::i32);
2042   ResTys.push_back(MVT::Other);
2043
2044   SDValue Pred = getAL(CurDAG, dl);
2045   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2046   SmallVector<SDValue, 7> Ops;
2047
2048   // Double registers and VST1/VST2 quad registers are directly supported.
2049   if (is64BitVector || NumVecs <= 2) {
2050     SDValue SrcReg;
2051     if (NumVecs == 1) {
2052       SrcReg = N->getOperand(Vec0Idx);
2053     } else if (is64BitVector) {
2054       // Form a REG_SEQUENCE to force register allocation.
2055       SDValue V0 = N->getOperand(Vec0Idx + 0);
2056       SDValue V1 = N->getOperand(Vec0Idx + 1);
2057       if (NumVecs == 2)
2058         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2059       else {
2060         SDValue V2 = N->getOperand(Vec0Idx + 2);
2061         // If it's a vst3, form a quad D-register and leave the last part as
2062         // an undef.
2063         SDValue V3 = (NumVecs == 3)
2064           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2065           : N->getOperand(Vec0Idx + 3);
2066         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2067       }
2068     } else {
2069       // Form a QQ register.
2070       SDValue Q0 = N->getOperand(Vec0Idx);
2071       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2072       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2073     }
2074
2075     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2076                     QOpcodes0[OpcodeIndex]);
2077     Ops.push_back(MemAddr);
2078     Ops.push_back(Align);
2079     if (isUpdating) {
2080       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2081       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2082       // case entirely when the rest are updated to that form, too.
2083       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2084         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2085       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2086       // check for that explicitly too. Horribly hacky, but temporary.
2087       if  (!isa<ConstantSDNode>(Inc.getNode()))
2088         Ops.push_back(Inc);
2089       else if (NumVecs > 2 && !isVSTfixed(Opc))
2090         Ops.push_back(Reg0);
2091     }
2092     Ops.push_back(SrcReg);
2093     Ops.push_back(Pred);
2094     Ops.push_back(Reg0);
2095     Ops.push_back(Chain);
2096     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2097
2098     // Transfer memoperands.
2099     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2100
2101     ReplaceNode(N, VSt);
2102     return;
2103   }
2104
2105   // Otherwise, quad registers are stored with two separate instructions,
2106   // where one stores the even registers and the other stores the odd registers.
2107
2108   // Form the QQQQ REG_SEQUENCE.
2109   SDValue V0 = N->getOperand(Vec0Idx + 0);
2110   SDValue V1 = N->getOperand(Vec0Idx + 1);
2111   SDValue V2 = N->getOperand(Vec0Idx + 2);
2112   SDValue V3 = (NumVecs == 3)
2113     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2114     : N->getOperand(Vec0Idx + 3);
2115   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2116
2117   // Store the even D registers.  This is always an updating store, so that it
2118   // provides the address to the second store for the odd subregs.
2119   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2120   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2121                                         MemAddr.getValueType(),
2122                                         MVT::Other, OpsA);
2123   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2124   Chain = SDValue(VStA, 1);
2125
2126   // Store the odd D registers.
2127   Ops.push_back(SDValue(VStA, 0));
2128   Ops.push_back(Align);
2129   if (isUpdating) {
2130     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2131     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2132            "only constant post-increment update allowed for VST3/4");
2133     (void)Inc;
2134     Ops.push_back(Reg0);
2135   }
2136   Ops.push_back(RegSeq);
2137   Ops.push_back(Pred);
2138   Ops.push_back(Reg0);
2139   Ops.push_back(Chain);
2140   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2141                                         Ops);
2142   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2143   ReplaceNode(N, VStB);
2144 }
2145
2146 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2147                                       unsigned NumVecs,
2148                                       const uint16_t *DOpcodes,
2149                                       const uint16_t *QOpcodes) {
2150   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2151   SDLoc dl(N);
2152
2153   SDValue MemAddr, Align;
2154   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2155   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2156   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2157     return;
2158
2159   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2160   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2161
2162   SDValue Chain = N->getOperand(0);
2163   unsigned Lane =
2164     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2165   EVT VT = N->getOperand(Vec0Idx).getValueType();
2166   bool is64BitVector = VT.is64BitVector();
2167
2168   unsigned Alignment = 0;
2169   if (NumVecs != 3) {
2170     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2171     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2172     if (Alignment > NumBytes)
2173       Alignment = NumBytes;
2174     if (Alignment < 8 && Alignment < NumBytes)
2175       Alignment = 0;
2176     // Alignment must be a power of two; make sure of that.
2177     Alignment = (Alignment & -Alignment);
2178     if (Alignment == 1)
2179       Alignment = 0;
2180   }
2181   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2182
2183   unsigned OpcodeIndex;
2184   switch (VT.getSimpleVT().SimpleTy) {
2185   default: llvm_unreachable("unhandled vld/vst lane type");
2186     // Double-register operations:
2187   case MVT::v8i8:  OpcodeIndex = 0; break;
2188   case MVT::v4i16: OpcodeIndex = 1; break;
2189   case MVT::v2f32:
2190   case MVT::v2i32: OpcodeIndex = 2; break;
2191     // Quad-register operations:
2192   case MVT::v8i16: OpcodeIndex = 0; break;
2193   case MVT::v4f32:
2194   case MVT::v4i32: OpcodeIndex = 1; break;
2195   }
2196
2197   std::vector<EVT> ResTys;
2198   if (IsLoad) {
2199     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2200     if (!is64BitVector)
2201       ResTyElts *= 2;
2202     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2203                                       MVT::i64, ResTyElts));
2204   }
2205   if (isUpdating)
2206     ResTys.push_back(MVT::i32);
2207   ResTys.push_back(MVT::Other);
2208
2209   SDValue Pred = getAL(CurDAG, dl);
2210   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2211
2212   SmallVector<SDValue, 8> Ops;
2213   Ops.push_back(MemAddr);
2214   Ops.push_back(Align);
2215   if (isUpdating) {
2216     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2217     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2218   }
2219
2220   SDValue SuperReg;
2221   SDValue V0 = N->getOperand(Vec0Idx + 0);
2222   SDValue V1 = N->getOperand(Vec0Idx + 1);
2223   if (NumVecs == 2) {
2224     if (is64BitVector)
2225       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2226     else
2227       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2228   } else {
2229     SDValue V2 = N->getOperand(Vec0Idx + 2);
2230     SDValue V3 = (NumVecs == 3)
2231       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2232       : N->getOperand(Vec0Idx + 3);
2233     if (is64BitVector)
2234       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2235     else
2236       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2237   }
2238   Ops.push_back(SuperReg);
2239   Ops.push_back(getI32Imm(Lane, dl));
2240   Ops.push_back(Pred);
2241   Ops.push_back(Reg0);
2242   Ops.push_back(Chain);
2243
2244   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2245                                   QOpcodes[OpcodeIndex]);
2246   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2247   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2248   if (!IsLoad) {
2249     ReplaceNode(N, VLdLn);
2250     return;
2251   }
2252
2253   // Extract the subregisters.
2254   SuperReg = SDValue(VLdLn, 0);
2255   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2256                     ARM::qsub_3 == ARM::qsub_0 + 3,
2257                 "Unexpected subreg numbering");
2258   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2259   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2260     ReplaceUses(SDValue(N, Vec),
2261                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2262   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2263   if (isUpdating)
2264     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2265   CurDAG->RemoveDeadNode(N);
2266 }
2267
2268 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2269                                    const uint16_t *DOpcodes,
2270                                    const uint16_t *QOpcodes) {
2271   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2272   SDLoc dl(N);
2273
2274   SDValue MemAddr, Align;
2275   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2276     return;
2277
2278   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2279   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2280
2281   SDValue Chain = N->getOperand(0);
2282   EVT VT = N->getValueType(0);
2283
2284   unsigned Alignment = 0;
2285   if (NumVecs != 3) {
2286     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2287     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2288     if (Alignment > NumBytes)
2289       Alignment = NumBytes;
2290     if (Alignment < 8 && Alignment < NumBytes)
2291       Alignment = 0;
2292     // Alignment must be a power of two; make sure of that.
2293     Alignment = (Alignment & -Alignment);
2294     if (Alignment == 1)
2295       Alignment = 0;
2296   }
2297   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2298
2299   unsigned Opc;
2300   switch (VT.getSimpleVT().SimpleTy) {
2301   default: llvm_unreachable("unhandled vld-dup type");
2302   case MVT::v8i8:  Opc = DOpcodes[0]; break;
2303   case MVT::v16i8: Opc = QOpcodes[0]; break;
2304   case MVT::v4i16: Opc = DOpcodes[1]; break;
2305   case MVT::v8i16: Opc = QOpcodes[1]; break;
2306   case MVT::v2f32:
2307   case MVT::v2i32: Opc = DOpcodes[2]; break;
2308   case MVT::v4f32:
2309   case MVT::v4i32: Opc = QOpcodes[2]; break;
2310   }
2311
2312   SDValue Pred = getAL(CurDAG, dl);
2313   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2314   SmallVector<SDValue, 6> Ops;
2315   Ops.push_back(MemAddr);
2316   Ops.push_back(Align);
2317   if (isUpdating) {
2318     // fixed-stride update instructions don't have an explicit writeback
2319     // operand. It's implicit in the opcode itself.
2320     SDValue Inc = N->getOperand(2);
2321     if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2322       Opc = getVLDSTRegisterUpdateOpcode(Opc);
2323     if (!isa<ConstantSDNode>(Inc.getNode()))
2324       Ops.push_back(Inc);
2325     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2326     else if (NumVecs > 2)
2327       Ops.push_back(Reg0);
2328   }
2329   Ops.push_back(Pred);
2330   Ops.push_back(Reg0);
2331   Ops.push_back(Chain);
2332
2333   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2334   std::vector<EVT> ResTys;
2335   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2336   if (isUpdating)
2337     ResTys.push_back(MVT::i32);
2338   ResTys.push_back(MVT::Other);
2339   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2340   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2341
2342   // Extract the subregisters.
2343   if (NumVecs == 1) {
2344     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2345   } else {
2346     SDValue SuperReg = SDValue(VLdDup, 0);
2347     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2348     unsigned SubIdx = ARM::dsub_0;
2349     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2350       ReplaceUses(SDValue(N, Vec),
2351                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2352   }
2353   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2354   if (isUpdating)
2355     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2356   CurDAG->RemoveDeadNode(N);
2357 }
2358
2359 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2360                                  unsigned Opc) {
2361   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2362   SDLoc dl(N);
2363   EVT VT = N->getValueType(0);
2364   unsigned FirstTblReg = IsExt ? 2 : 1;
2365
2366   // Form a REG_SEQUENCE to force register allocation.
2367   SDValue RegSeq;
2368   SDValue V0 = N->getOperand(FirstTblReg + 0);
2369   SDValue V1 = N->getOperand(FirstTblReg + 1);
2370   if (NumVecs == 2)
2371     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2372   else {
2373     SDValue V2 = N->getOperand(FirstTblReg + 2);
2374     // If it's a vtbl3, form a quad D-register and leave the last part as
2375     // an undef.
2376     SDValue V3 = (NumVecs == 3)
2377       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2378       : N->getOperand(FirstTblReg + 3);
2379     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2380   }
2381
2382   SmallVector<SDValue, 6> Ops;
2383   if (IsExt)
2384     Ops.push_back(N->getOperand(1));
2385   Ops.push_back(RegSeq);
2386   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2387   Ops.push_back(getAL(CurDAG, dl)); // predicate
2388   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2389   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2390 }
2391
2392 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2393   if (!Subtarget->hasV6T2Ops())
2394     return false;
2395
2396   unsigned Opc = isSigned
2397     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2398     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2399   SDLoc dl(N);
2400
2401   // For unsigned extracts, check for a shift right and mask
2402   unsigned And_imm = 0;
2403   if (N->getOpcode() == ISD::AND) {
2404     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2405
2406       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2407       if (And_imm & (And_imm + 1))
2408         return false;
2409
2410       unsigned Srl_imm = 0;
2411       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2412                                 Srl_imm)) {
2413         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2414
2415         // Note: The width operand is encoded as width-1.
2416         unsigned Width = countTrailingOnes(And_imm) - 1;
2417         unsigned LSB = Srl_imm;
2418
2419         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2420
2421         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2422           // It's cheaper to use a right shift to extract the top bits.
2423           if (Subtarget->isThumb()) {
2424             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2425             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2426                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2427                               getAL(CurDAG, dl), Reg0, Reg0 };
2428             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2429             return true;
2430           }
2431
2432           // ARM models shift instructions as MOVsi with shifter operand.
2433           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2434           SDValue ShOpc =
2435             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2436                                       MVT::i32);
2437           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2438                             getAL(CurDAG, dl), Reg0, Reg0 };
2439           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2440           return true;
2441         }
2442
2443         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2444                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2445                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2446                           getAL(CurDAG, dl), Reg0 };
2447         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2448         return true;
2449       }
2450     }
2451     return false;
2452   }
2453
2454   // Otherwise, we're looking for a shift of a shift
2455   unsigned Shl_imm = 0;
2456   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2457     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2458     unsigned Srl_imm = 0;
2459     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2460       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2461       // Note: The width operand is encoded as width-1.
2462       unsigned Width = 32 - Srl_imm - 1;
2463       int LSB = Srl_imm - Shl_imm;
2464       if (LSB < 0)
2465         return false;
2466       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2467       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2468                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2469                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2470                         getAL(CurDAG, dl), Reg0 };
2471       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2472       return true;
2473     }
2474   }
2475
2476   // Or we are looking for a shift of an and, with a mask operand
2477   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2478       isShiftedMask_32(And_imm)) {
2479     unsigned Srl_imm = 0;
2480     unsigned LSB = countTrailingZeros(And_imm);
2481     // Shift must be the same as the ands lsb
2482     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2483       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2484       unsigned MSB = 31 - countLeadingZeros(And_imm);
2485       // Note: The width operand is encoded as width-1.
2486       unsigned Width = MSB - LSB;
2487       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2488       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2489                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2490                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2491                         getAL(CurDAG, dl), Reg0 };
2492       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2493       return true;
2494     }
2495   }
2496
2497   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2498     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2499     unsigned LSB = 0;
2500     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2501         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2502       return false;
2503
2504     if (LSB + Width > 32)
2505       return false;
2506
2507     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2508     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2509                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2510                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2511                       getAL(CurDAG, dl), Reg0 };
2512     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2513     return true;
2514   }
2515
2516   return false;
2517 }
2518
2519 /// Target-specific DAG combining for ISD::XOR.
2520 /// Target-independent combining lowers SELECT_CC nodes of the form
2521 /// select_cc setg[ge] X,  0,  X, -X
2522 /// select_cc setgt    X, -1,  X, -X
2523 /// select_cc setl[te] X,  0, -X,  X
2524 /// select_cc setlt    X,  1, -X,  X
2525 /// which represent Integer ABS into:
2526 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2527 /// ARM instruction selection detects the latter and matches it to
2528 /// ARM::ABS or ARM::t2ABS machine node.
2529 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2530   SDValue XORSrc0 = N->getOperand(0);
2531   SDValue XORSrc1 = N->getOperand(1);
2532   EVT VT = N->getValueType(0);
2533
2534   if (Subtarget->isThumb1Only())
2535     return false;
2536
2537   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2538     return false;
2539
2540   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2541   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2542   SDValue SRASrc0 = XORSrc1.getOperand(0);
2543   SDValue SRASrc1 = XORSrc1.getOperand(1);
2544   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2545   EVT XType = SRASrc0.getValueType();
2546   unsigned Size = XType.getSizeInBits() - 1;
2547
2548   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2549       XType.isInteger() && SRAConstant != nullptr &&
2550       Size == SRAConstant->getZExtValue()) {
2551     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2552     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2553     return true;
2554   }
2555
2556   return false;
2557 }
2558
2559 /// We've got special pseudo-instructions for these
2560 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2561   unsigned Opcode;
2562   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2563   if (MemTy == MVT::i8)
2564     Opcode = ARM::CMP_SWAP_8;
2565   else if (MemTy == MVT::i16)
2566     Opcode = ARM::CMP_SWAP_16;
2567   else if (MemTy == MVT::i32)
2568     Opcode = ARM::CMP_SWAP_32;
2569   else
2570     llvm_unreachable("Unknown AtomicCmpSwap type");
2571
2572   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2573                    N->getOperand(0)};
2574   SDNode *CmpSwap = CurDAG->getMachineNode(
2575       Opcode, SDLoc(N),
2576       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2577
2578   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2579   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2580   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2581
2582   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2583   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2584   CurDAG->RemoveDeadNode(N);
2585 }
2586
2587 static Optional<std::pair<unsigned, unsigned>>
2588 getContiguousRangeOfSetBits(const APInt &A) {
2589   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2590   unsigned LastOne = A.countTrailingZeros();
2591   if (A.countPopulation() != (FirstOne - LastOne + 1))
2592     return Optional<std::pair<unsigned,unsigned>>();
2593   return std::make_pair(FirstOne, LastOne);
2594 }
2595
2596 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2597   assert(N->getOpcode() == ARMISD::CMPZ);
2598   SwitchEQNEToPLMI = false;
2599
2600   if (!Subtarget->isThumb())
2601     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2602     // LSR don't exist as standalone instructions - they need the barrel shifter.
2603     return;
2604
2605   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2606   SDValue And = N->getOperand(0);
2607   if (!And->hasOneUse())
2608     return;
2609
2610   SDValue Zero = N->getOperand(1);
2611   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2612       And->getOpcode() != ISD::AND)
2613     return;
2614   SDValue X = And.getOperand(0);
2615   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2616
2617   if (!C || !X->hasOneUse())
2618     return;
2619   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2620   if (!Range)
2621     return;
2622
2623   // There are several ways to lower this:
2624   SDNode *NewN;
2625   SDLoc dl(N);
2626
2627   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2628     if (Subtarget->isThumb2()) {
2629       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2630       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2631                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2632                         CurDAG->getRegister(0, MVT::i32) };
2633       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2634     } else {
2635       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2636                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2637                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2638       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2639     }
2640   };
2641
2642   if (Range->second == 0) {
2643     //  1. Mask includes the LSB -> Simply shift the top N bits off
2644     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2645     ReplaceNode(And.getNode(), NewN);
2646   } else if (Range->first == 31) {
2647     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2648     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2649     ReplaceNode(And.getNode(), NewN);
2650   } else if (Range->first == Range->second) {
2651     //  3. Only one bit is set. We can shift this into the sign bit and use a
2652     //     PL/MI comparison.
2653     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2654     ReplaceNode(And.getNode(), NewN);
2655
2656     SwitchEQNEToPLMI = true;
2657   } else if (!Subtarget->hasV6T2Ops()) {
2658     //  4. Do a double shift to clear bottom and top bits, but only in
2659     //     thumb-1 mode as in thumb-2 we can use UBFX.
2660     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2661     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2662                      Range->second + (31 - Range->first));
2663     ReplaceNode(And.getNode(), NewN);
2664   }
2665
2666 }
2667
2668 void ARMDAGToDAGISel::Select(SDNode *N) {
2669   SDLoc dl(N);
2670
2671   if (N->isMachineOpcode()) {
2672     N->setNodeId(-1);
2673     return;   // Already selected.
2674   }
2675
2676   switch (N->getOpcode()) {
2677   default: break;
2678   case ISD::WRITE_REGISTER:
2679     if (tryWriteRegister(N))
2680       return;
2681     break;
2682   case ISD::READ_REGISTER:
2683     if (tryReadRegister(N))
2684       return;
2685     break;
2686   case ISD::INLINEASM:
2687     if (tryInlineAsm(N))
2688       return;
2689     break;
2690   case ISD::XOR:
2691     // Select special operations if XOR node forms integer ABS pattern
2692     if (tryABSOp(N))
2693       return;
2694     // Other cases are autogenerated.
2695     break;
2696   case ISD::Constant: {
2697     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2698     // If we can't materialize the constant we need to use a literal pool
2699     if (ConstantMaterializationCost(Val) > 2) {
2700       SDValue CPIdx = CurDAG->getTargetConstantPool(
2701           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2702           TLI->getPointerTy(CurDAG->getDataLayout()));
2703
2704       SDNode *ResNode;
2705       if (Subtarget->isThumb()) {
2706         SDValue Pred = getAL(CurDAG, dl);
2707         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2708         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2709         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2710                                          Ops);
2711       } else {
2712         SDValue Ops[] = {
2713           CPIdx,
2714           CurDAG->getTargetConstant(0, dl, MVT::i32),
2715           getAL(CurDAG, dl),
2716           CurDAG->getRegister(0, MVT::i32),
2717           CurDAG->getEntryNode()
2718         };
2719         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2720                                          Ops);
2721       }
2722       ReplaceNode(N, ResNode);
2723       return;
2724     }
2725
2726     // Other cases are autogenerated.
2727     break;
2728   }
2729   case ISD::FrameIndex: {
2730     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2731     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2732     SDValue TFI = CurDAG->getTargetFrameIndex(
2733         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2734     if (Subtarget->isThumb1Only()) {
2735       // Set the alignment of the frame object to 4, to avoid having to generate
2736       // more than one ADD
2737       MachineFrameInfo &MFI = MF->getFrameInfo();
2738       if (MFI.getObjectAlignment(FI) < 4)
2739         MFI.setObjectAlignment(FI, 4);
2740       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2741                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2742       return;
2743     } else {
2744       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2745                       ARM::t2ADDri : ARM::ADDri);
2746       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2747                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2748                         CurDAG->getRegister(0, MVT::i32) };
2749       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2750       return;
2751     }
2752   }
2753   case ISD::SRL:
2754     if (tryV6T2BitfieldExtractOp(N, false))
2755       return;
2756     break;
2757   case ISD::SIGN_EXTEND_INREG:
2758   case ISD::SRA:
2759     if (tryV6T2BitfieldExtractOp(N, true))
2760       return;
2761     break;
2762   case ISD::MUL:
2763     if (Subtarget->isThumb1Only())
2764       break;
2765     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2766       unsigned RHSV = C->getZExtValue();
2767       if (!RHSV) break;
2768       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2769         unsigned ShImm = Log2_32(RHSV-1);
2770         if (ShImm >= 32)
2771           break;
2772         SDValue V = N->getOperand(0);
2773         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2774         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2775         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2776         if (Subtarget->isThumb()) {
2777           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2778           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2779           return;
2780         } else {
2781           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2782                             Reg0 };
2783           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2784           return;
2785         }
2786       }
2787       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2788         unsigned ShImm = Log2_32(RHSV+1);
2789         if (ShImm >= 32)
2790           break;
2791         SDValue V = N->getOperand(0);
2792         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2793         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2794         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2795         if (Subtarget->isThumb()) {
2796           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2797           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2798           return;
2799         } else {
2800           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2801                             Reg0 };
2802           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2803           return;
2804         }
2805       }
2806     }
2807     break;
2808   case ISD::AND: {
2809     // Check for unsigned bitfield extract
2810     if (tryV6T2BitfieldExtractOp(N, false))
2811       return;
2812
2813     // If an immediate is used in an AND node, it is possible that the immediate
2814     // can be more optimally materialized when negated. If this is the case we
2815     // can negate the immediate and use a BIC instead.
2816     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2817     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2818       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2819
2820       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2821       // immediate can be negated and fit in the immediate operand of
2822       // a t2BIC, don't do any manual transform here as this can be
2823       // handled by the generic ISel machinery.
2824       bool PreferImmediateEncoding =
2825         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2826       if (!PreferImmediateEncoding &&
2827           ConstantMaterializationCost(Imm) >
2828               ConstantMaterializationCost(~Imm)) {
2829         // The current immediate costs more to materialize than a negated
2830         // immediate, so negate the immediate and use a BIC.
2831         SDValue NewImm =
2832           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2833         // If the new constant didn't exist before, reposition it in the topological
2834         // ordering so it is just before N. Otherwise, don't touch its location.
2835         if (NewImm->getNodeId() == -1)
2836           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2837
2838         if (!Subtarget->hasThumb2()) {
2839           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2840                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2841                            CurDAG->getRegister(0, MVT::i32)};
2842           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2843           return;
2844         } else {
2845           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2846                            CurDAG->getRegister(0, MVT::i32),
2847                            CurDAG->getRegister(0, MVT::i32)};
2848           ReplaceNode(N,
2849                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2850           return;
2851         }
2852       }
2853     }
2854
2855     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2856     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2857     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2858     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2859     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2860     EVT VT = N->getValueType(0);
2861     if (VT != MVT::i32)
2862       break;
2863     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2864       ? ARM::t2MOVTi16
2865       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2866     if (!Opc)
2867       break;
2868     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2869     N1C = dyn_cast<ConstantSDNode>(N1);
2870     if (!N1C)
2871       break;
2872     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2873       SDValue N2 = N0.getOperand(1);
2874       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2875       if (!N2C)
2876         break;
2877       unsigned N1CVal = N1C->getZExtValue();
2878       unsigned N2CVal = N2C->getZExtValue();
2879       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2880           (N1CVal & 0xffffU) == 0xffffU &&
2881           (N2CVal & 0xffffU) == 0x0U) {
2882         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2883                                                   dl, MVT::i32);
2884         SDValue Ops[] = { N0.getOperand(0), Imm16,
2885                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2886         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2887         return;
2888       }
2889     }
2890
2891     break;
2892   }
2893   case ARMISD::UMAAL: {
2894     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2895     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2896                       N->getOperand(2), N->getOperand(3),
2897                       getAL(CurDAG, dl),
2898                       CurDAG->getRegister(0, MVT::i32) };
2899     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2900     return;
2901   }
2902   case ARMISD::UMLAL:{
2903     if (Subtarget->isThumb()) {
2904       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2905                         N->getOperand(3), getAL(CurDAG, dl),
2906                         CurDAG->getRegister(0, MVT::i32)};
2907       ReplaceNode(
2908           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2909       return;
2910     }else{
2911       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2912                         N->getOperand(3), getAL(CurDAG, dl),
2913                         CurDAG->getRegister(0, MVT::i32),
2914                         CurDAG->getRegister(0, MVT::i32) };
2915       ReplaceNode(N, CurDAG->getMachineNode(
2916                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2917                          MVT::i32, MVT::i32, Ops));
2918       return;
2919     }
2920   }
2921   case ARMISD::SMLAL:{
2922     if (Subtarget->isThumb()) {
2923       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2924                         N->getOperand(3), getAL(CurDAG, dl),
2925                         CurDAG->getRegister(0, MVT::i32)};
2926       ReplaceNode(
2927           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2928       return;
2929     }else{
2930       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2931                         N->getOperand(3), getAL(CurDAG, dl),
2932                         CurDAG->getRegister(0, MVT::i32),
2933                         CurDAG->getRegister(0, MVT::i32) };
2934       ReplaceNode(N, CurDAG->getMachineNode(
2935                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2936                          MVT::i32, MVT::i32, Ops));
2937       return;
2938     }
2939   }
2940   case ARMISD::SUBE: {
2941     if (!Subtarget->hasV6Ops())
2942       break;
2943     // Look for a pattern to match SMMLS
2944     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2945     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2946         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2947         !SDValue(N, 1).use_empty())
2948       break;
2949
2950     if (Subtarget->isThumb())
2951       assert(Subtarget->hasThumb2() &&
2952              "This pattern should not be generated for Thumb");
2953
2954     SDValue SmulLoHi = N->getOperand(1);
2955     SDValue Subc = N->getOperand(2);
2956     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2957
2958     if (!Zero || Zero->getZExtValue() != 0 ||
2959         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2960         N->getOperand(1) != SmulLoHi.getValue(1) ||
2961         N->getOperand(2) != Subc.getValue(1))
2962       break;
2963
2964     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2965     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2966                       N->getOperand(0), getAL(CurDAG, dl),
2967                       CurDAG->getRegister(0, MVT::i32) };
2968     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2969     return;
2970   }
2971   case ISD::LOAD: {
2972     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2973       if (tryT2IndexedLoad(N))
2974         return;
2975     } else if (Subtarget->isThumb()) {
2976       if (tryT1IndexedLoad(N))
2977         return;
2978     } else if (tryARMIndexedLoad(N))
2979       return;
2980     // Other cases are autogenerated.
2981     break;
2982   }
2983   case ARMISD::BRCOND: {
2984     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2985     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2986     // Pattern complexity = 6  cost = 1  size = 0
2987
2988     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2989     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2990     // Pattern complexity = 6  cost = 1  size = 0
2991
2992     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2993     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2994     // Pattern complexity = 6  cost = 1  size = 0
2995
2996     unsigned Opc = Subtarget->isThumb() ?
2997       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2998     SDValue Chain = N->getOperand(0);
2999     SDValue N1 = N->getOperand(1);
3000     SDValue N2 = N->getOperand(2);
3001     SDValue N3 = N->getOperand(3);
3002     SDValue InFlag = N->getOperand(4);
3003     assert(N1.getOpcode() == ISD::BasicBlock);
3004     assert(N2.getOpcode() == ISD::Constant);
3005     assert(N3.getOpcode() == ISD::Register);
3006
3007     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3008
3009     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3010       bool SwitchEQNEToPLMI;
3011       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3012       InFlag = N->getOperand(4);
3013
3014       if (SwitchEQNEToPLMI) {
3015         switch ((ARMCC::CondCodes)CC) {
3016         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3017         case ARMCC::NE:
3018           CC = (unsigned)ARMCC::MI;
3019           break;
3020         case ARMCC::EQ:
3021           CC = (unsigned)ARMCC::PL;
3022           break;
3023         }
3024       }
3025     }
3026
3027     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3028     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3029     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3030                                              MVT::Glue, Ops);
3031     Chain = SDValue(ResNode, 0);
3032     if (N->getNumValues() == 2) {
3033       InFlag = SDValue(ResNode, 1);
3034       ReplaceUses(SDValue(N, 1), InFlag);
3035     }
3036     ReplaceUses(SDValue(N, 0),
3037                 SDValue(Chain.getNode(), Chain.getResNo()));
3038     CurDAG->RemoveDeadNode(N);
3039     return;
3040   }
3041
3042   case ARMISD::CMPZ: {
3043     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3044     //   This allows us to avoid materializing the expensive negative constant.
3045     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3046     //   for its glue output.
3047     SDValue X = N->getOperand(0);
3048     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3049     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3050       int64_t Addend = -C->getSExtValue();
3051
3052       SDNode *Add = nullptr;
3053       // ADDS can be better than CMN if the immediate fits in a
3054       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3055       // Outside that range we can just use a CMN which is 32-bit but has a
3056       // 12-bit immediate range.
3057       if (Addend < 1<<8) {
3058         if (Subtarget->isThumb2()) {
3059           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3060                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3061                             CurDAG->getRegister(0, MVT::i32) };
3062           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3063         } else {
3064           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3065           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3066                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3067                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3068           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3069         }
3070       }
3071       if (Add) {
3072         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3073         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3074       }
3075     }
3076     // Other cases are autogenerated.
3077     break;
3078   }
3079
3080   case ARMISD::CMOV: {
3081     SDValue InFlag = N->getOperand(4);
3082
3083     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3084       bool SwitchEQNEToPLMI;
3085       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3086
3087       if (SwitchEQNEToPLMI) {
3088         SDValue ARMcc = N->getOperand(2);
3089         ARMCC::CondCodes CC =
3090           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3091
3092         switch (CC) {
3093         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3094         case ARMCC::NE:
3095           CC = ARMCC::MI;
3096           break;
3097         case ARMCC::EQ:
3098           CC = ARMCC::PL;
3099           break;
3100         }
3101         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3102         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3103                          N->getOperand(3), N->getOperand(4)};
3104         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3105       }
3106
3107     }
3108     // Other cases are autogenerated.
3109     break;
3110   }
3111
3112   case ARMISD::VZIP: {
3113     unsigned Opc = 0;
3114     EVT VT = N->getValueType(0);
3115     switch (VT.getSimpleVT().SimpleTy) {
3116     default: return;
3117     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3118     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3119     case MVT::v2f32:
3120     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3121     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3122     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3123     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3124     case MVT::v4f32:
3125     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3126     }
3127     SDValue Pred = getAL(CurDAG, dl);
3128     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3129     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3130     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3131     return;
3132   }
3133   case ARMISD::VUZP: {
3134     unsigned Opc = 0;
3135     EVT VT = N->getValueType(0);
3136     switch (VT.getSimpleVT().SimpleTy) {
3137     default: return;
3138     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3139     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3140     case MVT::v2f32:
3141     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3142     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3143     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3144     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3145     case MVT::v4f32:
3146     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3147     }
3148     SDValue Pred = getAL(CurDAG, dl);
3149     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3150     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3151     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3152     return;
3153   }
3154   case ARMISD::VTRN: {
3155     unsigned Opc = 0;
3156     EVT VT = N->getValueType(0);
3157     switch (VT.getSimpleVT().SimpleTy) {
3158     default: return;
3159     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3160     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3161     case MVT::v2f32:
3162     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3163     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3164     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3165     case MVT::v4f32:
3166     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3167     }
3168     SDValue Pred = getAL(CurDAG, dl);
3169     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3170     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3171     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3172     return;
3173   }
3174   case ARMISD::BUILD_VECTOR: {
3175     EVT VecVT = N->getValueType(0);
3176     EVT EltVT = VecVT.getVectorElementType();
3177     unsigned NumElts = VecVT.getVectorNumElements();
3178     if (EltVT == MVT::f64) {
3179       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3180       ReplaceNode(
3181           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3182       return;
3183     }
3184     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3185     if (NumElts == 2) {
3186       ReplaceNode(
3187           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3188       return;
3189     }
3190     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3191     ReplaceNode(N,
3192                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3193                                     N->getOperand(2), N->getOperand(3)));
3194     return;
3195   }
3196
3197   case ARMISD::VLD1DUP: {
3198     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3199                                          ARM::VLD1DUPd32 };
3200     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3201                                          ARM::VLD1DUPq32 };
3202     SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3203     return;
3204   }
3205
3206   case ARMISD::VLD2DUP: {
3207     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3208                                         ARM::VLD2DUPd32 };
3209     SelectVLDDup(N, false, 2, Opcodes);
3210     return;
3211   }
3212
3213   case ARMISD::VLD3DUP: {
3214     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3215                                         ARM::VLD3DUPd16Pseudo,
3216                                         ARM::VLD3DUPd32Pseudo };
3217     SelectVLDDup(N, false, 3, Opcodes);
3218     return;
3219   }
3220
3221   case ARMISD::VLD4DUP: {
3222     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3223                                         ARM::VLD4DUPd16Pseudo,
3224                                         ARM::VLD4DUPd32Pseudo };
3225     SelectVLDDup(N, false, 4, Opcodes);
3226     return;
3227   }
3228
3229   case ARMISD::VLD1DUP_UPD: {
3230     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3231                                          ARM::VLD1DUPd16wb_fixed,
3232                                          ARM::VLD1DUPd32wb_fixed };
3233     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3234                                          ARM::VLD1DUPq16wb_fixed,
3235                                          ARM::VLD1DUPq32wb_fixed };
3236     SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3237     return;
3238   }
3239
3240   case ARMISD::VLD2DUP_UPD: {
3241     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3242                                         ARM::VLD2DUPd16wb_fixed,
3243                                         ARM::VLD2DUPd32wb_fixed };
3244     SelectVLDDup(N, true, 2, Opcodes);
3245     return;
3246   }
3247
3248   case ARMISD::VLD3DUP_UPD: {
3249     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3250                                         ARM::VLD3DUPd16Pseudo_UPD,
3251                                         ARM::VLD3DUPd32Pseudo_UPD };
3252     SelectVLDDup(N, true, 3, Opcodes);
3253     return;
3254   }
3255
3256   case ARMISD::VLD4DUP_UPD: {
3257     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3258                                         ARM::VLD4DUPd16Pseudo_UPD,
3259                                         ARM::VLD4DUPd32Pseudo_UPD };
3260     SelectVLDDup(N, true, 4, Opcodes);
3261     return;
3262   }
3263
3264   case ARMISD::VLD1_UPD: {
3265     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3266                                          ARM::VLD1d16wb_fixed,
3267                                          ARM::VLD1d32wb_fixed,
3268                                          ARM::VLD1d64wb_fixed };
3269     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3270                                          ARM::VLD1q16wb_fixed,
3271                                          ARM::VLD1q32wb_fixed,
3272                                          ARM::VLD1q64wb_fixed };
3273     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3274     return;
3275   }
3276
3277   case ARMISD::VLD2_UPD: {
3278     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3279                                          ARM::VLD2d16wb_fixed,
3280                                          ARM::VLD2d32wb_fixed,
3281                                          ARM::VLD1q64wb_fixed};
3282     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3283                                          ARM::VLD2q16PseudoWB_fixed,
3284                                          ARM::VLD2q32PseudoWB_fixed };
3285     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3286     return;
3287   }
3288
3289   case ARMISD::VLD3_UPD: {
3290     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3291                                          ARM::VLD3d16Pseudo_UPD,
3292                                          ARM::VLD3d32Pseudo_UPD,
3293                                          ARM::VLD1d64TPseudoWB_fixed};
3294     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3295                                           ARM::VLD3q16Pseudo_UPD,
3296                                           ARM::VLD3q32Pseudo_UPD };
3297     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3298                                           ARM::VLD3q16oddPseudo_UPD,
3299                                           ARM::VLD3q32oddPseudo_UPD };
3300     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3301     return;
3302   }
3303
3304   case ARMISD::VLD4_UPD: {
3305     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3306                                          ARM::VLD4d16Pseudo_UPD,
3307                                          ARM::VLD4d32Pseudo_UPD,
3308                                          ARM::VLD1d64QPseudoWB_fixed};
3309     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3310                                           ARM::VLD4q16Pseudo_UPD,
3311                                           ARM::VLD4q32Pseudo_UPD };
3312     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3313                                           ARM::VLD4q16oddPseudo_UPD,
3314                                           ARM::VLD4q32oddPseudo_UPD };
3315     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3316     return;
3317   }
3318
3319   case ARMISD::VLD2LN_UPD: {
3320     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3321                                          ARM::VLD2LNd16Pseudo_UPD,
3322                                          ARM::VLD2LNd32Pseudo_UPD };
3323     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3324                                          ARM::VLD2LNq32Pseudo_UPD };
3325     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3326     return;
3327   }
3328
3329   case ARMISD::VLD3LN_UPD: {
3330     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3331                                          ARM::VLD3LNd16Pseudo_UPD,
3332                                          ARM::VLD3LNd32Pseudo_UPD };
3333     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3334                                          ARM::VLD3LNq32Pseudo_UPD };
3335     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3336     return;
3337   }
3338
3339   case ARMISD::VLD4LN_UPD: {
3340     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3341                                          ARM::VLD4LNd16Pseudo_UPD,
3342                                          ARM::VLD4LNd32Pseudo_UPD };
3343     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3344                                          ARM::VLD4LNq32Pseudo_UPD };
3345     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3346     return;
3347   }
3348
3349   case ARMISD::VST1_UPD: {
3350     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3351                                          ARM::VST1d16wb_fixed,
3352                                          ARM::VST1d32wb_fixed,
3353                                          ARM::VST1d64wb_fixed };
3354     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3355                                          ARM::VST1q16wb_fixed,
3356                                          ARM::VST1q32wb_fixed,
3357                                          ARM::VST1q64wb_fixed };
3358     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3359     return;
3360   }
3361
3362   case ARMISD::VST2_UPD: {
3363     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3364                                          ARM::VST2d16wb_fixed,
3365                                          ARM::VST2d32wb_fixed,
3366                                          ARM::VST1q64wb_fixed};
3367     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3368                                          ARM::VST2q16PseudoWB_fixed,
3369                                          ARM::VST2q32PseudoWB_fixed };
3370     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3371     return;
3372   }
3373
3374   case ARMISD::VST3_UPD: {
3375     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3376                                          ARM::VST3d16Pseudo_UPD,
3377                                          ARM::VST3d32Pseudo_UPD,
3378                                          ARM::VST1d64TPseudoWB_fixed};
3379     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3380                                           ARM::VST3q16Pseudo_UPD,
3381                                           ARM::VST3q32Pseudo_UPD };
3382     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3383                                           ARM::VST3q16oddPseudo_UPD,
3384                                           ARM::VST3q32oddPseudo_UPD };
3385     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3386     return;
3387   }
3388
3389   case ARMISD::VST4_UPD: {
3390     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3391                                          ARM::VST4d16Pseudo_UPD,
3392                                          ARM::VST4d32Pseudo_UPD,
3393                                          ARM::VST1d64QPseudoWB_fixed};
3394     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3395                                           ARM::VST4q16Pseudo_UPD,
3396                                           ARM::VST4q32Pseudo_UPD };
3397     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3398                                           ARM::VST4q16oddPseudo_UPD,
3399                                           ARM::VST4q32oddPseudo_UPD };
3400     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3401     return;
3402   }
3403
3404   case ARMISD::VST2LN_UPD: {
3405     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3406                                          ARM::VST2LNd16Pseudo_UPD,
3407                                          ARM::VST2LNd32Pseudo_UPD };
3408     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3409                                          ARM::VST2LNq32Pseudo_UPD };
3410     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3411     return;
3412   }
3413
3414   case ARMISD::VST3LN_UPD: {
3415     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3416                                          ARM::VST3LNd16Pseudo_UPD,
3417                                          ARM::VST3LNd32Pseudo_UPD };
3418     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3419                                          ARM::VST3LNq32Pseudo_UPD };
3420     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3421     return;
3422   }
3423
3424   case ARMISD::VST4LN_UPD: {
3425     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3426                                          ARM::VST4LNd16Pseudo_UPD,
3427                                          ARM::VST4LNd32Pseudo_UPD };
3428     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3429                                          ARM::VST4LNq32Pseudo_UPD };
3430     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3431     return;
3432   }
3433
3434   case ISD::INTRINSIC_VOID:
3435   case ISD::INTRINSIC_W_CHAIN: {
3436     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3437     switch (IntNo) {
3438     default:
3439       break;
3440
3441     case Intrinsic::arm_mrrc:
3442     case Intrinsic::arm_mrrc2: {
3443       SDLoc dl(N);
3444       SDValue Chain = N->getOperand(0);
3445       unsigned Opc;
3446
3447       if (Subtarget->isThumb())
3448         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3449       else
3450         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3451
3452       SmallVector<SDValue, 5> Ops;
3453       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3454       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3455       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3456
3457       // The ARM mrrc2 instruction cannot be predicated: the top 4 bits of its
3458       // encoding are always '1111'. Assembly syntax does accept an explicit AL
3459       // predicate on mrrc2, but that makes no difference to the encoded instruction.
3460       if (Opc != ARM::MRRC2) {
3461         Ops.push_back(getAL(CurDAG, dl));
3462         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3463       }
3464
3465       Ops.push_back(Chain);
3466
3467       // Writes to two registers.
3468       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3469
3470       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3471       return;
3472     }
3473     case Intrinsic::arm_ldaexd:
3474     case Intrinsic::arm_ldrexd: {
3475       SDLoc dl(N);
3476       SDValue Chain = N->getOperand(0);
3477       SDValue MemAddr = N->getOperand(2);
3478       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3479
3480       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3481       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3482                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3483
3484       // arm_ldrexd returns an i64 value in {i32, i32}
3485       std::vector<EVT> ResTys;
3486       if (isThumb) {
3487         ResTys.push_back(MVT::i32);
3488         ResTys.push_back(MVT::i32);
3489       } else
3490         ResTys.push_back(MVT::Untyped);
3491       ResTys.push_back(MVT::Other);
3492
3493       // Place arguments in the right order.
3494       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3495                        CurDAG->getRegister(0, MVT::i32), Chain};
3496       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3497       // Transfer memoperands.
3498       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3499       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3500       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3501
3502       // Remap uses.
3503       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3504       if (!SDValue(N, 0).use_empty()) {
3505         SDValue Result;
3506         if (isThumb)
3507           Result = SDValue(Ld, 0);
3508         else {
3509           SDValue SubRegIdx =
3510             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3511           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3512               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3513           Result = SDValue(ResNode,0);
3514         }
3515         ReplaceUses(SDValue(N, 0), Result);
3516       }
3517       if (!SDValue(N, 1).use_empty()) {
3518         SDValue Result;
3519         if (isThumb)
3520           Result = SDValue(Ld, 1);
3521         else {
3522           SDValue SubRegIdx =
3523             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3524           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3525               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3526           Result = SDValue(ResNode,0);
3527         }
3528         ReplaceUses(SDValue(N, 1), Result);
3529       }
3530       ReplaceUses(SDValue(N, 2), OutChain);
3531       CurDAG->RemoveDeadNode(N);
3532       return;
3533     }
3534     case Intrinsic::arm_stlexd:
3535     case Intrinsic::arm_strexd: {
3536       SDLoc dl(N);
3537       SDValue Chain = N->getOperand(0);
3538       SDValue Val0 = N->getOperand(2);
3539       SDValue Val1 = N->getOperand(3);
3540       SDValue MemAddr = N->getOperand(4);
3541
3542       // Store exclusive double returns an i32 value, which is the return
3543       // status of the issued store.
3544       const EVT ResTys[] = {MVT::i32, MVT::Other};
3545
3546       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3547       // Place arguments in the right order.
3548       SmallVector<SDValue, 7> Ops;
3549       if (isThumb) {
3550         Ops.push_back(Val0);
3551         Ops.push_back(Val1);
3552       } else
3553         // arm_strexd uses GPRPair.
3554         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3555       Ops.push_back(MemAddr);
3556       Ops.push_back(getAL(CurDAG, dl));
3557       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3558       Ops.push_back(Chain);
3559
3560       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3561       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3562                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3563
3564       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3565       // Transfer memoperands.
3566       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3567       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3568       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3569
3570       ReplaceNode(N, St);
3571       return;
3572     }
3573
3574     case Intrinsic::arm_neon_vld1: {
3575       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3576                                            ARM::VLD1d32, ARM::VLD1d64 };
3577       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3578                                            ARM::VLD1q32, ARM::VLD1q64};
3579       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3580       return;
3581     }
3582
3583     case Intrinsic::arm_neon_vld2: {
3584       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3585                                            ARM::VLD2d32, ARM::VLD1q64 };
3586       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3587                                            ARM::VLD2q32Pseudo };
3588       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3589       return;
3590     }
3591
3592     case Intrinsic::arm_neon_vld3: {
3593       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3594                                            ARM::VLD3d16Pseudo,
3595                                            ARM::VLD3d32Pseudo,
3596                                            ARM::VLD1d64TPseudo };
3597       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3598                                             ARM::VLD3q16Pseudo_UPD,
3599                                             ARM::VLD3q32Pseudo_UPD };
3600       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3601                                             ARM::VLD3q16oddPseudo,
3602                                             ARM::VLD3q32oddPseudo };
3603       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3604       return;
3605     }
3606
3607     case Intrinsic::arm_neon_vld4: {
3608       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3609                                            ARM::VLD4d16Pseudo,
3610                                            ARM::VLD4d32Pseudo,
3611                                            ARM::VLD1d64QPseudo };
3612       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3613                                             ARM::VLD4q16Pseudo_UPD,
3614                                             ARM::VLD4q32Pseudo_UPD };
3615       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3616                                             ARM::VLD4q16oddPseudo,
3617                                             ARM::VLD4q32oddPseudo };
3618       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3619       return;
3620     }
3621
3622     case Intrinsic::arm_neon_vld2lane: {
3623       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3624                                            ARM::VLD2LNd16Pseudo,
3625                                            ARM::VLD2LNd32Pseudo };
3626       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3627                                            ARM::VLD2LNq32Pseudo };
3628       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3629       return;
3630     }
3631
3632     case Intrinsic::arm_neon_vld3lane: {
3633       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3634                                            ARM::VLD3LNd16Pseudo,
3635                                            ARM::VLD3LNd32Pseudo };
3636       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3637                                            ARM::VLD3LNq32Pseudo };
3638       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3639       return;
3640     }
3641
3642     case Intrinsic::arm_neon_vld4lane: {
3643       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3644                                            ARM::VLD4LNd16Pseudo,
3645                                            ARM::VLD4LNd32Pseudo };
3646       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3647                                            ARM::VLD4LNq32Pseudo };
3648       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3649       return;
3650     }
3651
3652     case Intrinsic::arm_neon_vst1: {
3653       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3654                                            ARM::VST1d32, ARM::VST1d64 };
3655       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3656                                            ARM::VST1q32, ARM::VST1q64 };
3657       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3658       return;
3659     }
3660
3661     case Intrinsic::arm_neon_vst2: {
3662       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3663                                            ARM::VST2d32, ARM::VST1q64 };
3664       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3665                                            ARM::VST2q32Pseudo };
3666       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3667       return;
3668     }
3669
3670     case Intrinsic::arm_neon_vst3: {
3671       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3672                                            ARM::VST3d16Pseudo,
3673                                            ARM::VST3d32Pseudo,
3674                                            ARM::VST1d64TPseudo };
3675       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3676                                             ARM::VST3q16Pseudo_UPD,
3677                                             ARM::VST3q32Pseudo_UPD };
3678       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3679                                             ARM::VST3q16oddPseudo,
3680                                             ARM::VST3q32oddPseudo };
3681       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3682       return;
3683     }
3684
3685     case Intrinsic::arm_neon_vst4: {
3686       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3687                                            ARM::VST4d16Pseudo,
3688                                            ARM::VST4d32Pseudo,
3689                                            ARM::VST1d64QPseudo };
3690       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3691                                             ARM::VST4q16Pseudo_UPD,
3692                                             ARM::VST4q32Pseudo_UPD };
3693       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3694                                             ARM::VST4q16oddPseudo,
3695                                             ARM::VST4q32oddPseudo };
3696       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3697       return;
3698     }
3699
3700     case Intrinsic::arm_neon_vst2lane: {
3701       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3702                                            ARM::VST2LNd16Pseudo,
3703                                            ARM::VST2LNd32Pseudo };
3704       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3705                                            ARM::VST2LNq32Pseudo };
3706       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3707       return;
3708     }
3709
3710     case Intrinsic::arm_neon_vst3lane: {
3711       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3712                                            ARM::VST3LNd16Pseudo,
3713                                            ARM::VST3LNd32Pseudo };
3714       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3715                                            ARM::VST3LNq32Pseudo };
3716       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3717       return;
3718     }
3719
3720     case Intrinsic::arm_neon_vst4lane: {
3721       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3722                                            ARM::VST4LNd16Pseudo,
3723                                            ARM::VST4LNd32Pseudo };
3724       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3725                                            ARM::VST4LNq32Pseudo };
3726       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3727       return;
3728     }
3729     }
3730     break;
3731   }
3732
3733   case ISD::INTRINSIC_WO_CHAIN: {
3734     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3735     switch (IntNo) {
3736     default:
3737       break;
3738
3739     case Intrinsic::arm_neon_vtbl2:
3740       SelectVTBL(N, false, 2, ARM::VTBL2);
3741       return;
3742     case Intrinsic::arm_neon_vtbl3:
3743       SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3744       return;
3745     case Intrinsic::arm_neon_vtbl4:
3746       SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3747       return;
3748
3749     case Intrinsic::arm_neon_vtbx2:
3750       SelectVTBL(N, true, 2, ARM::VTBX2);
3751       return;
3752     case Intrinsic::arm_neon_vtbx3:
3753       SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3754       return;
3755     case Intrinsic::arm_neon_vtbx4:
3756       SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3757       return;
3758     }
3759     break;
3760   }
3761
3762   case ARMISD::VTBL1: {
3763     SDLoc dl(N);
3764     EVT VT = N->getValueType(0);
3765     SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3766                      getAL(CurDAG, dl),                 // Predicate
3767                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3768     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
3769     return;
3770   }
3771   case ARMISD::VTBL2: {
3772     SDLoc dl(N);
3773     EVT VT = N->getValueType(0);
3774
3775     // Form a REG_SEQUENCE to force register allocation.
3776     SDValue V0 = N->getOperand(0);
3777     SDValue V1 = N->getOperand(1);
3778     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3779
3780     SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
3781                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3782     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
3783     return;
3784   }
3785
3786   case ISD::ATOMIC_CMP_SWAP:
3787     SelectCMP_SWAP(N);
3788     return;
3789   }
3790
3791   SelectCode(N);
3792 }
3793
3794 // Inspect a register string of the form
3795 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3796 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3797 // and obtain the integer operands from them, adding these operands to the
3798 // provided vector.
3799 static void getIntOperandsFromRegisterString(StringRef RegString,
3800                                              SelectionDAG *CurDAG,
3801                                              const SDLoc &DL,
3802                                              std::vector<SDValue> &Ops) {
3803   SmallVector<StringRef, 5> Fields;
3804   RegString.split(Fields, ':');
3805
3806   if (Fields.size() > 1) {
3807     bool AllIntFields = true;
3808
3809     for (StringRef Field : Fields) {
3810       // Need to trim out leading 'cp' characters and get the integer field.
3811       unsigned IntField;
3812       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3813       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3814     }
3815
3816     assert(AllIntFields &&
3817             "Unexpected non-integer value in special register string.");
3818   }
3819 }
3820
3821 // Maps a Banked Register string to its mask value. The mask value returned is
3822 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3823 // mask operand, which expresses which register is to be used, e.g. r8, and in
3824 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3825 // was invalid.
3826 static inline int getBankedRegisterMask(StringRef RegString) {
3827   return StringSwitch<int>(RegString.lower())
3828           .Case("r8_usr", 0x00)
3829           .Case("r9_usr", 0x01)
3830           .Case("r10_usr", 0x02)
3831           .Case("r11_usr", 0x03)
3832           .Case("r12_usr", 0x04)
3833           .Case("sp_usr", 0x05)
3834           .Case("lr_usr", 0x06)
3835           .Case("r8_fiq", 0x08)
3836           .Case("r9_fiq", 0x09)
3837           .Case("r10_fiq", 0x0a)
3838           .Case("r11_fiq", 0x0b)
3839           .Case("r12_fiq", 0x0c)
3840           .Case("sp_fiq", 0x0d)
3841           .Case("lr_fiq", 0x0e)
3842           .Case("lr_irq", 0x10)
3843           .Case("sp_irq", 0x11)
3844           .Case("lr_svc", 0x12)
3845           .Case("sp_svc", 0x13)
3846           .Case("lr_abt", 0x14)
3847           .Case("sp_abt", 0x15)
3848           .Case("lr_und", 0x16)
3849           .Case("sp_und", 0x17)
3850           .Case("lr_mon", 0x1c)
3851           .Case("sp_mon", 0x1d)
3852           .Case("elr_hyp", 0x1e)
3853           .Case("sp_hyp", 0x1f)
3854           .Case("spsr_fiq", 0x2e)
3855           .Case("spsr_irq", 0x30)
3856           .Case("spsr_svc", 0x32)
3857           .Case("spsr_abt", 0x34)
3858           .Case("spsr_und", 0x36)
3859           .Case("spsr_mon", 0x3c)
3860           .Case("spsr_hyp", 0x3e)
3861           .Default(-1);
3862 }
3863
3864 // Maps a MClass special register string to its value for use in the
3865 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3866 // Returns -1 to signify that the string was invalid.
3867 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3868   return StringSwitch<int>(RegString.lower())
3869           .Case("apsr", 0x0)
3870           .Case("iapsr", 0x1)
3871           .Case("eapsr", 0x2)
3872           .Case("xpsr", 0x3)
3873           .Case("ipsr", 0x5)
3874           .Case("epsr", 0x6)
3875           .Case("iepsr", 0x7)
3876           .Case("msp", 0x8)
3877           .Case("psp", 0x9)
3878           .Case("primask", 0x10)
3879           .Case("basepri", 0x11)
3880           .Case("basepri_max", 0x12)
3881           .Case("faultmask", 0x13)
3882           .Case("control", 0x14)
3883           .Case("msplim", 0x0a)
3884           .Case("psplim", 0x0b)
3885           .Case("sp", 0x18)
3886           .Default(-1);
3887 }
3888
3889 // The flags here are common to those allowed for apsr in the A class cores and
3890 // those allowed for the special registers in the M class cores. Returns a
3891 // value representing which flags were present, -1 if invalid.
3892 static inline int getMClassFlagsMask(StringRef Flags) {
3893   return StringSwitch<int>(Flags)
3894           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3895                          // correct when flags are not permitted
3896           .Case("g", 0x1)
3897           .Case("nzcvq", 0x2)
3898           .Case("nzcvqg", 0x3)
3899           .Default(-1);
3900 }
3901
3902 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3903                                  const ARMSubtarget *Subtarget) {
3904   // Ensure that the register (without flags) was a valid M Class special
3905   // register.
3906   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3907   if (SYSmvalue == -1)
3908     return -1;
3909
3910   // basepri, basepri_max and faultmask are only valid for V7m.
3911   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3912     return -1;
3913
3914   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3915     Flags = "";
3916     SYSmvalue |= 0x80;
3917   }
3918
3919   if (!Subtarget->has8MSecExt() &&
3920       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3921     return -1;
3922
3923   if (!Subtarget->hasV8MMainlineOps() &&
3924       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3925        SYSmvalue == 0x93))
3926     return -1;
3927
3928   // If it was a read then we won't be expecting flags and so at this point
3929   // we can return the mask.
3930   if (IsRead) {
3931     if (Flags.empty())
3932       return SYSmvalue;
3933     else
3934       return -1;
3935   }
3936
3937   // We know we are now handling a write so need to get the mask for the flags.
3938   int Mask = getMClassFlagsMask(Flags);
3939
3940   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3941   // shouldn't have flags present.
3942   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3943     return -1;
3944
3945   // The _g and _nzcvqg versions are only valid if the DSP extension is
3946   // available.
3947   if (!Subtarget->hasDSP() && (Mask & 0x1))
3948     return -1;
3949
3950   // The register was valid so need to put the mask in the correct place
3951   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3952   // construct the operand for the instruction node.
3953   return SYSmvalue | Mask << 10;
3954 }
3955
3956 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3957   // The mask operand contains the special register (R Bit) in bit 4, whether
3958   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3959   // bits 3-0 contains the fields to be accessed in the special register, set by
3960   // the flags provided with the register.
3961   int Mask = 0;
3962   if (Reg == "apsr") {
3963     // The flags permitted for apsr are the same flags that are allowed in
3964     // M class registers. We get the flag value and then shift the flags into
3965     // the correct place to combine with the mask.
3966     Mask = getMClassFlagsMask(Flags);
3967     if (Mask == -1)
3968       return -1;
3969     return Mask << 2;
3970   }
3971
3972   if (Reg != "cpsr" && Reg != "spsr") {
3973     return -1;
3974   }
3975
3976   // This is the same as if the flags were "fc"
3977   if (Flags.empty() || Flags == "all")
3978     return Mask | 0x9;
3979
3980   // Inspect the supplied flags string and set the bits in the mask for
3981   // the relevant and valid flags allowed for cpsr and spsr.
3982   for (char Flag : Flags) {
3983     int FlagVal;
3984     switch (Flag) {
3985       case 'c':
3986         FlagVal = 0x1;
3987         break;
3988       case 'x':
3989         FlagVal = 0x2;
3990         break;
3991       case 's':
3992         FlagVal = 0x4;
3993         break;
3994       case 'f':
3995         FlagVal = 0x8;
3996         break;
3997       default:
3998         FlagVal = 0;
3999     }
4000
4001     // This avoids allowing strings where the same flag bit appears twice.
4002     if (!FlagVal || (Mask & FlagVal))
4003       return -1;
4004     Mask |= FlagVal;
4005   }
4006
4007   // If the register is spsr then we need to set the R bit.
4008   if (Reg == "spsr")
4009     Mask |= 0x10;
4010
4011   return Mask;
4012 }
4013
4014 // Lower the read_register intrinsic to ARM specific DAG nodes
4015 // using the supplied metadata string to select the instruction node to use
4016 // and the registers/masks to construct as operands for the node.
4017 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4018   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4019   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4020   bool IsThumb2 = Subtarget->isThumb2();
4021   SDLoc DL(N);
4022
4023   std::vector<SDValue> Ops;
4024   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4025
4026   if (!Ops.empty()) {
4027     // If the special register string was constructed of fields (as defined
4028     // in the ACLE) then need to lower to MRC node (32 bit) or
4029     // MRRC node(64 bit), we can make the distinction based on the number of
4030     // operands we have.
4031     unsigned Opcode;
4032     SmallVector<EVT, 3> ResTypes;
4033     if (Ops.size() == 5){
4034       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4035       ResTypes.append({ MVT::i32, MVT::Other });
4036     } else {
4037       assert(Ops.size() == 3 &&
4038               "Invalid number of fields in special register string.");
4039       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4040       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4041     }
4042
4043     Ops.push_back(getAL(CurDAG, DL));
4044     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4045     Ops.push_back(N->getOperand(0));
4046     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4047     return true;
4048   }
4049
4050   std::string SpecialReg = RegString->getString().lower();
4051
4052   int BankedReg = getBankedRegisterMask(SpecialReg);
4053   if (BankedReg != -1) {
4054     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4055             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4056             N->getOperand(0) };
4057     ReplaceNode(
4058         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4059                                   DL, MVT::i32, MVT::Other, Ops));
4060     return true;
4061   }
4062
4063   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4064   // corresponding to the register that is being read from. So we switch on the
4065   // string to find which opcode we need to use.
4066   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4067                     .Case("fpscr", ARM::VMRS)
4068                     .Case("fpexc", ARM::VMRS_FPEXC)
4069                     .Case("fpsid", ARM::VMRS_FPSID)
4070                     .Case("mvfr0", ARM::VMRS_MVFR0)
4071                     .Case("mvfr1", ARM::VMRS_MVFR1)
4072                     .Case("mvfr2", ARM::VMRS_MVFR2)
4073                     .Case("fpinst", ARM::VMRS_FPINST)
4074                     .Case("fpinst2", ARM::VMRS_FPINST2)
4075                     .Default(0);
4076
4077   // If an opcode was found then we can lower the read to a VFP instruction.
4078   if (Opcode) {
4079     if (!Subtarget->hasVFP2())
4080       return false;
4081     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4082       return false;
4083
4084     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4085             N->getOperand(0) };
4086     ReplaceNode(N,
4087                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4088     return true;
4089   }
4090
4091   // If the target is M Class then need to validate that the register string
4092   // is an acceptable value, so check that a mask can be constructed from the
4093   // string.
4094   if (Subtarget->isMClass()) {
4095     StringRef Flags = "", Reg = SpecialReg;
4096     if (Reg.endswith("_ns")) {
4097       Flags = "ns";
4098       Reg = Reg.drop_back(3);
4099     }
4100
4101     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4102     if (SYSmValue == -1)
4103       return false;
4104
4105     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4106                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4107                       N->getOperand(0) };
4108     ReplaceNode(
4109         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4110     return true;
4111   }
4112
4113   // Here we know the target is not M Class so we need to check if it is one
4114   // of the remaining possible values which are apsr, cpsr or spsr.
4115   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4116     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4117             N->getOperand(0) };
4118     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4119                                           DL, MVT::i32, MVT::Other, Ops));
4120     return true;
4121   }
4122
4123   if (SpecialReg == "spsr") {
4124     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4125             N->getOperand(0) };
4126     ReplaceNode(
4127         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4128                                   MVT::i32, MVT::Other, Ops));
4129     return true;
4130   }
4131
4132   return false;
4133 }
4134
4135 // Lower the write_register intrinsic to ARM specific DAG nodes
4136 // using the supplied metadata string to select the instruction node to use
4137 // and the registers/masks to use in the nodes
4138 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4139   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4140   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4141   bool IsThumb2 = Subtarget->isThumb2();
4142   SDLoc DL(N);
4143
4144   std::vector<SDValue> Ops;
4145   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4146
4147   if (!Ops.empty()) {
4148     // If the special register string was constructed of fields (as defined
4149     // in the ACLE) then need to lower to MCR node (32 bit) or
4150     // MCRR node(64 bit), we can make the distinction based on the number of
4151     // operands we have.
4152     unsigned Opcode;
4153     if (Ops.size() == 5) {
4154       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4155       Ops.insert(Ops.begin()+2, N->getOperand(2));
4156     } else {
4157       assert(Ops.size() == 3 &&
4158               "Invalid number of fields in special register string.");
4159       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4160       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4161       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4162     }
4163
4164     Ops.push_back(getAL(CurDAG, DL));
4165     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4166     Ops.push_back(N->getOperand(0));
4167
4168     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4169     return true;
4170   }
4171
4172   std::string SpecialReg = RegString->getString().lower();
4173   int BankedReg = getBankedRegisterMask(SpecialReg);
4174   if (BankedReg != -1) {
4175     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4176             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4177             N->getOperand(0) };
4178     ReplaceNode(
4179         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4180                                   DL, MVT::Other, Ops));
4181     return true;
4182   }
4183
4184   // The VFP registers are written to by creating SelectionDAG nodes with
4185   // opcodes corresponding to the register that is being written. So we switch
4186   // on the string to find which opcode we need to use.
4187   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4188                     .Case("fpscr", ARM::VMSR)
4189                     .Case("fpexc", ARM::VMSR_FPEXC)
4190                     .Case("fpsid", ARM::VMSR_FPSID)
4191                     .Case("fpinst", ARM::VMSR_FPINST)
4192                     .Case("fpinst2", ARM::VMSR_FPINST2)
4193                     .Default(0);
4194
4195   if (Opcode) {
4196     if (!Subtarget->hasVFP2())
4197       return false;
4198     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4199             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4200     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4201     return true;
4202   }
4203
4204   std::pair<StringRef, StringRef> Fields;
4205   Fields = StringRef(SpecialReg).rsplit('_');
4206   std::string Reg = Fields.first.str();
4207   StringRef Flags = Fields.second;
4208
4209   // If the target was M Class then need to validate the special register value
4210   // and retrieve the mask for use in the instruction node.
4211   if (Subtarget->isMClass()) {
4212     // basepri_max gets split so need to correct Reg and Flags.
4213     if (SpecialReg == "basepri_max") {
4214       Reg = SpecialReg;
4215       Flags = "";
4216     }
4217     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4218     if (SYSmValue == -1)
4219       return false;
4220
4221     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4222                       N->getOperand(2), getAL(CurDAG, DL),
4223                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4224     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4225     return true;
4226   }
4227
4228   // We then check to see if a valid mask can be constructed for one of the
4229   // register string values permitted for the A and R class cores. These values
4230   // are apsr, spsr and cpsr; these are also valid on older cores.
4231   int Mask = getARClassRegisterMask(Reg, Flags);
4232   if (Mask != -1) {
4233     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4234             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4235             N->getOperand(0) };
4236     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4237                                           DL, MVT::Other, Ops));
4238     return true;
4239   }
4240
4241   return false;
4242 }
4243
// Rewrite an inline asm node so that every register operand which occupies
// two GPRs (and is either constrained to the GPR register class or tied to
// an operand that was already rewritten) uses a single GPRPair virtual
// register instead. Returns true if at least one operand was rewritten and N
// was replaced by a new INLINEASM node, false if N was left untouched.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // The glue operand, when present, is the last operand of N.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // One entry per operand group that has registers, in operand order; true
  // once that group has been rewritten to a GPRPair.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Operands before Op_FirstOperand are copied through unchanged.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant operands are flag words; anything else is copied as is.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Only two-register operands are candidates, and only when they are
    // constrained to the GPR class or tied to an already rewritten operand.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    // The two register operands follow the flag word.
    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      // NOTE(review): GU is dereferenced below without a null check —
      // presumably an inline asm with register defs always has a glued user;
      // confirm.
      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      // Its last operand is replaced by the glue result of the final copy.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // The new inline asm node takes the copy as its input chain and glue.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark the current operand group as rewritten and build a flag word
      // that describes a single register.
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Re-attach the glue operand (possibly updated above) as the last operand.
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Build the replacement INLINEASM node from the rewritten operand list.
  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4407
4408
4409 bool ARMDAGToDAGISel::
4410 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4411                              std::vector<SDValue> &OutOps) {
4412   switch(ConstraintID) {
4413   default:
4414     llvm_unreachable("Unexpected asm memory constraint");
4415   case InlineAsm::Constraint_i:
4416     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4417     //        be an immediate and not a memory constraint.
4418     LLVM_FALLTHROUGH;
4419   case InlineAsm::Constraint_m:
4420   case InlineAsm::Constraint_o:
4421   case InlineAsm::Constraint_Q:
4422   case InlineAsm::Constraint_Um:
4423   case InlineAsm::Constraint_Un:
4424   case InlineAsm::Constraint_Uq:
4425   case InlineAsm::Constraint_Us:
4426   case InlineAsm::Constraint_Ut:
4427   case InlineAsm::Constraint_Uv:
4428   case InlineAsm::Constraint_Uy:
4429     // Require the address to be in a register.  That is safe for all ARM
4430     // variants and it is hard to do anything much smarter without knowing
4431     // how the operand is used.
4432     OutOps.push_back(Op);
4433     return false;
4434   }
4435   return true;
4436 }
4437
4438 /// createARMISelDag - This pass converts a legalized DAG into a
4439 /// ARM-specific DAG, ready for instruction scheduling.
4440 ///
4441 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4442                                      CodeGenOpt::Level OptLevel) {
4443   return new ARMDAGToDAGISel(TM, OptLevel);
4444 }