contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Target/TargetLowering.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
// Hidden command-line switch, off by default. When it is set,
// SelectImmShifterOperand and SelectRegShifterOperand bail out immediately,
// so no shift is folded into a shifter operand by those matchers.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
// Result of SelectAddrMode2Worker: tells the SelectAddrMode2Base /
// SelectAddrMode2ShOp wrappers which flavour of addressing mode 2 matched.
enum AddrMode2Type {
  AM2_BASE, // Simple AM2 (+-imm12)
  AM2_SHOP  // Shifter-op AM2
};
  56
  57 class ARMDAGToDAGISel : public SelectionDAGISel {
  58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  59   /// make the right decision when generating code for different targets.
  60   const ARMSubtarget *Subtarget;
  61
  62 public:
  63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  64       : SelectionDAGISel(tm, OptLevel) {}
  65
  66   bool runOnMachineFunction(MachineFunction &MF) override {
  67     // Reset the subtarget each time through.
  68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  69     SelectionDAGISel::runOnMachineFunction(MF);
  70     return true;
  71   }
  72
  73   const char *getPassName() const override {
  74     return "ARM Instruction Selection";
  75   }
  76
  77   void PreprocessISelDAG() override;
  78
  79   /// getI32Imm - Return a target constant of type i32 with the specified
  80   /// value.
  81   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  82     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  83   }
  84
  85   void Select(SDNode *N) override;
  86
  87   bool hasNoVMLxHazardUse(SDNode *N) const;
  88   bool isShifterOpProfitable(const SDValue &Shift,
  89                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  90   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  91                                SDValue &B, SDValue &C,
  92                                bool CheckProfitability = true);
  93   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  94                                SDValue &B, bool CheckProfitability = true);
  95   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  96                                     SDValue &B, SDValue &C) {
  97     // Don't apply the profitability check
  98     return SelectRegShifterOperand(N, A, B, C, false);
  99   }
 100   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
 101                                     SDValue &B) {
 102     // Don't apply the profitability check
 103     return SelectImmShifterOperand(N, A, B, false);
 104   }
 105
 106   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 107   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 108
 109   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 110                                       SDValue &Offset, SDValue &Opc);
 111   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 112                            SDValue &Opc) {
 113     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 114   }
 115
 116   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 117                            SDValue &Opc) {
 118     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 119   }
 120
 121   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 122                        SDValue &Opc) {
 123     SelectAddrMode2Worker(N, Base, Offset, Opc);
 124 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 125     // This always matches one way or another.
 126     return true;
 127   }
 128
 129   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 130     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 131     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 132     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 133     return true;
 134   }
 135
 136   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 137                              SDValue &Offset, SDValue &Opc);
 138   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 139                              SDValue &Offset, SDValue &Opc);
 140   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 141                              SDValue &Offset, SDValue &Opc);
 142   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 143   bool SelectAddrMode3(SDValue N, SDValue &Base,
 144                        SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 146                              SDValue &Offset, SDValue &Opc);
 147   bool SelectAddrMode5(SDValue N, SDValue &Base,
 148                        SDValue &Offset);
 149   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 150   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 151
 152   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 153
 154   // Thumb Addressing Modes:
 155   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 156   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 157                                 SDValue &OffImm);
 158   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 159                                  SDValue &OffImm);
 160   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 161                                  SDValue &OffImm);
 162   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 163                                  SDValue &OffImm);
 164   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 165
 166   // Thumb 2 Addressing Modes:
 167   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 168   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 169                             SDValue &OffImm);
 170   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 171                                  SDValue &OffImm);
 172   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 173                              SDValue &OffReg, SDValue &ShImm);
 174   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 175
 176   inline bool is_so_imm(unsigned Imm) const {
 177     return ARM_AM::getSOImmVal(Imm) != -1;
 178   }
 179
 180   inline bool is_so_imm_not(unsigned Imm) const {
 181     return ARM_AM::getSOImmVal(~Imm) != -1;
 182   }
 183
 184   inline bool is_t2_so_imm(unsigned Imm) const {
 185     return ARM_AM::getT2SOImmVal(Imm) != -1;
 186   }
 187
 188   inline bool is_t2_so_imm_not(unsigned Imm) const {
 189     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 190   }
 191
 192   // Include the pieces autogenerated from the target description.
 193 #include "ARMGenDAGISel.inc"
 194
 195 private:
 196   /// Indexed (pre/post inc/dec) load matching code for ARM.
 197   bool tryARMIndexedLoad(SDNode *N);
 198   bool tryT1IndexedLoad(SDNode *N);
 199   bool tryT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 207                  const uint16_t *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 215                  const uint16_t *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 221                        unsigned NumVecs, const uint16_t *DOpcodes,
 222                        const uint16_t *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *Opcodes);
 229
 230   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 231   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 232   /// generated to force the table registers to be consecutive.
 233   void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 234
 235   /// Try to select SBFX/UBFX instructions for ARM.
 236   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 237
 238   // Select special operations if node forms integer ABS pattern
 239   bool tryABSOp(SDNode *N);
 240
 241   bool tryReadRegister(SDNode *N);
 242   bool tryWriteRegister(SDNode *N);
 243
 244   bool tryInlineAsm(SDNode *N);
 245
 246   void SelectConcatVector(SDNode *N);
 247
 248   bool trySMLAWSMULW(SDNode *N);
 249
 250   void SelectCMP_SWAP(SDNode *N);
 251
 252   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 253   /// inline asm expressions.
 254   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 255                                     std::vector<SDValue> &OutOps) override;
 256
 257   // Form pairs of consecutive R, S, D, or Q registers.
 258   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 259   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 260   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 261   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 262
 263   // Form sequences of 4 consecutive S, D, or Q registers.
 264   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 265   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 266   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 267
 268   // Get the alignment operand for a NEON VLD or VST instruction.
 269   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 270                         bool is64BitVector);
 271
 272   /// Returns the number of instructions required to materialize the given
 273   /// constant in a register, or 3 if a literal pool load is needed.
 274   unsigned ConstantMaterializationCost(unsigned Val) const;
 275
 276   /// Checks if N is a multiplication by a constant where we can extract out a
 277   /// power of two from the constant so that it can be used in a shift, but only
 278   /// if it simplifies the materialization of the constant. Returns true if it
 279   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 280   /// out and to NewMulConst the new constant to be multiplied by.
 281   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 282                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 283
 284   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 285   /// selected when N would have been selected.
 286   void replaceDAGValue(const SDValue &N, SDValue M);
 287 };
 288 }
 289
 290 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 291 /// operand. If so Imm will receive the 32-bit value.
 292 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 293   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 294     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 295     return true;
 296   }
 297   return false;
 298 }
 299
 300 // isInt32Immediate - This method tests to see if a constant operand.
 301 // If so Imm will receive the 32 bit value.
 302 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 303   return isInt32Immediate(N.getNode(), Imm);
 304 }
 305
 306 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 307 // opcode and that it has a immediate integer right operand.
 308 // If so Imm will receive the 32 bit value.
 309 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 310   return N->getOpcode() == Opc &&
 311          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 312 }
 313
 314 /// \brief Check whether a particular node is a constant value representable as
 315 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 316 ///
 317 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 318 static bool isScaledConstantInRange(SDValue Node, int Scale,
 319                                     int RangeMin, int RangeMax,
 320                                     int &ScaledConstant) {
 321   assert(Scale > 0 && "Invalid scale!");
 322
 323   // Check that this is a constant.
 324   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 325   if (!C)
 326     return false;
 327
 328   ScaledConstant = (int) C->getZExtValue();
 329   if ((ScaledConstant % Scale) != 0)
 330     return false;
 331
 332   ScaledConstant /= Scale;
 333   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 334 }
 335
 336 void ARMDAGToDAGISel::PreprocessISelDAG() {
 337   if (!Subtarget->hasV6T2Ops())
 338     return;
 339
 340   bool isThumb2 = Subtarget->isThumb();
 341   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 342        E = CurDAG->allnodes_end(); I != E; ) {
 343     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 344
 345     if (N->getOpcode() != ISD::ADD)
 346       continue;
 347
 348     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 349     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 350     // trailing zeros, e.g. 1020.
 351     // Transform the expression to
 352     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 353     // of trailing zeros of c2. The left shift would be folded as an shifter
 354     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 355     // node (UBFX).
 356
 357     SDValue N0 = N->getOperand(0);
 358     SDValue N1 = N->getOperand(1);
 359     unsigned And_imm = 0;
 360     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 361       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 362         std::swap(N0, N1);
 363     }
 364     if (!And_imm)
 365       continue;
 366
 367     // Check if the AND mask is an immediate of the form: 000.....1111111100
 368     unsigned TZ = countTrailingZeros(And_imm);
 369     if (TZ != 1 && TZ != 2)
 370       // Be conservative here. Shifter operands aren't always free. e.g. On
 371       // Swift, left shifter operand of 1 / 2 for free but others are not.
 372       // e.g.
 373       //  ubfx   r3, r1, #16, #8
 374       //  ldr.w  r3, [r0, r3, lsl #2]
 375       // vs.
 376       //  mov.w  r9, #1020
 377       //  and.w  r2, r9, r1, lsr #14
 378       //  ldr    r2, [r0, r2]
 379       continue;
 380     And_imm >>= TZ;
 381     if (And_imm & (And_imm + 1))
 382       continue;
 383
 384     // Look for (and (srl X, c1), c2).
 385     SDValue Srl = N1.getOperand(0);
 386     unsigned Srl_imm = 0;
 387     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 388         (Srl_imm <= 2))
 389       continue;
 390
 391     // Make sure first operand is not a shifter operand which would prevent
 392     // folding of the left shift.
 393     SDValue CPTmp0;
 394     SDValue CPTmp1;
 395     SDValue CPTmp2;
 396     if (isThumb2) {
 397       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 398         continue;
 399     } else {
 400       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 401           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 402         continue;
 403     }
 404
 405     // Now make the transformation.
 406     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 407                           Srl.getOperand(0),
 408                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 409                                               MVT::i32));
 410     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 411                          Srl,
 412                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 413     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 414                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 415     CurDAG->UpdateNodeOperands(N, N0, N1);
 416   }
 417 }
 418
 419 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 420 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 421 /// least on current ARM implementations) which should be avoidded.
 422 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 423   if (OptLevel == CodeGenOpt::None)
 424     return true;
 425
 426   if (!Subtarget->hasVMLxHazards())
 427     return true;
 428
 429   if (!N->hasOneUse())
 430     return false;
 431
 432   SDNode *Use = *N->use_begin();
 433   if (Use->getOpcode() == ISD::CopyToReg)
 434     return true;
 435   if (Use->isMachineOpcode()) {
 436     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 437         CurDAG->getSubtarget().getInstrInfo());
 438
 439     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 440     if (MCID.mayStore())
 441       return true;
 442     unsigned Opcode = MCID.getOpcode();
 443     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 444       return true;
 445     // vmlx feeding into another vmlx. We actually want to unfold
 446     // the use later in the MLxExpansion pass. e.g.
 447     // vmla
 448     // vmla (stall 8 cycles)
 449     //
 450     // vmul (5 cycles)
 451     // vadd (5 cycles)
 452     // vmla
 453     // This adds up to about 18 - 19 cycles.
 454     //
 455     // vmla
 456     // vmul (stall 4 cycles)
 457     // vadd adds up to about 14 cycles.
 458     return TII->isFpMLxInstruction(Opcode);
 459   }
 460
 461   return false;
 462 }
 463
 464 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 465                                             ARM_AM::ShiftOpc ShOpcVal,
 466                                             unsigned ShAmt) {
 467   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 468     return true;
 469   if (Shift.hasOneUse())
 470     return true;
 471   // R << 2 is free.
 472   return ShOpcVal == ARM_AM::lsl &&
 473          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 474 }
 475
 476 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 477   if (Subtarget->isThumb()) {
 478     if (Val <= 255) return 1;                               // MOV
 479     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 480     if (Val <= 510) return 2;                               // MOV + ADDi8
 481     if (~Val <= 255) return 2;                              // MOV + MVN
 482     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 483   } else {
 484     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 485     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 486     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 487     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 488   }
 489   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 490   return 3; // Literal pool load
 491 }
 492
 493 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 494                                              unsigned MaxShift,
 495                                              unsigned &PowerOfTwo,
 496                                              SDValue &NewMulConst) const {
 497   assert(N.getOpcode() == ISD::MUL);
 498   assert(MaxShift > 0);
 499
 500   // If the multiply is used in more than one place then changing the constant
 501   // will make other uses incorrect, so don't.
 502   if (!N.hasOneUse()) return false;
 503   // Check if the multiply is by a constant
 504   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 505   if (!MulConst) return false;
 506   // If the constant is used in more than one place then modifying it will mean
 507   // we need to materialize two constants instead of one, which is a bad idea.
 508   if (!MulConst->hasOneUse()) return false;
 509   unsigned MulConstVal = MulConst->getZExtValue();
 510   if (MulConstVal == 0) return false;
 511
 512   // Find the largest power of 2 that MulConstVal is a multiple of
 513   PowerOfTwo = MaxShift;
 514   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 515     --PowerOfTwo;
 516     if (PowerOfTwo == 0) return false;
 517   }
 518
 519   // Only optimise if the new cost is better
 520   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 521   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 522   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 523   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 524   return NewCost < OldCost;
 525 }
 526
 527 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 528   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 529   CurDAG->ReplaceAllUsesWith(N, M);
 530 }
 531
 532 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 533                                               SDValue &BaseReg,
 534                                               SDValue &Opc,
 535                                               bool CheckProfitability) {
 536   if (DisableShifterOp)
 537     return false;
 538
 539   // If N is a multiply-by-constant and it's profitable to extract a shift and
 540   // use it in a shifted operand do so.
 541   if (N.getOpcode() == ISD::MUL) {
 542     unsigned PowerOfTwo = 0;
 543     SDValue NewMulConst;
 544     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 545       HandleSDNode Handle(N);
 546       replaceDAGValue(N.getOperand(1), NewMulConst);
 547       BaseReg = Handle.getValue();
 548       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
 549                                                           PowerOfTwo),
 550                                       SDLoc(N), MVT::i32);
 551       return true;
 552     }
 553   }
 554
 555   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 556
 557   // Don't match base register only case. That is matched to a separate
 558   // lower complexity pattern with explicit register operand.
 559   if (ShOpcVal == ARM_AM::no_shift) return false;
 560
 561   BaseReg = N.getOperand(0);
 562   unsigned ShImmVal = 0;
 563   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 564   if (!RHS) return false;
 565   ShImmVal = RHS->getZExtValue() & 31;
 566   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 567                                   SDLoc(N), MVT::i32);
 568   return true;
 569 }
 570
 571 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 572                                               SDValue &BaseReg,
 573                                               SDValue &ShReg,
 574                                               SDValue &Opc,
 575                                               bool CheckProfitability) {
 576   if (DisableShifterOp)
 577     return false;
 578
 579   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 580
 581   // Don't match base register only case. That is matched to a separate
 582   // lower complexity pattern with explicit register operand.
 583   if (ShOpcVal == ARM_AM::no_shift) return false;
 584
 585   BaseReg = N.getOperand(0);
 586   unsigned ShImmVal = 0;
 587   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 588   if (RHS) return false;
 589
 590   ShReg = N.getOperand(1);
 591   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 592     return false;
 593   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 594                                   SDLoc(N), MVT::i32);
 595   return true;
 596 }
 597
 598
 599 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 600                                           SDValue &Base,
 601                                           SDValue &OffImm) {
 602   // Match simple R + imm12 operands.
 603
 604   // Base only.
 605   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 606       !CurDAG->isBaseWithConstantOffset(N)) {
 607     if (N.getOpcode() == ISD::FrameIndex) {
 608       // Match frame index.
 609       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 610       Base = CurDAG->getTargetFrameIndex(
 611           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 612       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 613       return true;
 614     }
 615
 616     if (N.getOpcode() == ARMISD::Wrapper &&
 617         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 618         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 619         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 620       Base = N.getOperand(0);
 621     } else
 622       Base = N;
 623     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 624     return true;
 625   }
 626
 627   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 628     int RHSC = (int)RHS->getSExtValue();
 629     if (N.getOpcode() == ISD::SUB)
 630       RHSC = -RHSC;
 631
 632     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 633       Base   = N.getOperand(0);
 634       if (Base.getOpcode() == ISD::FrameIndex) {
 635         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 636         Base = CurDAG->getTargetFrameIndex(
 637             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 638       }
 639       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 640       return true;
 641     }
 642   }
 643
 644   // Base only.
 645   Base = N;
 646   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 647   return true;
 648 }
 649
 650
 651
 652 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 653                                       SDValue &Opc) {
 654   if (N.getOpcode() == ISD::MUL &&
 655       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 656     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 657       // X * [3,5,9] -> X + X * [2,4,8] etc.
 658       int RHSC = (int)RHS->getZExtValue();
 659       if (RHSC & 1) {
 660         RHSC = RHSC & ~1;
 661         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 662         if (RHSC < 0) {
 663           AddSub = ARM_AM::sub;
 664           RHSC = - RHSC;
 665         }
 666         if (isPowerOf2_32(RHSC)) {
 667           unsigned ShAmt = Log2_32(RHSC);
 668           Base = Offset = N.getOperand(0);
 669           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 670                                                             ARM_AM::lsl),
 671                                           SDLoc(N), MVT::i32);
 672           return true;
 673         }
 674       }
 675     }
 676   }
 677
 678   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 679       // ISD::OR that is equivalent to an ISD::ADD.
 680       !CurDAG->isBaseWithConstantOffset(N))
 681     return false;
 682
 683   // Leave simple R +/- imm12 operands for LDRi12
 684   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 685     int RHSC;
 686     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 687                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 688       return false;
 689   }
 690
 691   // Otherwise this is R +/- [possibly shifted] R.
 692   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 693   ARM_AM::ShiftOpc ShOpcVal =
 694     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 695   unsigned ShAmt = 0;
 696
 697   Base   = N.getOperand(0);
 698   Offset = N.getOperand(1);
 699
 700   if (ShOpcVal != ARM_AM::no_shift) {
 701     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 702     // it.
 703     if (ConstantSDNode *Sh =
 704            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 705       ShAmt = Sh->getZExtValue();
 706       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 707         Offset = N.getOperand(1).getOperand(0);
 708       else {
 709         ShAmt = 0;
 710         ShOpcVal = ARM_AM::no_shift;
 711       }
 712     } else {
 713       ShOpcVal = ARM_AM::no_shift;
 714     }
 715   }
 716
 717   // Try matching (R shl C) + (R).
 718   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 719       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 720         N.getOperand(0).hasOneUse())) {
 721     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 722     if (ShOpcVal != ARM_AM::no_shift) {
 723       // Check to see if the RHS of the shift is a constant, if not, we can't
 724       // fold it.
 725       if (ConstantSDNode *Sh =
 726           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 727         ShAmt = Sh->getZExtValue();
 728         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 729           Offset = N.getOperand(0).getOperand(0);
 730           Base = N.getOperand(1);
 731         } else {
 732           ShAmt = 0;
 733           ShOpcVal = ARM_AM::no_shift;
 734         }
 735       } else {
 736         ShOpcVal = ARM_AM::no_shift;
 737       }
 738     }
 739   }
 740
 741   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 742   // and use it in a shifted operand do so.
 743   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 744     unsigned PowerOfTwo = 0;
 745     SDValue NewMulConst;
 746     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 747       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 748       ShAmt = PowerOfTwo;
 749       ShOpcVal = ARM_AM::lsl;
 750     }
 751   }
 752
 753   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 754                                   SDLoc(N), MVT::i32);
 755   return true;
 756 }
 757
 758
 759 //-----
 760
 761 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 762                                                      SDValue &Base,
 763                                                      SDValue &Offset,
 764                                                      SDValue &Opc) {
 765   if (N.getOpcode() == ISD::MUL &&
 766       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 767     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 768       // X * [3,5,9] -> X + X * [2,4,8] etc.
 769       int RHSC = (int)RHS->getZExtValue();
 770       if (RHSC & 1) {
 771         RHSC = RHSC & ~1;
 772         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 773         if (RHSC < 0) {
 774           AddSub = ARM_AM::sub;
 775           RHSC = - RHSC;
 776         }
 777         if (isPowerOf2_32(RHSC)) {
 778           unsigned ShAmt = Log2_32(RHSC);
 779           Base = Offset = N.getOperand(0);
 780           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 781                                                             ARM_AM::lsl),
 782                                           SDLoc(N), MVT::i32);
 783           return AM2_SHOP;
 784         }
 785       }
 786     }
 787   }
 788
 789   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 790       // ISD::OR that is equivalent to an ADD.
 791       !CurDAG->isBaseWithConstantOffset(N)) {
 792     Base = N;
 793     if (N.getOpcode() == ISD::FrameIndex) {
 794       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 795       Base = CurDAG->getTargetFrameIndex(
 796           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 797     } else if (N.getOpcode() == ARMISD::Wrapper &&
 798                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 799                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 800                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 801       Base = N.getOperand(0);
 802     }
 803     Offset = CurDAG->getRegister(0, MVT::i32);
 804     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 805                                                       ARM_AM::no_shift),
 806                                     SDLoc(N), MVT::i32);
 807     return AM2_BASE;
 808   }
 809
 810   // Match simple R +/- imm12 operands.
 811   if (N.getOpcode() != ISD::SUB) {
 812     int RHSC;
 813     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 814                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 815       Base = N.getOperand(0);
 816       if (Base.getOpcode() == ISD::FrameIndex) {
 817         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 818         Base = CurDAG->getTargetFrameIndex(
 819             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 820       }
 821       Offset = CurDAG->getRegister(0, MVT::i32);
 822
 823       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 824       if (RHSC < 0) {
 825         AddSub = ARM_AM::sub;
 826         RHSC = - RHSC;
 827       }
 828       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 829                                                         ARM_AM::no_shift),
 830                                       SDLoc(N), MVT::i32);
 831       return AM2_BASE;
 832     }
 833   }
 834
 835   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 836     // Compute R +/- (R << N) and reuse it.
 837     Base = N;
 838     Offset = CurDAG->getRegister(0, MVT::i32);
 839     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 840                                                       ARM_AM::no_shift),
 841                                     SDLoc(N), MVT::i32);
 842     return AM2_BASE;
 843   }
 844
 845   // Otherwise this is R +/- [possibly shifted] R.
 846   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 847   ARM_AM::ShiftOpc ShOpcVal =
 848     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 849   unsigned ShAmt = 0;
 850
 851   Base   = N.getOperand(0);
 852   Offset = N.getOperand(1);
 853
 854   if (ShOpcVal != ARM_AM::no_shift) {
 855     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 856     // it.
 857     if (ConstantSDNode *Sh =
 858            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 859       ShAmt = Sh->getZExtValue();
 860       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 861         Offset = N.getOperand(1).getOperand(0);
 862       else {
 863         ShAmt = 0;
 864         ShOpcVal = ARM_AM::no_shift;
 865       }
 866     } else {
 867       ShOpcVal = ARM_AM::no_shift;
 868     }
 869   }
 870
 871   // Try matching (R shl C) + (R).
 872   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 873       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 874         N.getOperand(0).hasOneUse())) {
 875     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 876     if (ShOpcVal != ARM_AM::no_shift) {
 877       // Check to see if the RHS of the shift is a constant, if not, we can't
 878       // fold it.
 879       if (ConstantSDNode *Sh =
 880           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 881         ShAmt = Sh->getZExtValue();
 882         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 883           Offset = N.getOperand(0).getOperand(0);
 884           Base = N.getOperand(1);
 885         } else {
 886           ShAmt = 0;
 887           ShOpcVal = ARM_AM::no_shift;
 888         }
 889       } else {
 890         ShOpcVal = ARM_AM::no_shift;
 891       }
 892     }
 893   }
 894
 895   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 896                                   SDLoc(N), MVT::i32);
 897   return AM2_SHOP;
 898 }
 899
 900 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 901                                             SDValue &Offset, SDValue &Opc) {
 902   unsigned Opcode = Op->getOpcode();
 903   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 904     ? cast<LoadSDNode>(Op)->getAddressingMode()
 905     : cast<StoreSDNode>(Op)->getAddressingMode();
 906   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 907     ? ARM_AM::add : ARM_AM::sub;
 908   int Val;
 909   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 910     return false;
 911
 912   Offset = N;
 913   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 914   unsigned ShAmt = 0;
 915   if (ShOpcVal != ARM_AM::no_shift) {
 916     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 917     // it.
 918     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 919       ShAmt = Sh->getZExtValue();
 920       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 921         Offset = N.getOperand(0);
 922       else {
 923         ShAmt = 0;
 924         ShOpcVal = ARM_AM::no_shift;
 925       }
 926     } else {
 927       ShOpcVal = ARM_AM::no_shift;
 928     }
 929   }
 930
 931   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 932                                   SDLoc(N), MVT::i32);
 933   return true;
 934 }
 935
 936 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 937                                             SDValue &Offset, SDValue &Opc) {
 938   unsigned Opcode = Op->getOpcode();
 939   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 940     ? cast<LoadSDNode>(Op)->getAddressingMode()
 941     : cast<StoreSDNode>(Op)->getAddressingMode();
 942   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 943     ? ARM_AM::add : ARM_AM::sub;
 944   int Val;
 945   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 946     if (AddSub == ARM_AM::sub) Val *= -1;
 947     Offset = CurDAG->getRegister(0, MVT::i32);
 948     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 949     return true;
 950   }
 951
 952   return false;
 953 }
 954
 955
 956 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 957                                             SDValue &Offset, SDValue &Opc) {
 958   unsigned Opcode = Op->getOpcode();
 959   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 960     ? cast<LoadSDNode>(Op)->getAddressingMode()
 961     : cast<StoreSDNode>(Op)->getAddressingMode();
 962   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 963     ? ARM_AM::add : ARM_AM::sub;
 964   int Val;
 965   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 966     Offset = CurDAG->getRegister(0, MVT::i32);
 967     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 968                                                       ARM_AM::no_shift),
 969                                     SDLoc(Op), MVT::i32);
 970     return true;
 971   }
 972
 973   return false;
 974 }
 975
/// Select an address that has no offset component: \p N is used unchanged as
/// \p Base. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
 980
 981 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 982                                       SDValue &Base, SDValue &Offset,
 983                                       SDValue &Opc) {
 984   if (N.getOpcode() == ISD::SUB) {
 985     // X - C  is canonicalize to X + -C, no need to handle it here.
 986     Base = N.getOperand(0);
 987     Offset = N.getOperand(1);
 988     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 989                                     MVT::i32);
 990     return true;
 991   }
 992
 993   if (!CurDAG->isBaseWithConstantOffset(N)) {
 994     Base = N;
 995     if (N.getOpcode() == ISD::FrameIndex) {
 996       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 997       Base = CurDAG->getTargetFrameIndex(
 998           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 999     }
1000     Offset = CurDAG->getRegister(0, MVT::i32);
1001     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1002                                     MVT::i32);
1003     return true;
1004   }
1005
1006   // If the RHS is +/- imm8, fold into addr mode.
1007   int RHSC;
1008   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1009                               -256 + 1, 256, RHSC)) { // 8 bits.
1010     Base = N.getOperand(0);
1011     if (Base.getOpcode() == ISD::FrameIndex) {
1012       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1013       Base = CurDAG->getTargetFrameIndex(
1014           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1015     }
1016     Offset = CurDAG->getRegister(0, MVT::i32);
1017
1018     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1019     if (RHSC < 0) {
1020       AddSub = ARM_AM::sub;
1021       RHSC = -RHSC;
1022     }
1023     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1024                                     MVT::i32);
1025     return true;
1026   }
1027
1028   Base = N.getOperand(0);
1029   Offset = N.getOperand(1);
1030   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1031                                   MVT::i32);
1032   return true;
1033 }
1034
1035 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1036                                             SDValue &Offset, SDValue &Opc) {
1037   unsigned Opcode = Op->getOpcode();
1038   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1039     ? cast<LoadSDNode>(Op)->getAddressingMode()
1040     : cast<StoreSDNode>(Op)->getAddressingMode();
1041   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1042     ? ARM_AM::add : ARM_AM::sub;
1043   int Val;
1044   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1045     Offset = CurDAG->getRegister(0, MVT::i32);
1046     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1047                                     MVT::i32);
1048     return true;
1049   }
1050
1051   Offset = N;
1052   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1053                                   MVT::i32);
1054   return true;
1055 }
1056
1057 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1058                                       SDValue &Base, SDValue &Offset) {
1059   if (!CurDAG->isBaseWithConstantOffset(N)) {
1060     Base = N;
1061     if (N.getOpcode() == ISD::FrameIndex) {
1062       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1063       Base = CurDAG->getTargetFrameIndex(
1064           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1065     } else if (N.getOpcode() == ARMISD::Wrapper &&
1066                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1067                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1068                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1069       Base = N.getOperand(0);
1070     }
1071     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1072                                        SDLoc(N), MVT::i32);
1073     return true;
1074   }
1075
1076   // If the RHS is +/- imm8, fold into addr mode.
1077   int RHSC;
1078   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1079                               -256 + 1, 256, RHSC)) {
1080     Base = N.getOperand(0);
1081     if (Base.getOpcode() == ISD::FrameIndex) {
1082       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1083       Base = CurDAG->getTargetFrameIndex(
1084           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1085     }
1086
1087     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1088     if (RHSC < 0) {
1089       AddSub = ARM_AM::sub;
1090       RHSC = -RHSC;
1091     }
1092     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1093                                        SDLoc(N), MVT::i32);
1094     return true;
1095   }
1096
1097   Base = N;
1098   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1099                                      SDLoc(N), MVT::i32);
1100   return true;
1101 }
1102
1103 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1104                                       SDValue &Align) {
1105   Addr = N;
1106
1107   unsigned Alignment = 0;
1108
1109   MemSDNode *MemN = cast<MemSDNode>(Parent);
1110
1111   if (isa<LSBaseSDNode>(MemN) ||
1112       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1113         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1114        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1115     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1116     // The maximum alignment is equal to the memory size being referenced.
1117     unsigned MMOAlign = MemN->getAlignment();
1118     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1119     if (MMOAlign >= MemSize && MemSize > 1)
1120       Alignment = MemSize;
1121   } else {
1122     // All other uses of addrmode6 are for intrinsics.  For now just record
1123     // the raw alignment value; it will be refined later based on the legal
1124     // alignment operands for the intrinsic.
1125     Alignment = MemN->getAlignment();
1126   }
1127
1128   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1129   return true;
1130 }
1131
1132 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1133                                             SDValue &Offset) {
1134   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1135   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1136   if (AM != ISD::POST_INC)
1137     return false;
1138   Offset = N;
1139   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1140     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1141       Offset = CurDAG->getRegister(0, MVT::i32);
1142   }
1143   return true;
1144 }
1145
1146 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1147                                        SDValue &Offset, SDValue &Label) {
1148   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1149     Offset = N.getOperand(0);
1150     SDValue N1 = N.getOperand(1);
1151     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1152                                       SDLoc(N), MVT::i32);
1153     return true;
1154   }
1155
1156   return false;
1157 }
1158
1159
1160 //===----------------------------------------------------------------------===//
1161 //                         Thumb Addressing Modes
1162 //===----------------------------------------------------------------------===//
1163
1164 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1165                                             SDValue &Base, SDValue &Offset){
1166   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1167     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1168     if (!NC || !NC->isNullValue())
1169       return false;
1170
1171     Base = Offset = N;
1172     return true;
1173   }
1174
1175   Base = N.getOperand(0);
1176   Offset = N.getOperand(1);
1177   return true;
1178 }
1179
1180 bool
1181 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1182                                           SDValue &Base, SDValue &OffImm) {
1183   if (!CurDAG->isBaseWithConstantOffset(N)) {
1184     if (N.getOpcode() == ISD::ADD) {
1185       return false; // We want to select register offset instead
1186     } else if (N.getOpcode() == ARMISD::Wrapper &&
1187         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1188         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1189         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1190       Base = N.getOperand(0);
1191     } else {
1192       Base = N;
1193     }
1194
1195     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1196     return true;
1197   }
1198
1199   // If the RHS is + imm5 * scale, fold into addr mode.
1200   int RHSC;
1201   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1202     Base = N.getOperand(0);
1203     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1204     return true;
1205   }
1206
1207   // Offset is too large, so use register offset instead.
1208   return false;
1209 }
1210
/// Thumb register + imm5 addressing with a scale of 4; forwards to
/// SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1216
/// Thumb register + imm5 addressing with a scale of 2; forwards to
/// SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1222
/// Thumb register + imm5 addressing with a scale of 1; forwards to
/// SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1228
1229 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1230                                             SDValue &Base, SDValue &OffImm) {
1231   if (N.getOpcode() == ISD::FrameIndex) {
1232     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1233     // Only multiples of 4 are allowed for the offset, so the frame object
1234     // alignment must be at least 4.
1235     MachineFrameInfo *MFI = MF->getFrameInfo();
1236     if (MFI->getObjectAlignment(FI) < 4)
1237       MFI->setObjectAlignment(FI, 4);
1238     Base = CurDAG->getTargetFrameIndex(
1239         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1240     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1241     return true;
1242   }
1243
1244   if (!CurDAG->isBaseWithConstantOffset(N))
1245     return false;
1246
1247   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1248   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1249       (LHSR && LHSR->getReg() == ARM::SP)) {
1250     // If the RHS is + imm8 * scale, fold into addr mode.
1251     int RHSC;
1252     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1253       Base = N.getOperand(0);
1254       if (Base.getOpcode() == ISD::FrameIndex) {
1255         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1256         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1257         // indexed by the LHS must be 4-byte aligned.
1258         MachineFrameInfo *MFI = MF->getFrameInfo();
1259         if (MFI->getObjectAlignment(FI) < 4)
1260           MFI->setObjectAlignment(FI, 4);
1261         Base = CurDAG->getTargetFrameIndex(
1262             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1263       }
1264       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1265       return true;
1266     }
1267   }
1268
1269   return false;
1270 }
1271
1272
1273 //===----------------------------------------------------------------------===//
1274 //                        Thumb 2 Addressing Modes
1275 //===----------------------------------------------------------------------===//
1276
1277
/// Select Thumb2 base + unsigned 12-bit immediate addressing for \p N.
///
/// Returns false in two cases so that another pattern is chosen: a wrapped
/// TargetConstantPool (left for t2LDRpci) and an address that
/// SelectT2AddrModeImm8 accepts (left for t2LDRi8). Every other address is
/// accepted, falling back to \p N as the base with a zero immediate.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through a wrapper unless it wraps a global address, external
    // symbol or TLS address.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      // Note that Base has already been written when this path fails.
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    // SelectT2AddrModeImm8 writes Base and OffImm when it succeeds, so both
    // outputs are modified here even though this function then fails.
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    // Fold the SUB into the sign of the constant so a single range check
    // covers both opcodes.
    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1333
1334 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1335                                            SDValue &Base, SDValue &OffImm) {
1336   // Match simple R - imm8 operands.
1337   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1338       !CurDAG->isBaseWithConstantOffset(N))
1339     return false;
1340
1341   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1342     int RHSC = (int)RHS->getSExtValue();
1343     if (N.getOpcode() == ISD::SUB)
1344       RHSC = -RHSC;
1345
1346     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1347       Base = N.getOperand(0);
1348       if (Base.getOpcode() == ISD::FrameIndex) {
1349         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1350         Base = CurDAG->getTargetFrameIndex(
1351             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1352       }
1353       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1354       return true;
1355     }
1356   }
1357
1358   return false;
1359 }
1360
1361 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1362                                                  SDValue &OffImm){
1363   unsigned Opcode = Op->getOpcode();
1364   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1365     ? cast<LoadSDNode>(Op)->getAddressingMode()
1366     : cast<StoreSDNode>(Op)->getAddressingMode();
1367   int RHSC;
1368   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1369     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1370       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1371       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1372     return true;
1373   }
1374
1375   return false;
1376 }
1377
/// Select Thumb2 register + (register << imm) addressing for \p N.
///
/// Fails when \p N is neither an ADD nor a base-plus-constant, or when its
/// constant operand falls in a range left to the immediate forms. On success
/// \p Base and \p OffReg hold the two registers and \p ShImm the left-shift
/// amount (0 to 3) applied to \p OffReg. Base and OffReg may be swapped
/// relative to the operands of \p N.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shift amounts below 4 are folded; otherwise the shift node
      // itself stays as the offset register with no extra shift.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    // NOTE(review): the 3 passed here presumably caps the extracted shift at
    // the largest amount this addressing mode accepts -- confirm against
    // canExtractShiftFromMul.
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1435
1436 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1437                                                 SDValue &OffImm) {
1438   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1439   // instructions.
1440   Base = N;
1441   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1442
1443   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1444     return true;
1445
1446   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1447   if (!RHS)
1448     return true;
1449
1450   uint32_t RHSC = (int)RHS->getZExtValue();
1451   if (RHSC > 1020 || RHSC % 4 != 0)
1452     return true;
1453
1454   Base = N.getOperand(0);
1455   if (Base.getOpcode() == ISD::FrameIndex) {
1456     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1457     Base = CurDAG->getTargetFrameIndex(
1458         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1459   }
1460
1461   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1462   return true;
1463 }
1464
1465 //===--------------------------------------------------------------------===//
1466
/// getAL - Returns an ARMCC::AL immediate node: an i32 target constant
/// created in \p CurDAG at location \p dl.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1471
/// Try to select an ARM-mode pre/post-indexed load for \p N, which must be a
/// LoadSDNode.
///
/// Returns false, leaving \p N untouched, for unindexed loads and for any
/// memory type / offset combination that none of the addressing-mode
/// selectors below accept. Otherwise \p N is replaced by a machine node and
/// true is returned.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The selectors are tried in order and write Offset/AMOpc on success, so
  // the first match determines both the opcode and its operands.
  // i32: addressing mode 2, immediate forms first, then register offset.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  // i16: addressing mode 3, signed or unsigned halfword load.
  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // i8/i1: sign-extending loads use addressing mode 3 (LDRSB); all others
    // use addressing mode 2 (LDRB).
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    // The *_PRE_IMM opcodes take no separate Offset operand: the operand
    // list is Base, AMOpc, predicate (AL), register 0, chain.
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                            MVT::i32, MVT::Other, Ops));
      return true;
    } else {
      // Every other opcode additionally takes Offset between Base and AMOpc.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                            MVT::i32, MVT::Other, Ops));
      return true;
    }
  }

  return false;
}
1546
1547 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1548   LoadSDNode *LD = cast<LoadSDNode>(N);
1549   EVT LoadedVT = LD->getMemoryVT();
1550   ISD::MemIndexedMode AM = LD->getAddressingMode();
1551   if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1552       AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1553     return false;
1554
1555   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1556   if (!COffs || COffs->getZExtValue() != 4)
1557     return false;
1558
1559   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1560   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1561   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1562   // ISel.
1563   SDValue Chain = LD->getChain();
1564   SDValue Base = LD->getBasePtr();
1565   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1566                    CurDAG->getRegister(0, MVT::i32), Chain };
1567   ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32,
1568                                         MVT::Other, Ops));
1569   return true;
1570 }
1571
1572 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1573   LoadSDNode *LD = cast<LoadSDNode>(N);
1574   ISD::MemIndexedMode AM = LD->getAddressingMode();
1575   if (AM == ISD::UNINDEXED)
1576     return false;
1577
1578   EVT LoadedVT = LD->getMemoryVT();
1579   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1580   SDValue Offset;
1581   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1582   unsigned Opcode = 0;
1583   bool Match = false;
1584   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1585     switch (LoadedVT.getSimpleVT().SimpleTy) {
1586     case MVT::i32:
1587       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1588       break;
1589     case MVT::i16:
1590       if (isSExtLd)
1591         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1592       else
1593         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1594       break;
1595     case MVT::i8:
1596     case MVT::i1:
1597       if (isSExtLd)
1598         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1599       else
1600         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1601       break;
1602     default:
1603       return false;
1604     }
1605     Match = true;
1606   }
1607
1608   if (Match) {
1609     SDValue Chain = LD->getChain();
1610     SDValue Base = LD->getBasePtr();
1611     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1612                      CurDAG->getRegister(0, MVT::i32), Chain };
1613     ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1614                                           MVT::Other, Ops));
1615     return true;
1616   }
1617
1618   return false;
1619 }
1620
1621 /// \brief Build a GPRPair REG_SEQUENCE: \p V0 in gsub_0, \p V1 in gsub_1.
1622 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1623   SDLoc Loc(V0.getNode());
1624   SDValue ClassID =
1625       CurDAG->getTargetConstant(ARM::GPRPairRegClassID, Loc, MVT::i32);
1626   SDValue Idx0 = CurDAG->getTargetConstant(ARM::gsub_0, Loc, MVT::i32);
1627   SDValue Idx1 = CurDAG->getTargetConstant(ARM::gsub_1, Loc, MVT::i32);
1628   const SDValue SeqOps[] = {ClassID, V0, Idx0, V1, Idx1};
1629   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, Loc, VT, SeqOps);
1630 }
1631
1632 /// \brief Build a DPR_VFP2 REG_SEQUENCE: \p V0 in ssub_0, \p V1 in ssub_1.
1633 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1634   SDLoc Loc(V0.getNode());
1635   SDValue ClassID =
1636       CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, Loc, MVT::i32);
1637   SDValue Idx0 = CurDAG->getTargetConstant(ARM::ssub_0, Loc, MVT::i32);
1638   SDValue Idx1 = CurDAG->getTargetConstant(ARM::ssub_1, Loc, MVT::i32);
1639   const SDValue SeqOps[] = {ClassID, V0, Idx0, V1, Idx1};
1640   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, Loc, VT, SeqOps);
1641 }
1642
1643 /// \brief Form a quad register from a pair of D registers.
1644 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1645   SDLoc dl(V0.getNode());
1646   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1647                                                MVT::i32);
1648   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1649   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1650   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1651   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1652 }
1653
1654 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1655 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1656   SDLoc dl(V0.getNode());
1657   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1658                                                MVT::i32);
1659   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1660   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1661   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1662   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1663 }
1664
1665 /// \brief Form 4 consecutive S registers.
1666 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1667                                    SDValue V2, SDValue V3) {
1668   SDLoc dl(V0.getNode());
1669   SDValue RegClass =
1670     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1671   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1672   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1673   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1674   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1675   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1676                                     V2, SubReg2, V3, SubReg3 };
1677   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1678 }
1679
1680 /// \brief Form 4 consecutive D registers.
1681 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1682                                    SDValue V2, SDValue V3) {
1683   SDLoc dl(V0.getNode());
1684   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1685                                                MVT::i32);
1686   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1687   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1688   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1689   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1690   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1691                                     V2, SubReg2, V3, SubReg3 };
1692   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1693 }
1694
1695 /// \brief Form 4 consecutive Q registers.
1696 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1697                                    SDValue V2, SDValue V3) {
1698   SDLoc dl(V0.getNode());
1699   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1700                                                MVT::i32);
1701   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1702   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1703   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1704   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1705   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1706                                     V2, SubReg2, V3, SubReg3 };
1707   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1708 }
1709
1710 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1711 /// of a NEON VLD or VST instruction.  The supported values depend on the
1712 /// number of registers being loaded.
1713 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1714                                        unsigned NumVecs, bool is64BitVector) {
1715   unsigned NumRegs = NumVecs;
1716   if (!is64BitVector && NumVecs < 3)
1717     NumRegs *= 2;
1718
1719   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1720   if (Alignment >= 32 && NumRegs == 4)
1721     Alignment = 32;
1722   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1723     Alignment = 16;
1724   else if (Alignment >= 8)
1725     Alignment = 8;
1726   else
1727     Alignment = 0;
1728
1729   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1730 }
1731
1732 static bool isVLDfixed(unsigned Opc)
1733 {
1734   switch (Opc) {
1735   default: return false;
1736   case ARM::VLD1d8wb_fixed : return true;
1737   case ARM::VLD1d16wb_fixed : return true;
1738   case ARM::VLD1d64Qwb_fixed : return true;
1739   case ARM::VLD1d32wb_fixed : return true;
1740   case ARM::VLD1d64wb_fixed : return true;
1741   case ARM::VLD1d64TPseudoWB_fixed : return true;
1742   case ARM::VLD1d64QPseudoWB_fixed : return true;
1743   case ARM::VLD1q8wb_fixed : return true;
1744   case ARM::VLD1q16wb_fixed : return true;
1745   case ARM::VLD1q32wb_fixed : return true;
1746   case ARM::VLD1q64wb_fixed : return true;
1747   case ARM::VLD2d8wb_fixed : return true;
1748   case ARM::VLD2d16wb_fixed : return true;
1749   case ARM::VLD2d32wb_fixed : return true;
1750   case ARM::VLD2q8PseudoWB_fixed : return true;
1751   case ARM::VLD2q16PseudoWB_fixed : return true;
1752   case ARM::VLD2q32PseudoWB_fixed : return true;
1753   case ARM::VLD2DUPd8wb_fixed : return true;
1754   case ARM::VLD2DUPd16wb_fixed : return true;
1755   case ARM::VLD2DUPd32wb_fixed : return true;
1756   }
1757 }
1758
1759 static bool isVSTfixed(unsigned Opc)
1760 {
1761   switch (Opc) {
1762   default: return false;
1763   case ARM::VST1d8wb_fixed : return true;
1764   case ARM::VST1d16wb_fixed : return true;
1765   case ARM::VST1d32wb_fixed : return true;
1766   case ARM::VST1d64wb_fixed : return true;
1767   case ARM::VST1q8wb_fixed : return true;
1768   case ARM::VST1q16wb_fixed : return true;
1769   case ARM::VST1q32wb_fixed : return true;
1770   case ARM::VST1q64wb_fixed : return true;
1771   case ARM::VST1d64TPseudoWB_fixed : return true;
1772   case ARM::VST1d64QPseudoWB_fixed : return true;
1773   case ARM::VST2d8wb_fixed : return true;
1774   case ARM::VST2d16wb_fixed : return true;
1775   case ARM::VST2d32wb_fixed : return true;
1776   case ARM::VST2q8PseudoWB_fixed : return true;
1777   case ARM::VST2q16PseudoWB_fixed : return true;
1778   case ARM::VST2q32PseudoWB_fixed : return true;
1779   }
1780 }
1781
1782 // Get the register stride update opcode of a VLD/VST instruction that
1783 // is otherwise equivalent to the given fixed stride updating instruction.
1784 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1785   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1786     && "Incorrect fixed stride updating instruction.");
1787   switch (Opc) {
1788   default: break;
1789   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1790   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1791   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1792   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1793   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1794   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1795   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1796   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1797   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1798   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1799   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1800   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1801
1802   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1803   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1804   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1805   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1806   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1807   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1808   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1809   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1810   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1811   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1812
1813   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1814   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1815   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1816   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1817   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1818   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1819
1820   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1821   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1822   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1823   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1824   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1825   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1826
1827   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1828   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1829   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1830   }
1831   return Opc; // If not one we handle, return it unchanged.
1832 }
1833
     /// Select machine node(s) for the NEON VLD memory intrinsic node \p N, which
     /// loads \p NumVecs (1-4) vectors.
     ///
     /// \param isUpdating  selects the operand layout: the address is operand 1
     ///        (followed by the increment) when true and operand 2 when false.
     ///        When true an extra i32 result is produced as well.
     /// \param DOpcodes    opcode table used when the vector type is 64 bits wide.
     /// \param QOpcodes0   opcode table used otherwise: either the only
     ///        instruction (NumVecs <= 2) or the first of two loads.
     /// \param QOpcodes1   opcode table for the second of the two loads.
     ///
     /// All tables are indexed by the OpcodeIndex computed below from the vector
     /// type.  Nothing is selected if SelectAddrMode6 rejects the address.
1834 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1835                                 const uint16_t *DOpcodes,
1836                                 const uint16_t *QOpcodes0,
1837                                 const uint16_t *QOpcodes1) {
1838   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1839   SDLoc dl(N);
1840
1841   SDValue MemAddr, Align;
       // The address is operand 1 of an updating node and operand 2 otherwise.
1842   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1843   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1844     return;
1845
1846   SDValue Chain = N->getOperand(0);
1847   EVT VT = N->getValueType(0);
1848   bool is64BitVector = VT.is64BitVector();
       // Replace the alignment from SelectAddrMode6 with the value GetVLDSTAlign
       // derives from it for this vector count and width.
1849   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1850
       // Translate the vector type into an index into the opcode tables.  The
       // double-register and quad-register types reuse the same indices 0-3.
1851   unsigned OpcodeIndex;
1852   switch (VT.getSimpleVT().SimpleTy) {
1853   default: llvm_unreachable("unhandled vld type");
1854     // Double-register operations:
1855   case MVT::v8i8:  OpcodeIndex = 0; break;
1856   case MVT::v4i16: OpcodeIndex = 1; break;
1857   case MVT::v2f32:
1858   case MVT::v2i32: OpcodeIndex = 2; break;
1859   case MVT::v1i64: OpcodeIndex = 3; break;
1860     // Quad-register operations:
1861   case MVT::v16i8: OpcodeIndex = 0; break;
1862   case MVT::v8i16: OpcodeIndex = 1; break;
1863   case MVT::v4f32:
1864   case MVT::v4i32: OpcodeIndex = 2; break;
1865   case MVT::v2f64:
1866   case MVT::v2i64: OpcodeIndex = 3;
1867     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1868     break;
1869   }
1870
       // A single vector keeps its own type.  Several vectors are returned as one
       // vector of i64 with NumVecs elements (3 is rounded up to 4), doubled when
       // the vectors are not 64 bits wide.
1871   EVT ResTy;
1872   if (NumVecs == 1)
1873     ResTy = VT;
1874   else {
1875     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1876     if (!is64BitVector)
1877       ResTyElts *= 2;
1878     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1879   }
       // Result list of the final node: the loaded value, an i32 when updating,
       // and the chain.
1880   std::vector<EVT> ResTys;
1881   ResTys.push_back(ResTy);
1882   if (isUpdating)
1883     ResTys.push_back(MVT::i32);
1884   ResTys.push_back(MVT::Other);
1885
1886   SDValue Pred = getAL(CurDAG, dl);
1887   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1888   SDNode *VLd;
1889   SmallVector<SDValue, 7> Ops;
1890
1891   // Double registers and VLD1/VLD2 quad registers are directly supported.
1892   if (is64BitVector || NumVecs <= 2) {
1893     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1894                     QOpcodes0[OpcodeIndex]);
1895     Ops.push_back(MemAddr);
1896     Ops.push_back(Align);
1897     if (isUpdating) {
           // The increment is the operand right after the address.
1898       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1899       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1900       // case entirely when the rest are updated to that form, too.
1901       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1902         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1903       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1904       // check for that explicitly too. Horribly hacky, but temporary.
           // An increment operand is pushed when the increment is not a constant
           // (Inc itself) or when NumVecs > 2 and the opcode is not a *_fixed one
           // (Reg0 for a constant increment).
1905       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1906           !isa<ConstantSDNode>(Inc.getNode()))
1907         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1908     }
1909     Ops.push_back(Pred);
1910     Ops.push_back(Reg0);
1911     Ops.push_back(Chain);
1912     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1913
1914   } else {
1915     // Otherwise, quad registers are loaded with two separate instructions,
1916     // where one loads the even registers and the other loads the odd registers.
1917     EVT AddrTy = MemAddr.getValueType();
1918
1919     // Load the even subregs.  This is always an updating load, so that it
1920     // provides the address to the second load for the odd subregs.
1921     SDValue ImplDef =
1922       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1923     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1924     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1925                                           ResTy, AddrTy, MVT::Other, OpsA);
         // VLdA's results are (ResTy, AddrTy, Other), so result 2 is its chain.
1926     Chain = SDValue(VLdA, 2);
1927
1928     // Load the odd subregs.
         // The second load's address is result 1 (the AddrTy value) of the first
         // load, and result 0 of the first load is passed in as an operand.
1929     Ops.push_back(SDValue(VLdA, 1));
1930     Ops.push_back(Align);
1931     if (isUpdating) {
1932       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1933       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1934              "only constant post-increment update allowed for VLD3/4");
           // Inc is only used by the assert; the cast silences unused-variable
           // warnings when asserts are compiled out.
1935       (void)Inc;
1936       Ops.push_back(Reg0);
1937     }
1938     Ops.push_back(SDValue(VLdA, 0));
1939     Ops.push_back(Pred);
1940     Ops.push_back(Reg0);
1941     Ops.push_back(Chain);
1942     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1943   }
1944
1945   // Transfer memoperands.
       // Only VLd (the single node, or the second of the two loads) receives N's
       // memory operand here.
1946   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1947   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1948   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1949
       // A single-vector load has the same result layout as N and replaces it
       // directly.
1950   if (NumVecs == 1) {
1951     ReplaceNode(N, VLd);
1952     return;
1953   }
1954
1955   // Extract out the subregisters.
1956   SDValue SuperReg = SDValue(VLd, 0);
1957   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1958                     ARM::qsub_3 == ARM::qsub_0 + 3,
1959                 "Unexpected subreg numbering");
       // Vector result Vec of N becomes subregister Sub0 + Vec of the loaded
       // super-register; the static_assert above guards that index arithmetic.
1960   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1961   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1962     ReplaceUses(SDValue(N, Vec),
1963                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
       // The results of N after the vectors map onto results 1 (and, when
       // updating, 2) of the new node.
1964   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1965   if (isUpdating)
1966     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1967   CurDAG->RemoveDeadNode(N);
1968 }
1969
     /// Select machine node(s) for the NEON VST memory intrinsic node \p N, which
     /// stores \p NumVecs (1-4) vectors.
     ///
     /// \param isUpdating  selects the address operand (operand 1 when true,
     ///        operand 2 when false).  When true the increment is read from the
     ///        operand after the address and an i32 result is produced.
     /// \param DOpcodes    opcode table used when the vector type is 64 bits wide.
     /// \param QOpcodes0   opcode table used otherwise: either the only
     ///        instruction (NumVecs <= 2) or the first of two stores.
     /// \param QOpcodes1   opcode table for the second of the two stores.
     ///
     /// All tables are indexed by the OpcodeIndex computed below from the type of
     /// the first stored vector.  Nothing is selected if SelectAddrMode6 rejects
     /// the address.
1970 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1971                                 const uint16_t *DOpcodes,
1972                                 const uint16_t *QOpcodes0,
1973                                 const uint16_t *QOpcodes1) {
1974   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1975   SDLoc dl(N);
1976
1977   SDValue MemAddr, Align;
1978   unsigned AddrOpIdx = isUpdating ? 1 : 2;
       // The first vector to store is operand 3 in both operand layouts.
1979   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1980   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1981     return;
1982
       // N's memory operand, attached to every store node created below.
1983   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1984   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1985
1986   SDValue Chain = N->getOperand(0);
1987   EVT VT = N->getOperand(Vec0Idx).getValueType();
1988   bool is64BitVector = VT.is64BitVector();
1989   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1990
       // Translate the vector type into an index into the opcode tables.  The
       // double-register and quad-register types reuse the same indices 0-3.
1991   unsigned OpcodeIndex;
1992   switch (VT.getSimpleVT().SimpleTy) {
1993   default: llvm_unreachable("unhandled vst type");
1994     // Double-register operations:
1995   case MVT::v8i8:  OpcodeIndex = 0; break;
1996   case MVT::v4i16: OpcodeIndex = 1; break;
1997   case MVT::v2f32:
1998   case MVT::v2i32: OpcodeIndex = 2; break;
1999   case MVT::v1i64: OpcodeIndex = 3; break;
2000     // Quad-register operations:
2001   case MVT::v16i8: OpcodeIndex = 0; break;
2002   case MVT::v8i16: OpcodeIndex = 1; break;
2003   case MVT::v4f32:
2004   case MVT::v4i32: OpcodeIndex = 2; break;
2005   case MVT::v2f64:
2006   case MVT::v2i64: OpcodeIndex = 3;
2007     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
2008     break;
2009   }
2010
       // Result list of the final node: an i32 when updating, then the chain.
2011   std::vector<EVT> ResTys;
2012   if (isUpdating)
2013     ResTys.push_back(MVT::i32);
2014   ResTys.push_back(MVT::Other);
2015
2016   SDValue Pred = getAL(CurDAG, dl);
2017   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2018   SmallVector<SDValue, 7> Ops;
2019
2020   // Double registers and VST1/VST2 quad registers are directly supported.
2021   if (is64BitVector || NumVecs <= 2) {
         // Build the single source operand: the vector itself for one vector,
         // otherwise a REG_SEQUENCE combining all of them.
2022     SDValue SrcReg;
2023     if (NumVecs == 1) {
2024       SrcReg = N->getOperand(Vec0Idx);
2025     } else if (is64BitVector) {
2026       // Form a REG_SEQUENCE to force register allocation.
2027       SDValue V0 = N->getOperand(Vec0Idx + 0);
2028       SDValue V1 = N->getOperand(Vec0Idx + 1);
2029       if (NumVecs == 2)
2030         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2031       else {
2032         SDValue V2 = N->getOperand(Vec0Idx + 2);
2033         // If it's a vst3, form a quad D-register and leave the last part as
2034         // an undef.
2035         SDValue V3 = (NumVecs == 3)
2036           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2037           : N->getOperand(Vec0Idx + 3);
2038         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2039       }
2040     } else {
2041       // Form a QQ register.
2042       SDValue Q0 = N->getOperand(Vec0Idx);
2043       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2044       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2045     }
2046
2047     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2048                     QOpcodes0[OpcodeIndex]);
2049     Ops.push_back(MemAddr);
2050     Ops.push_back(Align);
2051     if (isUpdating) {
           // The increment is the operand right after the address.
2052       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2053       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2054       // case entirely when the rest are updated to that form, too.
2055       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2056         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2057       // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
2058       // check for that explicitly too. Horribly hacky, but temporary.
           // A non-constant increment is pushed as is.  A constant one adds Reg0
           // only when NumVecs > 2 and the opcode is not a *_fixed one.
2059       if  (!isa<ConstantSDNode>(Inc.getNode()))
2060         Ops.push_back(Inc);
2061       else if (NumVecs > 2 && !isVSTfixed(Opc))
2062         Ops.push_back(Reg0);
2063     }
2064     Ops.push_back(SrcReg);
2065     Ops.push_back(Pred);
2066     Ops.push_back(Reg0);
2067     Ops.push_back(Chain);
2068     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2069
2070     // Transfer memoperands.
2071     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2072
2073     ReplaceNode(N, VSt);
2074     return;
2075   }
2076
2077   // Otherwise, quad registers are stored with two separate instructions,
2078   // where one stores the even registers and the other stores the odd registers.
2079
2080   // Form the QQQQ REG_SEQUENCE.
       // For NumVecs == 3 the fourth input is an IMPLICIT_DEF.
2081   SDValue V0 = N->getOperand(Vec0Idx + 0);
2082   SDValue V1 = N->getOperand(Vec0Idx + 1);
2083   SDValue V2 = N->getOperand(Vec0Idx + 2);
2084   SDValue V3 = (NumVecs == 3)
2085     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2086     : N->getOperand(Vec0Idx + 3);
2087   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2088
2089   // Store the even D registers.  This is always an updating store, so that it
2090   // provides the address to the second store for the odd subregs.
2091   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2092   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2093                                         MemAddr.getValueType(),
2094                                         MVT::Other, OpsA);
2095   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
       // VStA's results are (address type, Other), so result 1 is its chain.
2096   Chain = SDValue(VStA, 1);
2097
2098   // Store the odd D registers.
       // The second store's address is result 0 of the first store.
2099   Ops.push_back(SDValue(VStA, 0));
2100   Ops.push_back(Align);
2101   if (isUpdating) {
2102     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2103     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2104            "only constant post-increment update allowed for VST3/4");
         // Inc is only used by the assert; the cast silences unused-variable
         // warnings when asserts are compiled out.
2105     (void)Inc;
2106     Ops.push_back(Reg0);
2107   }
2108   Ops.push_back(RegSeq);
2109   Ops.push_back(Pred);
2110   Ops.push_back(Reg0);
2111   Ops.push_back(Chain);
2112   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2113                                         Ops);
2114   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2115   ReplaceNode(N, VStB);
2116 }
2117
     /// Select the machine node for a NEON lane load (\p IsLoad true) or lane
     /// store (\p IsLoad false) of \p NumVecs (2-4) vectors described by the
     /// memory intrinsic node \p N.
     ///
     /// \param isUpdating  selects the address operand (operand 1 when true,
     ///        operand 2 when false).  When true the increment is read from the
     ///        operand after the address and an i32 result is produced.
     /// \param DOpcodes    opcode table used when the vector type is 64 bits wide.
     /// \param QOpcodes    opcode table used for the other vector types.
     ///
     /// Both tables are indexed by the OpcodeIndex computed below from the type
     /// of the first vector operand.  Nothing is selected if SelectAddrMode6
     /// rejects the address.
2118 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2119                                       unsigned NumVecs,
2120                                       const uint16_t *DOpcodes,
2121                                       const uint16_t *QOpcodes) {
2122   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2123   SDLoc dl(N);
2124
2125   SDValue MemAddr, Align;
2126   unsigned AddrOpIdx = isUpdating ? 1 : 2;
       // The first vector operand is operand 3 in both operand layouts.
2127   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2128   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2129     return;
2130
2131   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2132   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2133
2134   SDValue Chain = N->getOperand(0);
       // The lane number is the constant operand following the NumVecs vectors.
2135   unsigned Lane =
2136     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2137   EVT VT = N->getOperand(Vec0Idx).getValueType();
2138   bool is64BitVector = VT.is64BitVector();
2139
       // Derive the alignment operand.  It stays 0 when NumVecs == 3.  Otherwise
       // the alignment from SelectAddrMode6 is capped at NumBytes (NumVecs times
       // the element size in bytes), zeroed if it is below both 8 and NumBytes,
       // reduced to its lowest set bit, and finally zeroed if that leaves 1.
2140   unsigned Alignment = 0;
2141   if (NumVecs != 3) {
2142     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2143     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2144     if (Alignment > NumBytes)
2145       Alignment = NumBytes;
2146     if (Alignment < 8 && Alignment < NumBytes)
2147       Alignment = 0;
2148     // Alignment must be a power of two; make sure of that.
2149     Alignment = (Alignment & -Alignment);
2150     if (Alignment == 1)
2151       Alignment = 0;
2152   }
2153   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2154
       // Translate the vector type into an index into the opcode tables.  The
       // double-register table has entries 0-2, the quad-register table 0-1.
2155   unsigned OpcodeIndex;
2156   switch (VT.getSimpleVT().SimpleTy) {
2157   default: llvm_unreachable("unhandled vld/vst lane type");
2158     // Double-register operations:
2159   case MVT::v8i8:  OpcodeIndex = 0; break;
2160   case MVT::v4i16: OpcodeIndex = 1; break;
2161   case MVT::v2f32:
2162   case MVT::v2i32: OpcodeIndex = 2; break;
2163     // Quad-register operations:
2164   case MVT::v8i16: OpcodeIndex = 0; break;
2165   case MVT::v4f32:
2166   case MVT::v4i32: OpcodeIndex = 1; break;
2167   }
2168
       // Result list: for loads one vector of i64 with NumVecs elements (3 is
       // rounded up to 4, doubled when the vectors are not 64 bits wide), then an
       // i32 when updating, then the chain.
2169   std::vector<EVT> ResTys;
2170   if (IsLoad) {
2171     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2172     if (!is64BitVector)
2173       ResTyElts *= 2;
2174     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2175                                       MVT::i64, ResTyElts));
2176   }
2177   if (isUpdating)
2178     ResTys.push_back(MVT::i32);
2179   ResTys.push_back(MVT::Other);
2180
2181   SDValue Pred = getAL(CurDAG, dl);
2182   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2183
2184   SmallVector<SDValue, 8> Ops;
2185   Ops.push_back(MemAddr);
2186   Ops.push_back(Align);
2187   if (isUpdating) {
         // A constant increment is passed as Reg0, any other increment as is.
2188     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2189     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2190   }
2191
       // Combine the input vectors into one REG_SEQUENCE super-register.  It is
       // passed as an operand for loads as well as stores.  For NumVecs == 3 the
       // fourth input is an IMPLICIT_DEF.
2192   SDValue SuperReg;
2193   SDValue V0 = N->getOperand(Vec0Idx + 0);
2194   SDValue V1 = N->getOperand(Vec0Idx + 1);
2195   if (NumVecs == 2) {
2196     if (is64BitVector)
2197       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2198     else
2199       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2200   } else {
2201     SDValue V2 = N->getOperand(Vec0Idx + 2);
2202     SDValue V3 = (NumVecs == 3)
2203       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2204       : N->getOperand(Vec0Idx + 3);
2205     if (is64BitVector)
2206       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2207     else
2208       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2209   }
2210   Ops.push_back(SuperReg);
2211   Ops.push_back(getI32Imm(Lane, dl));
2212   Ops.push_back(Pred);
2213   Ops.push_back(Reg0);
2214   Ops.push_back(Chain);
2215
2216   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2217                                   QOpcodes[OpcodeIndex]);
       // Despite its name, VLdLn is the new node for stores as well as loads.
2218   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2219   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
       // A store replaces N directly.
2220   if (!IsLoad) {
2221     ReplaceNode(N, VLdLn);
2222     return;
2223   }
2224
2225   // Extract the subregisters.
2226   SuperReg = SDValue(VLdLn, 0);
2227   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2228                     ARM::qsub_3 == ARM::qsub_0 + 3,
2229                 "Unexpected subreg numbering");
       // Vector result Vec of N becomes subregister Sub0 + Vec of the loaded
       // super-register; the static_assert above guards that index arithmetic.
2230   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2231   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2232     ReplaceUses(SDValue(N, Vec),
2233                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
       // The results of N after the vectors map onto results 1 (and, when
       // updating, 2) of the new node.
2234   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2235   if (isUpdating)
2236     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2237   CurDAG->RemoveDeadNode(N);
2238 }
2239
2240 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2241                                    const uint16_t *Opcodes) {
2242   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2243   SDLoc dl(N);
2244
2245   SDValue MemAddr, Align;
2246   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2247     return;
2248
2249   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2250   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2251
2252   SDValue Chain = N->getOperand(0);
2253   EVT VT = N->getValueType(0);
2254
2255   unsigned Alignment = 0;
2256   if (NumVecs != 3) {
2257     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2258     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2259     if (Alignment > NumBytes)
2260       Alignment = NumBytes;
2261     if (Alignment < 8 && Alignment < NumBytes)
2262       Alignment = 0;
2263     // Alignment must be a power of two; make sure of that.
2264     Alignment = (Alignment & -Alignment);
2265     if (Alignment == 1)
2266       Alignment = 0;
2267   }
2268   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2269
2270   unsigned OpcodeIndex;
2271   switch (VT.getSimpleVT().SimpleTy) {
2272   default: llvm_unreachable("unhandled vld-dup type");
2273   case MVT::v8i8:  OpcodeIndex = 0; break;
2274   case MVT::v4i16: OpcodeIndex = 1; break;
2275   case MVT::v2f32:
2276   case MVT::v2i32: OpcodeIndex = 2; break;
2277   }
2278
2279   SDValue Pred = getAL(CurDAG, dl);
2280   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2281   SDValue SuperReg;
2282   unsigned Opc = Opcodes[OpcodeIndex];
2283   SmallVector<SDValue, 6> Ops;
2284   Ops.push_back(MemAddr);
2285   Ops.push_back(Align);
2286   if (isUpdating) {
2287     // fixed-stride update instructions don't have an explicit writeback
2288     // operand. It's implicit in the opcode itself.
2289     SDValue Inc = N->getOperand(2);
2290     if (!isa<ConstantSDNode>(Inc.getNode()))
2291       Ops.push_back(Inc);
2292     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2293     else if (NumVecs > 2)
2294       Ops.push_back(Reg0);
2295   }
2296   Ops.push_back(Pred);
2297   Ops.push_back(Reg0);
2298   Ops.push_back(Chain);
2299
2300   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2301   std::vector<EVT> ResTys;
2302   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2303   if (isUpdating)
2304     ResTys.push_back(MVT::i32);
2305   ResTys.push_back(MVT::Other);
2306   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2307   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2308   SuperReg = SDValue(VLdDup, 0);
2309
2310   // Extract the subregisters.
2311   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2312   unsigned SubIdx = ARM::dsub_0;
2313   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2314     ReplaceUses(SDValue(N, Vec),
2315                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2316   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2317   if (isUpdating)
2318     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2319   CurDAG->RemoveDeadNode(N);
2320 }
2321
2322 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2323                                  unsigned Opc) {
2324   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2325   SDLoc dl(N);
2326   EVT VT = N->getValueType(0);
2327   unsigned FirstTblReg = IsExt ? 2 : 1;
2328
2329   // Form a REG_SEQUENCE to force register allocation.
2330   SDValue RegSeq;
2331   SDValue V0 = N->getOperand(FirstTblReg + 0);
2332   SDValue V1 = N->getOperand(FirstTblReg + 1);
2333   if (NumVecs == 2)
2334     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2335   else {
2336     SDValue V2 = N->getOperand(FirstTblReg + 2);
2337     // If it's a vtbl3, form a quad D-register and leave the last part as
2338     // an undef.
2339     SDValue V3 = (NumVecs == 3)
2340       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2341       : N->getOperand(FirstTblReg + 3);
2342     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2343   }
2344
2345   SmallVector<SDValue, 6> Ops;
2346   if (IsExt)
2347     Ops.push_back(N->getOperand(1));
2348   Ops.push_back(RegSeq);
2349   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2350   Ops.push_back(getAL(CurDAG, dl)); // predicate
2351   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2352   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2353 }
2354
2355 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2356   if (!Subtarget->hasV6T2Ops())
2357     return false;
2358
2359   unsigned Opc = isSigned
2360     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2361     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2362   SDLoc dl(N);
2363
2364   // For unsigned extracts, check for a shift right and mask
2365   unsigned And_imm = 0;
2366   if (N->getOpcode() == ISD::AND) {
2367     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2368
2369       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2370       if (And_imm & (And_imm + 1))
2371         return false;
2372
2373       unsigned Srl_imm = 0;
2374       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2375                                 Srl_imm)) {
2376         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2377
2378         // Note: The width operand is encoded as width-1.
2379         unsigned Width = countTrailingOnes(And_imm) - 1;
2380         unsigned LSB = Srl_imm;
2381
2382         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2383
2384         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2385           // It's cheaper to use a right shift to extract the top bits.
2386           if (Subtarget->isThumb()) {
2387             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2388             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2389                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2390                               getAL(CurDAG, dl), Reg0, Reg0 };
2391             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2392             return true;
2393           }
2394
2395           // ARM models shift instructions as MOVsi with shifter operand.
2396           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2397           SDValue ShOpc =
2398             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2399                                       MVT::i32);
2400           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2401                             getAL(CurDAG, dl), Reg0, Reg0 };
2402           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2403           return true;
2404         }
2405
2406         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2407                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2408                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2409                           getAL(CurDAG, dl), Reg0 };
2410         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2411         return true;
2412       }
2413     }
2414     return false;
2415   }
2416
2417   // Otherwise, we're looking for a shift of a shift
2418   unsigned Shl_imm = 0;
2419   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2420     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2421     unsigned Srl_imm = 0;
2422     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2423       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2424       // Note: The width operand is encoded as width-1.
2425       unsigned Width = 32 - Srl_imm - 1;
2426       int LSB = Srl_imm - Shl_imm;
2427       if (LSB < 0)
2428         return false;
2429       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2430       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2431                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2432                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2433                         getAL(CurDAG, dl), Reg0 };
2434       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2435       return true;
2436     }
2437   }
2438
2439   // Or we are looking for a shift of an and, with a mask operand
2440   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2441       isShiftedMask_32(And_imm)) {
2442     unsigned Srl_imm = 0;
2443     unsigned LSB = countTrailingZeros(And_imm);
2444     // Shift must be the same as the ands lsb
2445     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2446       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2447       unsigned MSB = 31 - countLeadingZeros(And_imm);
2448       // Note: The width operand is encoded as width-1.
2449       unsigned Width = MSB - LSB;
2450       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2451       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2452                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2453                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2454                         getAL(CurDAG, dl), Reg0 };
2455       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2456       return true;
2457     }
2458   }
2459
2460   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2461     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2462     unsigned LSB = 0;
2463     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2464         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2465       return false;
2466
2467     if (LSB + Width > 32)
2468       return false;
2469
2470     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2471     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2472                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2473                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2474                       getAL(CurDAG, dl), Reg0 };
2475     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2476     return true;
2477   }
2478
2479   return false;
2480 }
2481
2482 /// Target-specific DAG combining for ISD::XOR.
2483 /// Target-independent combining lowers SELECT_CC nodes of the form
2484 /// select_cc setg[ge] X,  0,  X, -X
2485 /// select_cc setgt    X, -1,  X, -X
2486 /// select_cc setl[te] X,  0, -X,  X
2487 /// select_cc setlt    X,  1, -X,  X
2488 /// which represent Integer ABS into:
2489 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2490 /// ARM instruction selection detects the latter and matches it to
2491 /// ARM::ABS or ARM::t2ABS machine node.
2492 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2493   SDValue XORSrc0 = N->getOperand(0);
2494   SDValue XORSrc1 = N->getOperand(1);
2495   EVT VT = N->getValueType(0);
2496
2497   if (Subtarget->isThumb1Only())
2498     return false;
2499
2500   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2501     return false;
2502
2503   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2504   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2505   SDValue SRASrc0 = XORSrc1.getOperand(0);
2506   SDValue SRASrc1 = XORSrc1.getOperand(1);
2507   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2508   EVT XType = SRASrc0.getValueType();
2509   unsigned Size = XType.getSizeInBits() - 1;
2510
2511   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2512       XType.isInteger() && SRAConstant != nullptr &&
2513       Size == SRAConstant->getZExtValue()) {
2514     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2515     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2516     return true;
2517   }
2518
2519   return false;
2520 }
2521
2522 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2523                                  bool Accumulate) {
2524   // For SM*WB, we need to some form of sext.
2525   // For SM*WT, we need to search for (sra X, 16)
2526   // Src1 then gets set to X.
2527   if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2528        SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2529        SignExt.getOpcode() == ISD::AssertSext) &&
2530        SignExt.getValueType() == MVT::i32) {
2531
2532     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2533     Src1 = SignExt.getOperand(0);
2534     return true;
2535   }
2536
2537   if (SignExt.getOpcode() != ISD::SRA)
2538     return false;
2539
2540   ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2541   if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2542     return false;
2543
2544   SDValue Op0 = SignExt.getOperand(0);
2545
2546   // The sign extend operand for SM*WB could be generated by a shl and ashr.
2547   if (Op0.getOpcode() == ISD::SHL) {
2548     SDValue SHL = Op0;
2549     ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2550     if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
2551       return false;
2552
2553     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2554     Src1 = Op0.getOperand(0);
2555     return true;
2556   }
2557   *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2558   Src1 = SignExt.getOperand(0);
2559   return true;
2560 }
2561
2562 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2563                                 SDValue &Src1, bool Accumulate) {
2564   // First we look for:
2565   // (add (or (srl ?, 16), (shl ?, 16)))
2566   if (OR.getOpcode() != ISD::OR)
2567     return false;
2568
2569   SDValue SRL = OR.getOperand(0);
2570   SDValue SHL = OR.getOperand(1);
2571
2572   if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2573     SRL = OR.getOperand(1);
2574     SHL = OR.getOperand(0);
2575     if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2576       return false;
2577   }
2578
2579   ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2580   ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2581   if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2582       SHLSrc1->getZExtValue() != 16)
2583     return false;
2584
2585   // The first operands to the shifts need to be the two results from the
2586   // same smul_lohi node.
2587   if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2588        SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
2589     return false;
2590
2591   SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2592   if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2593       SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2594     return false;
2595
2596   // Now we have:
2597   // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2598   // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2599   // For SMLAWB the 16-bit value will signed extended somehow.
2600   // For SMLAWT only the SRA is required.
2601
2602   // Check both sides of SMUL_LOHI
2603   if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2604     Src0 = SMULLOHI->getOperand(1);
2605   } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2606                                   Accumulate)) {
2607     Src0 = SMULLOHI->getOperand(0);
2608   } else {
2609     return false;
2610   }
2611   return true;
2612 }
2613
2614 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
2615   SDLoc dl(N);
2616   SDValue Src0 = N->getOperand(0);
2617   SDValue Src1 = N->getOperand(1);
2618   SDValue A, B;
2619   unsigned Opc = 0;
2620
2621   if (N->getOpcode() == ISD::ADD) {
2622     if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2623       return false;
2624
2625     SDValue Acc;
2626     if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2627       Acc = Src1;
2628     } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2629       Acc = Src0;
2630     } else {
2631       return false;
2632     }
2633     if (Opc == 0)
2634       return false;
2635
2636     SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2637                       CurDAG->getRegister(0, MVT::i32) };
2638     CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
2639     return true;
2640   } else if (N->getOpcode() == ISD::OR &&
2641              SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2642     if (Opc == 0)
2643       return false;
2644
2645     SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2646                       CurDAG->getRegister(0, MVT::i32)};
2647     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2648     return true;
2649   }
2650   return false;
2651 }
2652
2653 /// We've got special pseudo-instructions for these
2654 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2655   unsigned Opcode;
2656   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2657   if (MemTy == MVT::i8)
2658     Opcode = ARM::CMP_SWAP_8;
2659   else if (MemTy == MVT::i16)
2660     Opcode = ARM::CMP_SWAP_16;
2661   else if (MemTy == MVT::i32)
2662     Opcode = ARM::CMP_SWAP_32;
2663   else
2664     llvm_unreachable("Unknown AtomicCmpSwap type");
2665
2666   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2667                    N->getOperand(0)};
2668   SDNode *CmpSwap = CurDAG->getMachineNode(
2669       Opcode, SDLoc(N),
2670       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2671
2672   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2673   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2674   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2675
2676   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2677   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2678   CurDAG->RemoveDeadNode(N);
2679 }
2680
2681 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2682   // The only time a CONCAT_VECTORS operation can have legal types is when
2683   // two 64-bit vectors are concatenated to a 128-bit vector.
2684   EVT VT = N->getValueType(0);
2685   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2686     llvm_unreachable("unexpected CONCAT_VECTORS");
2687   ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
2688 }
2689
2690 void ARMDAGToDAGISel::Select(SDNode *N) {
2691   SDLoc dl(N);
2692
2693   if (N->isMachineOpcode()) {
2694     N->setNodeId(-1);
2695     return;   // Already selected.
2696   }
2697
2698   switch (N->getOpcode()) {
2699   default: break;
2700   case ISD::ADD:
2701   case ISD::OR:
2702     if (trySMLAWSMULW(N))
2703       return;
2704     break;
2705   case ISD::WRITE_REGISTER:
2706     if (tryWriteRegister(N))
2707       return;
2708     break;
2709   case ISD::READ_REGISTER:
2710     if (tryReadRegister(N))
2711       return;
2712     break;
2713   case ISD::INLINEASM:
2714     if (tryInlineAsm(N))
2715       return;
2716     break;
2717   case ISD::XOR:
2718     // Select special operations if XOR node forms integer ABS pattern
2719     if (tryABSOp(N))
2720       return;
2721     // Other cases are autogenerated.
2722     break;
2723   case ISD::Constant: {
2724     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2725     // If we can't materialize the constant we need to use a literal pool
2726     if (ConstantMaterializationCost(Val) > 2) {
2727       SDValue CPIdx = CurDAG->getTargetConstantPool(
2728           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2729           TLI->getPointerTy(CurDAG->getDataLayout()));
2730
2731       SDNode *ResNode;
2732       if (Subtarget->isThumb()) {
2733         SDValue Pred = getAL(CurDAG, dl);
2734         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2735         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2736         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2737                                          Ops);
2738       } else {
2739         SDValue Ops[] = {
2740           CPIdx,
2741           CurDAG->getTargetConstant(0, dl, MVT::i32),
2742           getAL(CurDAG, dl),
2743           CurDAG->getRegister(0, MVT::i32),
2744           CurDAG->getEntryNode()
2745         };
2746         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2747                                          Ops);
2748       }
2749       ReplaceNode(N, ResNode);
2750       return;
2751     }
2752
2753     // Other cases are autogenerated.
2754     break;
2755   }
2756   case ISD::FrameIndex: {
2757     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2758     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2759     SDValue TFI = CurDAG->getTargetFrameIndex(
2760         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2761     if (Subtarget->isThumb1Only()) {
2762       // Set the alignment of the frame object to 4, to avoid having to generate
2763       // more than one ADD
2764       MachineFrameInfo *MFI = MF->getFrameInfo();
2765       if (MFI->getObjectAlignment(FI) < 4)
2766         MFI->setObjectAlignment(FI, 4);
2767       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2768                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2769       return;
2770     } else {
2771       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2772                       ARM::t2ADDri : ARM::ADDri);
2773       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2774                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2775                         CurDAG->getRegister(0, MVT::i32) };
2776       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2777       return;
2778     }
2779   }
2780   case ISD::SRL:
2781     if (tryV6T2BitfieldExtractOp(N, false))
2782       return;
2783     break;
2784   case ISD::SIGN_EXTEND_INREG:
2785   case ISD::SRA:
2786     if (tryV6T2BitfieldExtractOp(N, true))
2787       return;
2788     break;
2789   case ISD::MUL:
2790     if (Subtarget->isThumb1Only())
2791       break;
2792     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2793       unsigned RHSV = C->getZExtValue();
2794       if (!RHSV) break;
2795       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2796         unsigned ShImm = Log2_32(RHSV-1);
2797         if (ShImm >= 32)
2798           break;
2799         SDValue V = N->getOperand(0);
2800         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2801         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2802         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2803         if (Subtarget->isThumb()) {
2804           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2805           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2806           return;
2807         } else {
2808           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2809                             Reg0 };
2810           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2811           return;
2812         }
2813       }
2814       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2815         unsigned ShImm = Log2_32(RHSV+1);
2816         if (ShImm >= 32)
2817           break;
2818         SDValue V = N->getOperand(0);
2819         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2820         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2821         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2822         if (Subtarget->isThumb()) {
2823           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2824           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2825           return;
2826         } else {
2827           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2828                             Reg0 };
2829           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2830           return;
2831         }
2832       }
2833     }
2834     break;
2835   case ISD::AND: {
2836     // Check for unsigned bitfield extract
2837     if (tryV6T2BitfieldExtractOp(N, false))
2838       return;
2839
2840     // If an immediate is used in an AND node, it is possible that the immediate
2841     // can be more optimally materialized when negated. If this is the case we
2842     // can negate the immediate and use a BIC instead.
2843     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2844     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2845       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2846
2847       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2848       // immediate can be negated and fit in the immediate operand of
2849       // a t2BIC, don't do any manual transform here as this can be
2850       // handled by the generic ISel machinery.
2851       bool PreferImmediateEncoding =
2852         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2853       if (!PreferImmediateEncoding &&
2854           ConstantMaterializationCost(Imm) >
2855               ConstantMaterializationCost(~Imm)) {
2856         // The current immediate costs more to materialize than a negated
2857         // immediate, so negate the immediate and use a BIC.
2858         SDValue NewImm =
2859           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2860         // If the new constant didn't exist before, reposition it in the topological
2861         // ordering so it is just before N. Otherwise, don't touch its location.
2862         if (NewImm->getNodeId() == -1)
2863           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2864
2865         if (!Subtarget->hasThumb2()) {
2866           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2867                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2868                            CurDAG->getRegister(0, MVT::i32)};
2869           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2870           return;
2871         } else {
2872           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2873                            CurDAG->getRegister(0, MVT::i32),
2874                            CurDAG->getRegister(0, MVT::i32)};
2875           ReplaceNode(N,
2876                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2877           return;
2878         }
2879       }
2880     }
2881
2882     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2883     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2884     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2885     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2886     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2887     EVT VT = N->getValueType(0);
2888     if (VT != MVT::i32)
2889       break;
2890     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2891       ? ARM::t2MOVTi16
2892       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2893     if (!Opc)
2894       break;
2895     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2896     N1C = dyn_cast<ConstantSDNode>(N1);
2897     if (!N1C)
2898       break;
2899     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2900       SDValue N2 = N0.getOperand(1);
2901       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2902       if (!N2C)
2903         break;
2904       unsigned N1CVal = N1C->getZExtValue();
2905       unsigned N2CVal = N2C->getZExtValue();
2906       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2907           (N1CVal & 0xffffU) == 0xffffU &&
2908           (N2CVal & 0xffffU) == 0x0U) {
2909         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2910                                                   dl, MVT::i32);
2911         SDValue Ops[] = { N0.getOperand(0), Imm16,
2912                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2913         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2914         return;
2915       }
2916     }
2917     break;
2918   }
2919   case ARMISD::VMOVRRD:
2920     ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2921                                           N->getOperand(0), getAL(CurDAG, dl),
2922                                           CurDAG->getRegister(0, MVT::i32)));
2923     return;
2924   case ISD::UMUL_LOHI: {
2925     if (Subtarget->isThumb1Only())
2926       break;
2927     if (Subtarget->isThumb()) {
2928       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2929                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2930       ReplaceNode(
2931           N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
2932       return;
2933     } else {
2934       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2935                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2936                         CurDAG->getRegister(0, MVT::i32) };
2937       ReplaceNode(N, CurDAG->getMachineNode(
2938                          Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
2939                          MVT::i32, MVT::i32, Ops));
2940       return;
2941     }
2942   }
2943   case ISD::SMUL_LOHI: {
2944     if (Subtarget->isThumb1Only())
2945       break;
2946     if (Subtarget->isThumb()) {
2947       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2948                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2949       ReplaceNode(
2950           N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
2951       return;
2952     } else {
2953       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2954                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2955                         CurDAG->getRegister(0, MVT::i32) };
2956       ReplaceNode(N, CurDAG->getMachineNode(
2957                          Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
2958                          MVT::i32, MVT::i32, Ops));
2959       return;
2960     }
2961   }
2962   case ARMISD::UMAAL: {
2963     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2964     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2965                       N->getOperand(2), N->getOperand(3),
2966                       getAL(CurDAG, dl),
2967                       CurDAG->getRegister(0, MVT::i32) };
2968     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2969     return;
2970   }
2971   case ARMISD::UMLAL:{
2972     // UMAAL is similar to UMLAL but it adds two 32-bit values to the
2973     // 64-bit multiplication result.
2974     if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
2975         N->getOperand(3).getOpcode() == ARMISD::ADDE) {
2976
2977       SDValue Addc = N->getOperand(2);
2978       SDValue Adde = N->getOperand(3);
2979
2980       if (Adde.getOperand(2).getNode() == Addc.getNode()) {
2981
2982         ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
2983         ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
2984
2985         if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
2986         {
2987           // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
2988           // RdLo = one operand to be added, lower 32-bits of res
2989           // RdHi = other operand to be added, upper 32-bits of res
2990           // Rn = first multiply operand
2991           // Rm = second multiply operand
2992           SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2993                             Addc.getOperand(0), Addc.getOperand(1),
2994                             getAL(CurDAG, dl),
2995                             CurDAG->getRegister(0, MVT::i32) };
2996           unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2997           CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
2998           return;
2999         }
3000       }
3001     }
3002
3003     if (Subtarget->isThumb()) {
3004       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3005                         N->getOperand(3), getAL(CurDAG, dl),
3006                         CurDAG->getRegister(0, MVT::i32)};
3007       ReplaceNode(
3008           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3009       return;
3010     }else{
3011       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3012                         N->getOperand(3), getAL(CurDAG, dl),
3013                         CurDAG->getRegister(0, MVT::i32),
3014                         CurDAG->getRegister(0, MVT::i32) };
3015       ReplaceNode(N, CurDAG->getMachineNode(
3016                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3017                          MVT::i32, MVT::i32, Ops));
3018       return;
3019     }
3020   }
3021   case ARMISD::SMLAL:{
3022     if (Subtarget->isThumb()) {
3023       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3024                         N->getOperand(3), getAL(CurDAG, dl),
3025                         CurDAG->getRegister(0, MVT::i32)};
3026       ReplaceNode(
3027           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3028       return;
3029     }else{
3030       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3031                         N->getOperand(3), getAL(CurDAG, dl),
3032                         CurDAG->getRegister(0, MVT::i32),
3033                         CurDAG->getRegister(0, MVT::i32) };
3034       ReplaceNode(N, CurDAG->getMachineNode(
3035                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3036                          MVT::i32, MVT::i32, Ops));
3037       return;
3038     }
3039   }
3040   case ISD::LOAD: {
3041     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3042       if (tryT2IndexedLoad(N))
3043         return;
3044     } else if (Subtarget->isThumb()) {
3045       if (tryT1IndexedLoad(N))
3046         return;
3047     } else if (tryARMIndexedLoad(N))
3048       return;
3049     // Other cases are autogenerated.
3050     break;
3051   }
3052   case ARMISD::BRCOND: {
3053     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3054     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3055     // Pattern complexity = 6  cost = 1  size = 0
3056
3057     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3058     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3059     // Pattern complexity = 6  cost = 1  size = 0
3060
3061     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3062     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3063     // Pattern complexity = 6  cost = 1  size = 0
3064
3065     unsigned Opc = Subtarget->isThumb() ?
3066       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3067     SDValue Chain = N->getOperand(0);
3068     SDValue N1 = N->getOperand(1);
3069     SDValue N2 = N->getOperand(2);
3070     SDValue N3 = N->getOperand(3);
3071     SDValue InFlag = N->getOperand(4);
3072     assert(N1.getOpcode() == ISD::BasicBlock);
3073     assert(N2.getOpcode() == ISD::Constant);
3074     assert(N3.getOpcode() == ISD::Register);
3075
3076     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
3077                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
3078                                MVT::i32);
3079     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3080     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3081                                              MVT::Glue, Ops);
3082     Chain = SDValue(ResNode, 0);
3083     if (N->getNumValues() == 2) {
3084       InFlag = SDValue(ResNode, 1);
3085       ReplaceUses(SDValue(N, 1), InFlag);
3086     }
3087     ReplaceUses(SDValue(N, 0),
3088                 SDValue(Chain.getNode(), Chain.getResNo()));
3089     CurDAG->RemoveDeadNode(N);
3090     return;
3091   }
3092   case ARMISD::VZIP: {
3093     unsigned Opc = 0;
3094     EVT VT = N->getValueType(0);
3095     switch (VT.getSimpleVT().SimpleTy) {
3096     default: return;
3097     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3098     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3099     case MVT::v2f32:
3100     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3101     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3102     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3103     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3104     case MVT::v4f32:
3105     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3106     }
3107     SDValue Pred = getAL(CurDAG, dl);
3108     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3109     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3110     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3111     return;
3112   }
3113   case ARMISD::VUZP: {
3114     unsigned Opc = 0;
3115     EVT VT = N->getValueType(0);
3116     switch (VT.getSimpleVT().SimpleTy) {
3117     default: return;
3118     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3119     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3120     case MVT::v2f32:
3121     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3122     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3123     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3124     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3125     case MVT::v4f32:
3126     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3127     }
3128     SDValue Pred = getAL(CurDAG, dl);
3129     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3130     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3131     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3132     return;
3133   }
3134   case ARMISD::VTRN: {
3135     unsigned Opc = 0;
3136     EVT VT = N->getValueType(0);
3137     switch (VT.getSimpleVT().SimpleTy) {
3138     default: return;
3139     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3140     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3141     case MVT::v2f32:
3142     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3143     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3144     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3145     case MVT::v4f32:
3146     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3147     }
3148     SDValue Pred = getAL(CurDAG, dl);
3149     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3150     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3151     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3152     return;
3153   }
3154   case ARMISD::BUILD_VECTOR: {
3155     EVT VecVT = N->getValueType(0);
3156     EVT EltVT = VecVT.getVectorElementType();
3157     unsigned NumElts = VecVT.getVectorNumElements();
3158     if (EltVT == MVT::f64) {
3159       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3160       ReplaceNode(
3161           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3162       return;
3163     }
3164     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3165     if (NumElts == 2) {
3166       ReplaceNode(
3167           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3168       return;
3169     }
3170     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3171     ReplaceNode(N,
3172                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3173                                     N->getOperand(2), N->getOperand(3)));
3174     return;
3175   }
3176
3177   case ARMISD::VLD2DUP: {
3178     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3179                                         ARM::VLD2DUPd32 };
3180     SelectVLDDup(N, false, 2, Opcodes);
3181     return;
3182   }
3183
3184   case ARMISD::VLD3DUP: {
3185     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3186                                         ARM::VLD3DUPd16Pseudo,
3187                                         ARM::VLD3DUPd32Pseudo };
3188     SelectVLDDup(N, false, 3, Opcodes);
3189     return;
3190   }
3191
3192   case ARMISD::VLD4DUP: {
3193     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3194                                         ARM::VLD4DUPd16Pseudo,
3195                                         ARM::VLD4DUPd32Pseudo };
3196     SelectVLDDup(N, false, 4, Opcodes);
3197     return;
3198   }
3199
3200   case ARMISD::VLD2DUP_UPD: {
3201     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3202                                         ARM::VLD2DUPd16wb_fixed,
3203                                         ARM::VLD2DUPd32wb_fixed };
3204     SelectVLDDup(N, true, 2, Opcodes);
3205     return;
3206   }
3207
3208   case ARMISD::VLD3DUP_UPD: {
3209     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3210                                         ARM::VLD3DUPd16Pseudo_UPD,
3211                                         ARM::VLD3DUPd32Pseudo_UPD };
3212     SelectVLDDup(N, true, 3, Opcodes);
3213     return;
3214   }
3215
3216   case ARMISD::VLD4DUP_UPD: {
3217     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3218                                         ARM::VLD4DUPd16Pseudo_UPD,
3219                                         ARM::VLD4DUPd32Pseudo_UPD };
3220     SelectVLDDup(N, true, 4, Opcodes);
3221     return;
3222   }
3223
3224   case ARMISD::VLD1_UPD: {
3225     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3226                                          ARM::VLD1d16wb_fixed,
3227                                          ARM::VLD1d32wb_fixed,
3228                                          ARM::VLD1d64wb_fixed };
3229     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3230                                          ARM::VLD1q16wb_fixed,
3231                                          ARM::VLD1q32wb_fixed,
3232                                          ARM::VLD1q64wb_fixed };
3233     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3234     return;
3235   }
3236
3237   case ARMISD::VLD2_UPD: {
3238     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3239                                          ARM::VLD2d16wb_fixed,
3240                                          ARM::VLD2d32wb_fixed,
3241                                          ARM::VLD1q64wb_fixed};
3242     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3243                                          ARM::VLD2q16PseudoWB_fixed,
3244                                          ARM::VLD2q32PseudoWB_fixed };
3245     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3246     return;
3247   }
3248
3249   case ARMISD::VLD3_UPD: {
3250     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3251                                          ARM::VLD3d16Pseudo_UPD,
3252                                          ARM::VLD3d32Pseudo_UPD,
3253                                          ARM::VLD1d64TPseudoWB_fixed};
3254     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3255                                           ARM::VLD3q16Pseudo_UPD,
3256                                           ARM::VLD3q32Pseudo_UPD };
3257     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3258                                           ARM::VLD3q16oddPseudo_UPD,
3259                                           ARM::VLD3q32oddPseudo_UPD };
3260     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3261     return;
3262   }
3263
3264   case ARMISD::VLD4_UPD: {
3265     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3266                                          ARM::VLD4d16Pseudo_UPD,
3267                                          ARM::VLD4d32Pseudo_UPD,
3268                                          ARM::VLD1d64QPseudoWB_fixed};
3269     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3270                                           ARM::VLD4q16Pseudo_UPD,
3271                                           ARM::VLD4q32Pseudo_UPD };
3272     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3273                                           ARM::VLD4q16oddPseudo_UPD,
3274                                           ARM::VLD4q32oddPseudo_UPD };
3275     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3276     return;
3277   }
3278
3279   case ARMISD::VLD2LN_UPD: {
3280     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3281                                          ARM::VLD2LNd16Pseudo_UPD,
3282                                          ARM::VLD2LNd32Pseudo_UPD };
3283     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3284                                          ARM::VLD2LNq32Pseudo_UPD };
3285     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3286     return;
3287   }
3288
3289   case ARMISD::VLD3LN_UPD: {
3290     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3291                                          ARM::VLD3LNd16Pseudo_UPD,
3292                                          ARM::VLD3LNd32Pseudo_UPD };
3293     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3294                                          ARM::VLD3LNq32Pseudo_UPD };
3295     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3296     return;
3297   }
3298
3299   case ARMISD::VLD4LN_UPD: {
3300     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3301                                          ARM::VLD4LNd16Pseudo_UPD,
3302                                          ARM::VLD4LNd32Pseudo_UPD };
3303     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3304                                          ARM::VLD4LNq32Pseudo_UPD };
3305     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3306     return;
3307   }
3308
3309   case ARMISD::VST1_UPD: {
3310     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3311                                          ARM::VST1d16wb_fixed,
3312                                          ARM::VST1d32wb_fixed,
3313                                          ARM::VST1d64wb_fixed };
3314     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3315                                          ARM::VST1q16wb_fixed,
3316                                          ARM::VST1q32wb_fixed,
3317                                          ARM::VST1q64wb_fixed };
3318     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3319     return;
3320   }
3321
3322   case ARMISD::VST2_UPD: {
3323     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3324                                          ARM::VST2d16wb_fixed,
3325                                          ARM::VST2d32wb_fixed,
3326                                          ARM::VST1q64wb_fixed};
3327     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3328                                          ARM::VST2q16PseudoWB_fixed,
3329                                          ARM::VST2q32PseudoWB_fixed };
3330     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3331     return;
3332   }
3333
3334   case ARMISD::VST3_UPD: {
3335     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3336                                          ARM::VST3d16Pseudo_UPD,
3337                                          ARM::VST3d32Pseudo_UPD,
3338                                          ARM::VST1d64TPseudoWB_fixed};
3339     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3340                                           ARM::VST3q16Pseudo_UPD,
3341                                           ARM::VST3q32Pseudo_UPD };
3342     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3343                                           ARM::VST3q16oddPseudo_UPD,
3344                                           ARM::VST3q32oddPseudo_UPD };
3345     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3346     return;
3347   }
3348
3349   case ARMISD::VST4_UPD: {
3350     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3351                                          ARM::VST4d16Pseudo_UPD,
3352                                          ARM::VST4d32Pseudo_UPD,
3353                                          ARM::VST1d64QPseudoWB_fixed};
3354     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3355                                           ARM::VST4q16Pseudo_UPD,
3356                                           ARM::VST4q32Pseudo_UPD };
3357     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3358                                           ARM::VST4q16oddPseudo_UPD,
3359                                           ARM::VST4q32oddPseudo_UPD };
3360     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3361     return;
3362   }
3363
3364   case ARMISD::VST2LN_UPD: {
3365     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3366                                          ARM::VST2LNd16Pseudo_UPD,
3367                                          ARM::VST2LNd32Pseudo_UPD };
3368     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3369                                          ARM::VST2LNq32Pseudo_UPD };
3370     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3371     return;
3372   }
3373
3374   case ARMISD::VST3LN_UPD: {
3375     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3376                                          ARM::VST3LNd16Pseudo_UPD,
3377                                          ARM::VST3LNd32Pseudo_UPD };
3378     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3379                                          ARM::VST3LNq32Pseudo_UPD };
3380     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3381     return;
3382   }
3383
3384   case ARMISD::VST4LN_UPD: {
3385     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3386                                          ARM::VST4LNd16Pseudo_UPD,
3387                                          ARM::VST4LNd32Pseudo_UPD };
3388     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3389                                          ARM::VST4LNq32Pseudo_UPD };
3390     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3391     return;
3392   }
3393
3394   case ISD::INTRINSIC_VOID:
3395   case ISD::INTRINSIC_W_CHAIN: {
3396     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3397     switch (IntNo) {
3398     default:
3399       break;
3400
3401     case Intrinsic::arm_mrrc:
3402     case Intrinsic::arm_mrrc2: {
3403       SDLoc dl(N);
3404       SDValue Chain = N->getOperand(0);
3405       unsigned Opc;
3406
3407       if (Subtarget->isThumb())
3408         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3409       else
3410         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3411
3412       SmallVector<SDValue, 5> Ops;
3413       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3414       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3415       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3416
3417       // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the encoded
3418       // instruction are always '1111'. Assembly language does permit specifying AL as a
3419       // predicate to mrrc2, but that makes no difference to the encoded instruction.
3420       if (Opc != ARM::MRRC2) {
3421         Ops.push_back(getAL(CurDAG, dl));
3422         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3423       }
3424
3425       Ops.push_back(Chain);
3426
3427       // Writes to two registers.
3428       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3429
3430       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3431       return;
3432     }
3433     case Intrinsic::arm_ldaexd:
3434     case Intrinsic::arm_ldrexd: {
3435       SDLoc dl(N);
3436       SDValue Chain = N->getOperand(0);
3437       SDValue MemAddr = N->getOperand(2);
3438       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3439
3440       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3441       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3442                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3443
3444       // arm_ldrexd returns an i64 value in {i32, i32}
3445       std::vector<EVT> ResTys;
3446       if (isThumb) {
3447         ResTys.push_back(MVT::i32);
3448         ResTys.push_back(MVT::i32);
3449       } else
3450         ResTys.push_back(MVT::Untyped);
3451       ResTys.push_back(MVT::Other);
3452
3453       // Place arguments in the right order.
3454       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3455                        CurDAG->getRegister(0, MVT::i32), Chain};
3456       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3457       // Transfer memoperands.
3458       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3459       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3460       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3461
3462       // Remap uses.
3463       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3464       if (!SDValue(N, 0).use_empty()) {
3465         SDValue Result;
3466         if (isThumb)
3467           Result = SDValue(Ld, 0);
3468         else {
3469           SDValue SubRegIdx =
3470             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3471           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3472               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3473           Result = SDValue(ResNode,0);
3474         }
3475         ReplaceUses(SDValue(N, 0), Result);
3476       }
3477       if (!SDValue(N, 1).use_empty()) {
3478         SDValue Result;
3479         if (isThumb)
3480           Result = SDValue(Ld, 1);
3481         else {
3482           SDValue SubRegIdx =
3483             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3484           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3485               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3486           Result = SDValue(ResNode,0);
3487         }
3488         ReplaceUses(SDValue(N, 1), Result);
3489       }
3490       ReplaceUses(SDValue(N, 2), OutChain);
3491       CurDAG->RemoveDeadNode(N);
3492       return;
3493     }
3494     case Intrinsic::arm_stlexd:
3495     case Intrinsic::arm_strexd: {
3496       SDLoc dl(N);
3497       SDValue Chain = N->getOperand(0);
3498       SDValue Val0 = N->getOperand(2);
3499       SDValue Val1 = N->getOperand(3);
3500       SDValue MemAddr = N->getOperand(4);
3501
3502       // Store exclusive double returns an i32 value which is the return status
3503       // of the issued store.
3504       const EVT ResTys[] = {MVT::i32, MVT::Other};
3505
3506       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3507       // Place arguments in the right order.
3508       SmallVector<SDValue, 7> Ops;
3509       if (isThumb) {
3510         Ops.push_back(Val0);
3511         Ops.push_back(Val1);
3512       } else
3513         // arm_strexd uses GPRPair.
3514         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3515       Ops.push_back(MemAddr);
3516       Ops.push_back(getAL(CurDAG, dl));
3517       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3518       Ops.push_back(Chain);
3519
3520       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3521       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3522                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3523
3524       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3525       // Transfer memoperands.
3526       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3527       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3528       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3529
3530       ReplaceNode(N, St);
3531       return;
3532     }
3533
3534     case Intrinsic::arm_neon_vld1: {
3535       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3536                                            ARM::VLD1d32, ARM::VLD1d64 };
3537       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3538                                            ARM::VLD1q32, ARM::VLD1q64};
3539       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3540       return;
3541     }
3542
3543     case Intrinsic::arm_neon_vld2: {
3544       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3545                                            ARM::VLD2d32, ARM::VLD1q64 };
3546       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3547                                            ARM::VLD2q32Pseudo };
3548       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3549       return;
3550     }
3551
3552     case Intrinsic::arm_neon_vld3: {
3553       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3554                                            ARM::VLD3d16Pseudo,
3555                                            ARM::VLD3d32Pseudo,
3556                                            ARM::VLD1d64TPseudo };
3557       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3558                                             ARM::VLD3q16Pseudo_UPD,
3559                                             ARM::VLD3q32Pseudo_UPD };
3560       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3561                                             ARM::VLD3q16oddPseudo,
3562                                             ARM::VLD3q32oddPseudo };
3563       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3564       return;
3565     }
3566
3567     case Intrinsic::arm_neon_vld4: {
3568       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3569                                            ARM::VLD4d16Pseudo,
3570                                            ARM::VLD4d32Pseudo,
3571                                            ARM::VLD1d64QPseudo };
3572       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3573                                             ARM::VLD4q16Pseudo_UPD,
3574                                             ARM::VLD4q32Pseudo_UPD };
3575       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3576                                             ARM::VLD4q16oddPseudo,
3577                                             ARM::VLD4q32oddPseudo };
3578       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3579       return;
3580     }
3581
3582     case Intrinsic::arm_neon_vld2lane: {
3583       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3584                                            ARM::VLD2LNd16Pseudo,
3585                                            ARM::VLD2LNd32Pseudo };
3586       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3587                                            ARM::VLD2LNq32Pseudo };
3588       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3589       return;
3590     }
3591
3592     case Intrinsic::arm_neon_vld3lane: {
3593       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3594                                            ARM::VLD3LNd16Pseudo,
3595                                            ARM::VLD3LNd32Pseudo };
3596       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3597                                            ARM::VLD3LNq32Pseudo };
3598       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3599       return;
3600     }
3601
3602     case Intrinsic::arm_neon_vld4lane: {
3603       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3604                                            ARM::VLD4LNd16Pseudo,
3605                                            ARM::VLD4LNd32Pseudo };
3606       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3607                                            ARM::VLD4LNq32Pseudo };
3608       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3609       return;
3610     }
3611
3612     case Intrinsic::arm_neon_vst1: {
3613       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3614                                            ARM::VST1d32, ARM::VST1d64 };
3615       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3616                                            ARM::VST1q32, ARM::VST1q64 };
3617       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3618       return;
3619     }
3620
3621     case Intrinsic::arm_neon_vst2: {
3622       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3623                                            ARM::VST2d32, ARM::VST1q64 };
3624       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3625                                            ARM::VST2q32Pseudo };
3626       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3627       return;
3628     }
3629
3630     case Intrinsic::arm_neon_vst3: {
3631       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3632                                            ARM::VST3d16Pseudo,
3633                                            ARM::VST3d32Pseudo,
3634                                            ARM::VST1d64TPseudo };
3635       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3636                                             ARM::VST3q16Pseudo_UPD,
3637                                             ARM::VST3q32Pseudo_UPD };
3638       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3639                                             ARM::VST3q16oddPseudo,
3640                                             ARM::VST3q32oddPseudo };
3641       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3642       return;
3643     }
3644
3645     case Intrinsic::arm_neon_vst4: {
3646       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3647                                            ARM::VST4d16Pseudo,
3648                                            ARM::VST4d32Pseudo,
3649                                            ARM::VST1d64QPseudo };
3650       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3651                                             ARM::VST4q16Pseudo_UPD,
3652                                             ARM::VST4q32Pseudo_UPD };
3653       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3654                                             ARM::VST4q16oddPseudo,
3655                                             ARM::VST4q32oddPseudo };
3656       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3657       return;
3658     }
3659
3660     case Intrinsic::arm_neon_vst2lane: {
3661       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3662                                            ARM::VST2LNd16Pseudo,
3663                                            ARM::VST2LNd32Pseudo };
3664       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3665                                            ARM::VST2LNq32Pseudo };
3666       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3667       return;
3668     }
3669
3670     case Intrinsic::arm_neon_vst3lane: {
3671       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3672                                            ARM::VST3LNd16Pseudo,
3673                                            ARM::VST3LNd32Pseudo };
3674       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3675                                            ARM::VST3LNq32Pseudo };
3676       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3677       return;
3678     }
3679
3680     case Intrinsic::arm_neon_vst4lane: {
3681       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3682                                            ARM::VST4LNd16Pseudo,
3683                                            ARM::VST4LNd32Pseudo };
3684       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3685                                            ARM::VST4LNq32Pseudo };
3686       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3687       return;
3688     }
3689     }
3690     break;
3691   }
3692
3693   case ISD::INTRINSIC_WO_CHAIN: {
3694     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3695     switch (IntNo) {
3696     default:
3697       break;
3698
3699     case Intrinsic::arm_neon_vtbl2:
3700       SelectVTBL(N, false, 2, ARM::VTBL2);
3701       return;
3702     case Intrinsic::arm_neon_vtbl3:
3703       SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3704       return;
3705     case Intrinsic::arm_neon_vtbl4:
3706       SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3707       return;
3708
3709     case Intrinsic::arm_neon_vtbx2:
3710       SelectVTBL(N, true, 2, ARM::VTBX2);
3711       return;
3712     case Intrinsic::arm_neon_vtbx3:
3713       SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3714       return;
3715     case Intrinsic::arm_neon_vtbx4:
3716       SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3717       return;
3718     }
3719     break;
3720   }
3721
3722   case ARMISD::VTBL1: {
3723     SDLoc dl(N);
3724     EVT VT = N->getValueType(0);
3725     SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3726                      getAL(CurDAG, dl),                 // Predicate
3727                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3728     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
3729     return;
3730   }
3731   case ARMISD::VTBL2: {
3732     SDLoc dl(N);
3733     EVT VT = N->getValueType(0);
3734
3735     // Form a REG_SEQUENCE to force register allocation.
3736     SDValue V0 = N->getOperand(0);
3737     SDValue V1 = N->getOperand(1);
3738     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3739
3740     SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
3741                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3742     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
3743     return;
3744   }
3745
3746   case ISD::CONCAT_VECTORS:
3747     SelectConcatVector(N);
3748     return;
3749
3750   case ISD::ATOMIC_CMP_SWAP:
3751     SelectCMP_SWAP(N);
3752     return;
3753   }
3754
3755   SelectCode(N);
3756 }
3757
3758 // Inspect a register string of the form
3759 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3760 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3761 // and obtain the integer operands from them, adding these operands to the
3762 // provided vector.
3763 static void getIntOperandsFromRegisterString(StringRef RegString,
3764                                              SelectionDAG *CurDAG,
3765                                              const SDLoc &DL,
3766                                              std::vector<SDValue> &Ops) {
3767   SmallVector<StringRef, 5> Fields;
3768   RegString.split(Fields, ':');
3769
3770   if (Fields.size() > 1) {
3771     bool AllIntFields = true;
3772
3773     for (StringRef Field : Fields) {
3774       // Need to trim out leading 'cp' characters and get the integer field.
3775       unsigned IntField;
3776       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3777       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3778     }
3779
3780     assert(AllIntFields &&
3781             "Unexpected non-integer value in special register string.");
3782   }
3783 }
3784
3785 // Maps a Banked Register string to its mask value. The mask value returned is
3786 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3787 // mask operand, which expresses which register is to be used, e.g. r8, and in
3788 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3789 // was invalid.
3790 static inline int getBankedRegisterMask(StringRef RegString) {
3791   return StringSwitch<int>(RegString.lower())
3792           .Case("r8_usr", 0x00)
3793           .Case("r9_usr", 0x01)
3794           .Case("r10_usr", 0x02)
3795           .Case("r11_usr", 0x03)
3796           .Case("r12_usr", 0x04)
3797           .Case("sp_usr", 0x05)
3798           .Case("lr_usr", 0x06)
3799           .Case("r8_fiq", 0x08)
3800           .Case("r9_fiq", 0x09)
3801           .Case("r10_fiq", 0x0a)
3802           .Case("r11_fiq", 0x0b)
3803           .Case("r12_fiq", 0x0c)
3804           .Case("sp_fiq", 0x0d)
3805           .Case("lr_fiq", 0x0e)
3806           .Case("lr_irq", 0x10)
3807           .Case("sp_irq", 0x11)
3808           .Case("lr_svc", 0x12)
3809           .Case("sp_svc", 0x13)
3810           .Case("lr_abt", 0x14)
3811           .Case("sp_abt", 0x15)
3812           .Case("lr_und", 0x16)
3813           .Case("sp_und", 0x17)
3814           .Case("lr_mon", 0x1c)
3815           .Case("sp_mon", 0x1d)
3816           .Case("elr_hyp", 0x1e)
3817           .Case("sp_hyp", 0x1f)
3818           .Case("spsr_fiq", 0x2e)
3819           .Case("spsr_irq", 0x30)
3820           .Case("spsr_svc", 0x32)
3821           .Case("spsr_abt", 0x34)
3822           .Case("spsr_und", 0x36)
3823           .Case("spsr_mon", 0x3c)
3824           .Case("spsr_hyp", 0x3e)
3825           .Default(-1);
3826 }
3827
3828 // Maps a MClass special register string to its value for use in the
3829 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3830 // Returns -1 to signify that the string was invalid.
3831 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3832   return StringSwitch<int>(RegString.lower())
3833           .Case("apsr", 0x0)
3834           .Case("iapsr", 0x1)
3835           .Case("eapsr", 0x2)
3836           .Case("xpsr", 0x3)
3837           .Case("ipsr", 0x5)
3838           .Case("epsr", 0x6)
3839           .Case("iepsr", 0x7)
3840           .Case("msp", 0x8)
3841           .Case("psp", 0x9)
3842           .Case("primask", 0x10)
3843           .Case("basepri", 0x11)
3844           .Case("basepri_max", 0x12)
3845           .Case("faultmask", 0x13)
3846           .Case("control", 0x14)
3847           .Case("msplim", 0x0a)
3848           .Case("psplim", 0x0b)
3849           .Case("sp", 0x18)
3850           .Default(-1);
3851 }
3852
3853 // The flags here are common to those allowed for apsr in the A class cores and
3854 // those allowed for the special registers in the M class cores. Returns a
3855 // value representing which flags were present, -1 if invalid.
3856 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3857   if (Flags.empty())
3858     return 0x2 | (int)hasDSP;
3859
3860   return StringSwitch<int>(Flags)
3861           .Case("g", 0x1)
3862           .Case("nzcvq", 0x2)
3863           .Case("nzcvqg", 0x3)
3864           .Default(-1);
3865 }
3866
3867 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3868                                  const ARMSubtarget *Subtarget) {
3869   // Ensure that the register (without flags) was a valid M Class special
3870   // register.
3871   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3872   if (SYSmvalue == -1)
3873     return -1;
3874
3875   // basepri, basepri_max and faultmask are only valid for V7m.
3876   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3877     return -1;
3878
3879   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3880     Flags = "";
3881     SYSmvalue |= 0x80;
3882   }
3883
3884   if (!Subtarget->has8MSecExt() &&
3885       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3886     return -1;
3887
3888   if (!Subtarget->hasV8MMainlineOps() &&
3889       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3890        SYSmvalue == 0x93))
3891     return -1;
3892
3893   // If it was a read then we won't be expecting flags and so at this point
3894   // we can return the mask.
3895   if (IsRead) {
3896     if (Flags.empty())
3897       return SYSmvalue;
3898     else
3899       return -1;
3900   }
3901
3902   // We know we are now handling a write so need to get the mask for the flags.
3903   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
3904
3905   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3906   // shouldn't have flags present.
3907   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3908     return -1;
3909
3910   // The _g and _nzcvqg versions are only valid if the DSP extension is
3911   // available.
3912   if (!Subtarget->hasDSP() && (Mask & 0x1))
3913     return -1;
3914
3915   // The register was valid so need to put the mask in the correct place
3916   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3917   // construct the operand for the instruction node.
3918   if (SYSmvalue < 0x4)
3919     return SYSmvalue | Mask << 10;
3920
3921   return SYSmvalue;
3922 }
3923
3924 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3925   // The mask operand contains the special register (R Bit) in bit 4, whether
3926   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3927   // bits 3-0 contains the fields to be accessed in the special register, set by
3928   // the flags provided with the register.
3929   int Mask = 0;
3930   if (Reg == "apsr") {
3931     // The flags permitted for apsr are the same flags that are allowed in
3932     // M class registers. We get the flag value and then shift the flags into
3933     // the correct place to combine with the mask.
3934     Mask = getMClassFlagsMask(Flags, true);
3935     if (Mask == -1)
3936       return -1;
3937     return Mask << 2;
3938   }
3939
3940   if (Reg != "cpsr" && Reg != "spsr") {
3941     return -1;
3942   }
3943
3944   // This is the same as if the flags were "fc"
3945   if (Flags.empty() || Flags == "all")
3946     return Mask | 0x9;
3947
3948   // Inspect the supplied flags string and set the bits in the mask for
3949   // the relevant and valid flags allowed for cpsr and spsr.
3950   for (char Flag : Flags) {
3951     int FlagVal;
3952     switch (Flag) {
3953       case 'c':
3954         FlagVal = 0x1;
3955         break;
3956       case 'x':
3957         FlagVal = 0x2;
3958         break;
3959       case 's':
3960         FlagVal = 0x4;
3961         break;
3962       case 'f':
3963         FlagVal = 0x8;
3964         break;
3965       default:
3966         FlagVal = 0;
3967     }
3968
3969     // This avoids allowing strings where the same flag bit appears twice.
3970     if (!FlagVal || (Mask & FlagVal))
3971       return -1;
3972     Mask |= FlagVal;
3973   }
3974
3975   // If the register is spsr then we need to set the R bit.
3976   if (Reg == "spsr")
3977     Mask |= 0x10;
3978
3979   return Mask;
3980 }
3981
3982 // Lower the read_register intrinsic to ARM specific DAG nodes
3983 // using the supplied metadata string to select the instruction node to use
3984 // and the registers/masks to construct as operands for the node.
3985 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3986   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3987   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3988   bool IsThumb2 = Subtarget->isThumb2();
3989   SDLoc DL(N);
3990
3991   std::vector<SDValue> Ops;
3992   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3993
3994   if (!Ops.empty()) {
3995     // If the special register string was constructed of fields (as defined
3996     // in the ACLE) then need to lower to MRC node (32 bit) or
3997     // MRRC node(64 bit), we can make the distinction based on the number of
3998     // operands we have.
3999     unsigned Opcode;
4000     SmallVector<EVT, 3> ResTypes;
4001     if (Ops.size() == 5){
4002       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4003       ResTypes.append({ MVT::i32, MVT::Other });
4004     } else {
4005       assert(Ops.size() == 3 &&
4006               "Invalid number of fields in special register string.");
4007       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4008       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4009     }
4010
4011     Ops.push_back(getAL(CurDAG, DL));
4012     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4013     Ops.push_back(N->getOperand(0));
4014     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4015     return true;
4016   }
4017
4018   std::string SpecialReg = RegString->getString().lower();
4019
4020   int BankedReg = getBankedRegisterMask(SpecialReg);
4021   if (BankedReg != -1) {
4022     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4023             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4024             N->getOperand(0) };
4025     ReplaceNode(
4026         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4027                                   DL, MVT::i32, MVT::Other, Ops));
4028     return true;
4029   }
4030
4031   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4032   // corresponding to the register that is being read from. So we switch on the
4033   // string to find which opcode we need to use.
4034   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4035                     .Case("fpscr", ARM::VMRS)
4036                     .Case("fpexc", ARM::VMRS_FPEXC)
4037                     .Case("fpsid", ARM::VMRS_FPSID)
4038                     .Case("mvfr0", ARM::VMRS_MVFR0)
4039                     .Case("mvfr1", ARM::VMRS_MVFR1)
4040                     .Case("mvfr2", ARM::VMRS_MVFR2)
4041                     .Case("fpinst", ARM::VMRS_FPINST)
4042                     .Case("fpinst2", ARM::VMRS_FPINST2)
4043                     .Default(0);
4044
4045   // If an opcode was found then we can lower the read to a VFP instruction.
4046   if (Opcode) {
4047     if (!Subtarget->hasVFP2())
4048       return false;
4049     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4050       return false;
4051
4052     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4053             N->getOperand(0) };
4054     ReplaceNode(N,
4055                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4056     return true;
4057   }
4058
4059   // If the target is M Class then need to validate that the register string
4060   // is an acceptable value, so check that a mask can be constructed from the
4061   // string.
4062   if (Subtarget->isMClass()) {
4063     StringRef Flags = "", Reg = SpecialReg;
4064     if (Reg.endswith("_ns")) {
4065       Flags = "ns";
4066       Reg = Reg.drop_back(3);
4067     }
4068
4069     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4070     if (SYSmValue == -1)
4071       return false;
4072
4073     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4074                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4075                       N->getOperand(0) };
4076     ReplaceNode(
4077         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4078     return true;
4079   }
4080
4081   // Here we know the target is not M Class so we need to check if it is one
4082   // of the remaining possible values which are apsr, cpsr or spsr.
4083   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4084     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4085             N->getOperand(0) };
4086     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4087                                           DL, MVT::i32, MVT::Other, Ops));
4088     return true;
4089   }
4090
4091   if (SpecialReg == "spsr") {
4092     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4093             N->getOperand(0) };
4094     ReplaceNode(
4095         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4096                                   MVT::i32, MVT::Other, Ops));
4097     return true;
4098   }
4099
4100   return false;
4101 }
4102
4103 // Lower the write_register intrinsic to ARM specific DAG nodes
4104 // using the supplied metadata string to select the instruction node to use
4105 // and the registers/masks to use in the nodes
4106 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4107   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4108   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4109   bool IsThumb2 = Subtarget->isThumb2();
4110   SDLoc DL(N);
4111
4112   std::vector<SDValue> Ops;
4113   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4114
4115   if (!Ops.empty()) {
4116     // If the special register string was constructed of fields (as defined
4117     // in the ACLE) then need to lower to MCR node (32 bit) or
4118     // MCRR node(64 bit), we can make the distinction based on the number of
4119     // operands we have.
4120     unsigned Opcode;
4121     if (Ops.size() == 5) {
4122       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4123       Ops.insert(Ops.begin()+2, N->getOperand(2));
4124     } else {
4125       assert(Ops.size() == 3 &&
4126               "Invalid number of fields in special register string.");
4127       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4128       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4129       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4130     }
4131
4132     Ops.push_back(getAL(CurDAG, DL));
4133     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4134     Ops.push_back(N->getOperand(0));
4135
4136     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4137     return true;
4138   }
4139
4140   std::string SpecialReg = RegString->getString().lower();
4141   int BankedReg = getBankedRegisterMask(SpecialReg);
4142   if (BankedReg != -1) {
4143     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4144             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4145             N->getOperand(0) };
4146     ReplaceNode(
4147         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4148                                   DL, MVT::Other, Ops));
4149     return true;
4150   }
4151
4152   // The VFP registers are written to by creating SelectionDAG nodes with
4153   // opcodes corresponding to the register that is being written. So we switch
4154   // on the string to find which opcode we need to use.
4155   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4156                     .Case("fpscr", ARM::VMSR)
4157                     .Case("fpexc", ARM::VMSR_FPEXC)
4158                     .Case("fpsid", ARM::VMSR_FPSID)
4159                     .Case("fpinst", ARM::VMSR_FPINST)
4160                     .Case("fpinst2", ARM::VMSR_FPINST2)
4161                     .Default(0);
4162
4163   if (Opcode) {
4164     if (!Subtarget->hasVFP2())
4165       return false;
4166     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4167             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4168     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4169     return true;
4170   }
4171
4172   std::pair<StringRef, StringRef> Fields;
4173   Fields = StringRef(SpecialReg).rsplit('_');
4174   std::string Reg = Fields.first.str();
4175   StringRef Flags = Fields.second;
4176
4177   // If the target was M Class then need to validate the special register value
4178   // and retrieve the mask for use in the instruction node.
4179   if (Subtarget->isMClass()) {
4180     // basepri_max gets split so need to correct Reg and Flags.
4181     if (SpecialReg == "basepri_max") {
4182       Reg = SpecialReg;
4183       Flags = "";
4184     }
4185     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4186     if (SYSmValue == -1)
4187       return false;
4188
4189     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4190                       N->getOperand(2), getAL(CurDAG, DL),
4191                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4192     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4193     return true;
4194   }
4195
4196   // We then check to see if a valid mask can be constructed for one of the
4197   // register string values permitted for the A and R class cores. These values
4198   // are apsr, spsr and cpsr; these are also valid on older cores.
4199   int Mask = getARClassRegisterMask(Reg, Flags);
4200   if (Mask != -1) {
4201     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4202             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4203             N->getOperand(0) };
4204     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4205                                           DL, MVT::Other, Ops));
4206     return true;
4207   }
4208
4209   return false;
4210 }
4211
4212 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4213   std::vector<SDValue> AsmNodeOperands;
4214   unsigned Flag, Kind;
4215   bool Changed = false;
4216   unsigned NumOps = N->getNumOperands();
4217
4218   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4219   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4220   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4221   // respectively. Since there is no constraint to explicitly specify a
4222   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4223   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4224   // them into a GPRPair.
4225
4226   SDLoc dl(N);
4227   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4228                                    : SDValue(nullptr,0);
4229
4230   SmallVector<bool, 8> OpChanged;
4231   // Glue node will be appended late.
4232   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4233     SDValue op = N->getOperand(i);
4234     AsmNodeOperands.push_back(op);
4235
4236     if (i < InlineAsm::Op_FirstOperand)
4237       continue;
4238
4239     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4240       Flag = C->getZExtValue();
4241       Kind = InlineAsm::getKind(Flag);
4242     }
4243     else
4244       continue;
4245
4246     // Immediate operands to inline asm in the SelectionDAG are modeled with
4247     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4248     // the second is a constant with the value of the immediate. If we get here
4249     // and we have a Kind_Imm, skip the next operand, and continue.
4250     if (Kind == InlineAsm::Kind_Imm) {
4251       SDValue op = N->getOperand(++i);
4252       AsmNodeOperands.push_back(op);
4253       continue;
4254     }
4255
4256     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4257     if (NumRegs)
4258       OpChanged.push_back(false);
4259
4260     unsigned DefIdx = 0;
4261     bool IsTiedToChangedOp = false;
4262     // If it's a use that is tied with a previous def, it has no
4263     // reg class constraint.
4264     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4265       IsTiedToChangedOp = OpChanged[DefIdx];
4266
4267     // Memory operands to inline asm in the SelectionDAG are modeled with two
4268     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4269     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4270     // it doesn't get misinterpreted), and continue. We do this here because
4271     // it's important to update the OpChanged array correctly before moving on.
4272     if (Kind == InlineAsm::Kind_Mem) {
4273       SDValue op = N->getOperand(++i);
4274       AsmNodeOperands.push_back(op);
4275       continue;
4276     }
4277
4278     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4279         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4280       continue;
4281
4282     unsigned RC;
4283     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4284     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4285         || NumRegs != 2)
4286       continue;
4287
4288     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4289     SDValue V0 = N->getOperand(i+1);
4290     SDValue V1 = N->getOperand(i+2);
4291     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4292     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4293     SDValue PairedReg;
4294     MachineRegisterInfo &MRI = MF->getRegInfo();
4295
4296     if (Kind == InlineAsm::Kind_RegDef ||
4297         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4298       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4299       // the original GPRs.
4300
4301       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4302       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4303       SDValue Chain = SDValue(N,0);
4304
4305       SDNode *GU = N->getGluedUser();
4306       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4307                                                Chain.getValue(1));
4308
4309       // Extract values from a GPRPair reg and copy to the original GPR reg.
4310       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4311                                                     RegCopy);
4312       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4313                                                     RegCopy);
4314       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4315                                         RegCopy.getValue(1));
4316       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4317
4318       // Update the original glue user.
4319       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4320       Ops.push_back(T1.getValue(1));
4321       CurDAG->UpdateNodeOperands(GU, Ops);
4322     }
4323     else {
4324       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4325       // GPRPair and then pass the GPRPair to the inline asm.
4326       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4327
4328       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4329       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4330                                           Chain.getValue(1));
4331       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4332                                           T0.getValue(1));
4333       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4334
4335       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4336       // i32 VRs of inline asm with it.
4337       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4338       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4339       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4340
4341       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4342       Glue = Chain.getValue(1);
4343     }
4344
4345     Changed = true;
4346
4347     if(PairedReg.getNode()) {
4348       OpChanged[OpChanged.size() -1 ] = true;
4349       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4350       if (IsTiedToChangedOp)
4351         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4352       else
4353         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4354       // Replace the current flag.
4355       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4356           Flag, dl, MVT::i32);
4357       // Add the new register node and skip the original two GPRs.
4358       AsmNodeOperands.push_back(PairedReg);
4359       // Skip the next two GPRs.
4360       i += 2;
4361     }
4362   }
4363
4364   if (Glue.getNode())
4365     AsmNodeOperands.push_back(Glue);
4366   if (!Changed)
4367     return false;
4368
4369   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4370       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4371   New->setNodeId(-1);
4372   ReplaceNode(N, New.getNode());
4373   return true;
4374 }
4375
4376
4377 bool ARMDAGToDAGISel::
4378 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4379                              std::vector<SDValue> &OutOps) {
4380   switch(ConstraintID) {
4381   default:
4382     llvm_unreachable("Unexpected asm memory constraint");
4383   case InlineAsm::Constraint_i:
4384     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4385     //        be an immediate and not a memory constraint.
4386     // Fallthrough.
4387   case InlineAsm::Constraint_m:
4388   case InlineAsm::Constraint_o:
4389   case InlineAsm::Constraint_Q:
4390   case InlineAsm::Constraint_Um:
4391   case InlineAsm::Constraint_Un:
4392   case InlineAsm::Constraint_Uq:
4393   case InlineAsm::Constraint_Us:
4394   case InlineAsm::Constraint_Ut:
4395   case InlineAsm::Constraint_Uv:
4396   case InlineAsm::Constraint_Uy:
4397     // Require the address to be in a register.  That is safe for all ARM
4398     // variants and it is hard to do anything much smarter without knowing
4399     // how the operand is used.
4400     OutOps.push_back(Op);
4401     return false;
4402   }
4403   return true;
4404 }
4405
4406 /// createARMISelDag - This pass converts a legalized DAG into a
4407 /// ARM-specific DAG, ready for instruction scheduling.
4408 ///
4409 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4410                                      CodeGenOpt::Level OptLevel) {
4411   return new ARMDAGToDAGISel(TM, OptLevel);
4412 }