contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Target/TargetLowering.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 enum AddrMode2Type {
  53   AM2_BASE, // Simple AM2 (+-imm12)
  54   AM2_SHOP  // Shifter-op AM2
  55 };
  56
  57 class ARMDAGToDAGISel : public SelectionDAGISel {
  58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  59   /// make the right decision when generating code for different targets.
  60   const ARMSubtarget *Subtarget;
  61
  62 public:
  63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  64       : SelectionDAGISel(tm, OptLevel) {}
  65
  66   bool runOnMachineFunction(MachineFunction &MF) override {
  67     // Reset the subtarget each time through.
  68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  69     SelectionDAGISel::runOnMachineFunction(MF);
  70     return true;
  71   }
  72
  73   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  74
  75   void PreprocessISelDAG() override;
  76
  77   /// getI32Imm - Return a target constant of type i32 with the specified
  78   /// value.
  79   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  80     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  81   }
  82
  83   void Select(SDNode *N) override;
  84
  85   bool hasNoVMLxHazardUse(SDNode *N) const;
  86   bool isShifterOpProfitable(const SDValue &Shift,
  87                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  88   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  89                                SDValue &B, SDValue &C,
  90                                bool CheckProfitability = true);
  91   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  92                                SDValue &B, bool CheckProfitability = true);
  93   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B, SDValue &C) {
  95     // Don't apply the profitability check
  96     return SelectRegShifterOperand(N, A, B, C, false);
  97   }
  98   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  99                                     SDValue &B) {
 100     // Don't apply the profitability check
 101     return SelectImmShifterOperand(N, A, B, false);
 102   }
 103
 104   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 105   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 106
 107   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 108                                       SDValue &Offset, SDValue &Opc);
 109   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 110                            SDValue &Opc) {
 111     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 112   }
 113
 114   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 115                            SDValue &Opc) {
 116     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 117   }
 118
 119   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 120                        SDValue &Opc) {
 121     SelectAddrMode2Worker(N, Base, Offset, Opc);
 122 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 123     // This always matches one way or another.
 124     return true;
 125   }
 126
 127   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 128     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 129     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 130     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 131     return true;
 132   }
 133
 134   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 135                              SDValue &Offset, SDValue &Opc);
 136   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 137                              SDValue &Offset, SDValue &Opc);
 138   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 139                              SDValue &Offset, SDValue &Opc);
 140   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 141   bool SelectAddrMode3(SDValue N, SDValue &Base,
 142                        SDValue &Offset, SDValue &Opc);
 143   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode5(SDValue N, SDValue &Base,
 146                        SDValue &Offset);
 147   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 148   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 149
 150   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 151
 152   // Thumb Addressing Modes:
 153   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 154   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 155                                 SDValue &OffImm);
 156   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 157                                  SDValue &OffImm);
 158   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 159                                  SDValue &OffImm);
 160   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 161                                  SDValue &OffImm);
 162   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 163
 164   // Thumb 2 Addressing Modes:
 165   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 166   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 167                             SDValue &OffImm);
 168   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 169                                  SDValue &OffImm);
 170   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 171                              SDValue &OffReg, SDValue &ShImm);
 172   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 173
 174   inline bool is_so_imm(unsigned Imm) const {
 175     return ARM_AM::getSOImmVal(Imm) != -1;
 176   }
 177
 178   inline bool is_so_imm_not(unsigned Imm) const {
 179     return ARM_AM::getSOImmVal(~Imm) != -1;
 180   }
 181
 182   inline bool is_t2_so_imm(unsigned Imm) const {
 183     return ARM_AM::getT2SOImmVal(Imm) != -1;
 184   }
 185
 186   inline bool is_t2_so_imm_not(unsigned Imm) const {
 187     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 188   }
 189
 190   // Include the pieces autogenerated from the target description.
 191 #include "ARMGenDAGISel.inc"
 192
 193 private:
 194   void transferMemOperands(SDNode *Src, SDNode *Dst);
 195
 196   /// Indexed (pre/post inc/dec) load matching code for ARM.
 197   bool tryARMIndexedLoad(SDNode *N);
 198   bool tryT1IndexedLoad(SDNode *N);
 199   bool tryT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 207                  const uint16_t *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 215                  const uint16_t *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 221                        unsigned NumVecs, const uint16_t *DOpcodes,
 222                        const uint16_t *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *DOpcodes,
 229                     const uint16_t *QOpcodes = nullptr);
 230
 231   /// Try to select SBFX/UBFX instructions for ARM.
 232   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 233
 234   // Select special operations if node forms integer ABS pattern
 235   bool tryABSOp(SDNode *N);
 236
 237   bool tryReadRegister(SDNode *N);
 238   bool tryWriteRegister(SDNode *N);
 239
 240   bool tryInlineAsm(SDNode *N);
 241
 242   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 243
 244   void SelectCMP_SWAP(SDNode *N);
 245
 246   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 247   /// inline asm expressions.
 248   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 249                                     std::vector<SDValue> &OutOps) override;
 250
 251   // Form pairs of consecutive R, S, D, or Q registers.
 252   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 253   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 254   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 255   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 256
 257   // Form sequences of 4 consecutive S, D, or Q registers.
 258   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 259   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 260   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 261
 262   // Get the alignment operand for a NEON VLD or VST instruction.
 263   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 264                         bool is64BitVector);
 265
 266   /// Returns the number of instructions required to materialize the given
 267   /// constant in a register, or 3 if a literal pool load is needed.
 268   unsigned ConstantMaterializationCost(unsigned Val) const;
 269
 270   /// Checks if N is a multiplication by a constant where we can extract out a
 271   /// power of two from the constant so that it can be used in a shift, but only
 272   /// if it simplifies the materialization of the constant. Returns true if it
 273   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 274   /// out and to NewMulConst the new constant to be multiplied by.
 275   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 276                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 277
 278   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 279   /// selected when N would have been selected.
 280   void replaceDAGValue(const SDValue &N, SDValue M);
 281 };
 282 }
 283
 284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 285 /// operand. If so Imm will receive the 32-bit value.
 286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 287   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 288     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 289     return true;
 290   }
 291   return false;
 292 }
 293
 294 // isInt32Immediate - This method tests to see if a constant operand.
 295 // If so Imm will receive the 32 bit value.
 296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 297   return isInt32Immediate(N.getNode(), Imm);
 298 }
 299
 300 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 301 // opcode and that it has a immediate integer right operand.
 302 // If so Imm will receive the 32 bit value.
 303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 304   return N->getOpcode() == Opc &&
 305          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 306 }
 307
 308 /// \brief Check whether a particular node is a constant value representable as
 309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 310 ///
 311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 312 static bool isScaledConstantInRange(SDValue Node, int Scale,
 313                                     int RangeMin, int RangeMax,
 314                                     int &ScaledConstant) {
 315   assert(Scale > 0 && "Invalid scale!");
 316
 317   // Check that this is a constant.
 318   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 319   if (!C)
 320     return false;
 321
 322   ScaledConstant = (int) C->getZExtValue();
 323   if ((ScaledConstant % Scale) != 0)
 324     return false;
 325
 326   ScaledConstant /= Scale;
 327   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 328 }
 329
 330 void ARMDAGToDAGISel::PreprocessISelDAG() {
 331   if (!Subtarget->hasV6T2Ops())
 332     return;
 333
 334   bool isThumb2 = Subtarget->isThumb();
 335   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 336        E = CurDAG->allnodes_end(); I != E; ) {
 337     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 338
 339     if (N->getOpcode() != ISD::ADD)
 340       continue;
 341
 342     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 343     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 344     // trailing zeros, e.g. 1020.
 345     // Transform the expression to
 346     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 347     // of trailing zeros of c2. The left shift would be folded as an shifter
 348     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 349     // node (UBFX).
 350
 351     SDValue N0 = N->getOperand(0);
 352     SDValue N1 = N->getOperand(1);
 353     unsigned And_imm = 0;
 354     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 355       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 356         std::swap(N0, N1);
 357     }
 358     if (!And_imm)
 359       continue;
 360
 361     // Check if the AND mask is an immediate of the form: 000.....1111111100
 362     unsigned TZ = countTrailingZeros(And_imm);
 363     if (TZ != 1 && TZ != 2)
 364       // Be conservative here. Shifter operands aren't always free. e.g. On
 365       // Swift, left shifter operand of 1 / 2 for free but others are not.
 366       // e.g.
 367       //  ubfx   r3, r1, #16, #8
 368       //  ldr.w  r3, [r0, r3, lsl #2]
 369       // vs.
 370       //  mov.w  r9, #1020
 371       //  and.w  r2, r9, r1, lsr #14
 372       //  ldr    r2, [r0, r2]
 373       continue;
 374     And_imm >>= TZ;
 375     if (And_imm & (And_imm + 1))
 376       continue;
 377
 378     // Look for (and (srl X, c1), c2).
 379     SDValue Srl = N1.getOperand(0);
 380     unsigned Srl_imm = 0;
 381     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 382         (Srl_imm <= 2))
 383       continue;
 384
 385     // Make sure first operand is not a shifter operand which would prevent
 386     // folding of the left shift.
 387     SDValue CPTmp0;
 388     SDValue CPTmp1;
 389     SDValue CPTmp2;
 390     if (isThumb2) {
 391       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 392         continue;
 393     } else {
 394       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 395           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 396         continue;
 397     }
 398
 399     // Now make the transformation.
 400     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 401                           Srl.getOperand(0),
 402                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 403                                               MVT::i32));
 404     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 405                          Srl,
 406                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 407     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 408                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 409     CurDAG->UpdateNodeOperands(N, N0, N1);
 410   }
 411 }
 412
 413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 415 /// least on current ARM implementations) which should be avoidded.
 416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 417   if (OptLevel == CodeGenOpt::None)
 418     return true;
 419
 420   if (!Subtarget->hasVMLxHazards())
 421     return true;
 422
 423   if (!N->hasOneUse())
 424     return false;
 425
 426   SDNode *Use = *N->use_begin();
 427   if (Use->getOpcode() == ISD::CopyToReg)
 428     return true;
 429   if (Use->isMachineOpcode()) {
 430     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 431         CurDAG->getSubtarget().getInstrInfo());
 432
 433     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 434     if (MCID.mayStore())
 435       return true;
 436     unsigned Opcode = MCID.getOpcode();
 437     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 438       return true;
 439     // vmlx feeding into another vmlx. We actually want to unfold
 440     // the use later in the MLxExpansion pass. e.g.
 441     // vmla
 442     // vmla (stall 8 cycles)
 443     //
 444     // vmul (5 cycles)
 445     // vadd (5 cycles)
 446     // vmla
 447     // This adds up to about 18 - 19 cycles.
 448     //
 449     // vmla
 450     // vmul (stall 4 cycles)
 451     // vadd adds up to about 14 cycles.
 452     return TII->isFpMLxInstruction(Opcode);
 453   }
 454
 455   return false;
 456 }
 457
 458 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 459                                             ARM_AM::ShiftOpc ShOpcVal,
 460                                             unsigned ShAmt) {
 461   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 462     return true;
 463   if (Shift.hasOneUse())
 464     return true;
 465   // R << 2 is free.
 466   return ShOpcVal == ARM_AM::lsl &&
 467          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 468 }
 469
 470 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 471   if (Subtarget->isThumb()) {
 472     if (Val <= 255) return 1;                               // MOV
 473     if (Subtarget->hasV6T2Ops() &&
 474         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 475       return 1; // MOVW
 476     if (Val <= 510) return 2;                               // MOV + ADDi8
 477     if (~Val <= 255) return 2;                              // MOV + MVN
 478     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 479   } else {
 480     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 481     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 482     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 483     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 484   }
 485   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 486   return 3; // Literal pool load
 487 }
 488
 489 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 490                                              unsigned MaxShift,
 491                                              unsigned &PowerOfTwo,
 492                                              SDValue &NewMulConst) const {
 493   assert(N.getOpcode() == ISD::MUL);
 494   assert(MaxShift > 0);
 495
 496   // If the multiply is used in more than one place then changing the constant
 497   // will make other uses incorrect, so don't.
 498   if (!N.hasOneUse()) return false;
 499   // Check if the multiply is by a constant
 500   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 501   if (!MulConst) return false;
 502   // If the constant is used in more than one place then modifying it will mean
 503   // we need to materialize two constants instead of one, which is a bad idea.
 504   if (!MulConst->hasOneUse()) return false;
 505   unsigned MulConstVal = MulConst->getZExtValue();
 506   if (MulConstVal == 0) return false;
 507
 508   // Find the largest power of 2 that MulConstVal is a multiple of
 509   PowerOfTwo = MaxShift;
 510   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 511     --PowerOfTwo;
 512     if (PowerOfTwo == 0) return false;
 513   }
 514
 515   // Only optimise if the new cost is better
 516   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 517   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 518   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 519   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 520   return NewCost < OldCost;
 521 }
 522
 523 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 524   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 525   CurDAG->ReplaceAllUsesWith(N, M);
 526 }
 527
 528 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 529                                               SDValue &BaseReg,
 530                                               SDValue &Opc,
 531                                               bool CheckProfitability) {
 532   if (DisableShifterOp)
 533     return false;
 534
 535   // If N is a multiply-by-constant and it's profitable to extract a shift and
 536   // use it in a shifted operand do so.
 537   if (N.getOpcode() == ISD::MUL) {
 538     unsigned PowerOfTwo = 0;
 539     SDValue NewMulConst;
 540     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 541       HandleSDNode Handle(N);
 542       SDLoc Loc(N);
 543       replaceDAGValue(N.getOperand(1), NewMulConst);
 544       BaseReg = Handle.getValue();
 545       Opc = CurDAG->getTargetConstant(
 546           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 547       return true;
 548     }
 549   }
 550
 551   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 552
 553   // Don't match base register only case. That is matched to a separate
 554   // lower complexity pattern with explicit register operand.
 555   if (ShOpcVal == ARM_AM::no_shift) return false;
 556
 557   BaseReg = N.getOperand(0);
 558   unsigned ShImmVal = 0;
 559   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 560   if (!RHS) return false;
 561   ShImmVal = RHS->getZExtValue() & 31;
 562   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 563                                   SDLoc(N), MVT::i32);
 564   return true;
 565 }
 566
 567 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 568                                               SDValue &BaseReg,
 569                                               SDValue &ShReg,
 570                                               SDValue &Opc,
 571                                               bool CheckProfitability) {
 572   if (DisableShifterOp)
 573     return false;
 574
 575   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 576
 577   // Don't match base register only case. That is matched to a separate
 578   // lower complexity pattern with explicit register operand.
 579   if (ShOpcVal == ARM_AM::no_shift) return false;
 580
 581   BaseReg = N.getOperand(0);
 582   unsigned ShImmVal = 0;
 583   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 584   if (RHS) return false;
 585
 586   ShReg = N.getOperand(1);
 587   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 588     return false;
 589   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 590                                   SDLoc(N), MVT::i32);
 591   return true;
 592 }
 593
 594
 595 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 596                                           SDValue &Base,
 597                                           SDValue &OffImm) {
 598   // Match simple R + imm12 operands.
 599
 600   // Base only.
 601   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 602       !CurDAG->isBaseWithConstantOffset(N)) {
 603     if (N.getOpcode() == ISD::FrameIndex) {
 604       // Match frame index.
 605       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 606       Base = CurDAG->getTargetFrameIndex(
 607           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 608       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 609       return true;
 610     }
 611
 612     if (N.getOpcode() == ARMISD::Wrapper &&
 613         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 614         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 615         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 616       Base = N.getOperand(0);
 617     } else
 618       Base = N;
 619     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 620     return true;
 621   }
 622
 623   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 624     int RHSC = (int)RHS->getSExtValue();
 625     if (N.getOpcode() == ISD::SUB)
 626       RHSC = -RHSC;
 627
 628     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 629       Base   = N.getOperand(0);
 630       if (Base.getOpcode() == ISD::FrameIndex) {
 631         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 632         Base = CurDAG->getTargetFrameIndex(
 633             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 634       }
 635       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 636       return true;
 637     }
 638   }
 639
 640   // Base only.
 641   Base = N;
 642   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 643   return true;
 644 }
 645
 646
 647
 648 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 649                                       SDValue &Opc) {
 650   if (N.getOpcode() == ISD::MUL &&
 651       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 652     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 653       // X * [3,5,9] -> X + X * [2,4,8] etc.
 654       int RHSC = (int)RHS->getZExtValue();
 655       if (RHSC & 1) {
 656         RHSC = RHSC & ~1;
 657         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 658         if (RHSC < 0) {
 659           AddSub = ARM_AM::sub;
 660           RHSC = - RHSC;
 661         }
 662         if (isPowerOf2_32(RHSC)) {
 663           unsigned ShAmt = Log2_32(RHSC);
 664           Base = Offset = N.getOperand(0);
 665           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 666                                                             ARM_AM::lsl),
 667                                           SDLoc(N), MVT::i32);
 668           return true;
 669         }
 670       }
 671     }
 672   }
 673
 674   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 675       // ISD::OR that is equivalent to an ISD::ADD.
 676       !CurDAG->isBaseWithConstantOffset(N))
 677     return false;
 678
 679   // Leave simple R +/- imm12 operands for LDRi12
 680   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 681     int RHSC;
 682     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 683                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 684       return false;
 685   }
 686
 687   // Otherwise this is R +/- [possibly shifted] R.
 688   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 689   ARM_AM::ShiftOpc ShOpcVal =
 690     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 691   unsigned ShAmt = 0;
 692
 693   Base   = N.getOperand(0);
 694   Offset = N.getOperand(1);
 695
 696   if (ShOpcVal != ARM_AM::no_shift) {
 697     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 698     // it.
 699     if (ConstantSDNode *Sh =
 700            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 701       ShAmt = Sh->getZExtValue();
 702       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 703         Offset = N.getOperand(1).getOperand(0);
 704       else {
 705         ShAmt = 0;
 706         ShOpcVal = ARM_AM::no_shift;
 707       }
 708     } else {
 709       ShOpcVal = ARM_AM::no_shift;
 710     }
 711   }
 712
 713   // Try matching (R shl C) + (R).
 714   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 715       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 716         N.getOperand(0).hasOneUse())) {
 717     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 718     if (ShOpcVal != ARM_AM::no_shift) {
 719       // Check to see if the RHS of the shift is a constant, if not, we can't
 720       // fold it.
 721       if (ConstantSDNode *Sh =
 722           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 723         ShAmt = Sh->getZExtValue();
 724         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 725           Offset = N.getOperand(0).getOperand(0);
 726           Base = N.getOperand(1);
 727         } else {
 728           ShAmt = 0;
 729           ShOpcVal = ARM_AM::no_shift;
 730         }
 731       } else {
 732         ShOpcVal = ARM_AM::no_shift;
 733       }
 734     }
 735   }
 736
 737   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 738   // and use it in a shifted operand do so.
 739   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 740     unsigned PowerOfTwo = 0;
 741     SDValue NewMulConst;
 742     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 743       HandleSDNode Handle(Offset);
 744       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 745       Offset = Handle.getValue();
 746       ShAmt = PowerOfTwo;
 747       ShOpcVal = ARM_AM::lsl;
 748     }
 749   }
 750
 751   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 752                                   SDLoc(N), MVT::i32);
 753   return true;
 754 }
 755
 756
 757 //-----
 758
/// Select the operands of an ARM addressing-mode-2 memory reference for the
/// address \p N, writing them to \p Base, \p Offset and \p Opc.
///
/// Returns AM2_BASE when the address is matched as a base with an optional
/// immediate folded into \p Opc (\p Offset is then register 0), and AM2_SHOP
/// when it is matched as a register +/- an optionally shifted register.
AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                     SDValue &Base,
                                                     SDValue &Offset,
                                                     SDValue &Opc) {
  // A multiply by an odd constant can be expressed as the operand plus or
  // minus a shifted copy of itself. On A9-like and Swift subtargets this is
  // only attempted when the multiply has a single use.
  if (N.getOpcode() == ISD::MUL &&
      (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Drop the low bit; what remains must be +/- a power of two.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return AM2_SHOP;
        }
      }
    }
  }

  // Anything that is not an add/sub (or an add-like OR) is used as a plain
  // base with a zero offset.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ADD.
      !CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Look through the wrapper unless it wraps a global, external symbol or
      // TLS address.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Match simple R +/- imm12 operands.
  if (N.getOpcode() != ISD::SUB) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      Offset = CurDAG->getRegister(0, MVT::i32);

      // The opcode carries the magnitude; the sign becomes add/sub.
      ARM_AM::AddrOpc AddSub = ARM_AM::add;
      if (RHSC < 0) {
        AddSub = ARM_AM::sub;
        RHSC = - RHSC;
      }
      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
                                                        ARM_AM::no_shift),
                                      SDLoc(N), MVT::i32);
      return AM2_BASE;
    }
  }

  if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    // Compute R +/- (R << N) and reuse it.
    Base = N;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                      ARM_AM::no_shift),
                                    SDLoc(N), MVT::i32);
    return AM2_BASE;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  // First try to fold a shift found on the right-hand operand.
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // Only for non-SUB nodes, since swapping the operands requires a
  // commutative operation, and only if no shift was folded above.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // The shifted value becomes the offset; the other operand the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return AM2_SHOP;
}
 897
 898 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 899                                             SDValue &Offset, SDValue &Opc) {
 900   unsigned Opcode = Op->getOpcode();
 901   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 902     ? cast<LoadSDNode>(Op)->getAddressingMode()
 903     : cast<StoreSDNode>(Op)->getAddressingMode();
 904   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 905     ? ARM_AM::add : ARM_AM::sub;
 906   int Val;
 907   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 908     return false;
 909
 910   Offset = N;
 911   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 912   unsigned ShAmt = 0;
 913   if (ShOpcVal != ARM_AM::no_shift) {
 914     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 915     // it.
 916     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 917       ShAmt = Sh->getZExtValue();
 918       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 919         Offset = N.getOperand(0);
 920       else {
 921         ShAmt = 0;
 922         ShOpcVal = ARM_AM::no_shift;
 923       }
 924     } else {
 925       ShOpcVal = ARM_AM::no_shift;
 926     }
 927   }
 928
 929   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 930                                   SDLoc(N), MVT::i32);
 931   return true;
 932 }
 933
 934 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 935                                             SDValue &Offset, SDValue &Opc) {
 936   unsigned Opcode = Op->getOpcode();
 937   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 938     ? cast<LoadSDNode>(Op)->getAddressingMode()
 939     : cast<StoreSDNode>(Op)->getAddressingMode();
 940   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 941     ? ARM_AM::add : ARM_AM::sub;
 942   int Val;
 943   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 944     if (AddSub == ARM_AM::sub) Val *= -1;
 945     Offset = CurDAG->getRegister(0, MVT::i32);
 946     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 947     return true;
 948   }
 949
 950   return false;
 951 }
 952
 953
 954 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 955                                             SDValue &Offset, SDValue &Opc) {
 956   unsigned Opcode = Op->getOpcode();
 957   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 958     ? cast<LoadSDNode>(Op)->getAddressingMode()
 959     : cast<StoreSDNode>(Op)->getAddressingMode();
 960   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 961     ? ARM_AM::add : ARM_AM::sub;
 962   int Val;
 963   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 964     Offset = CurDAG->getRegister(0, MVT::i32);
 965     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 966                                                       ARM_AM::no_shift),
 967                                     SDLoc(Op), MVT::i32);
 968     return true;
 969   }
 970
 971   return false;
 972 }
 973
 974 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 975   Base = N;
 976   return true;
 977 }
 978
 979 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 980                                       SDValue &Base, SDValue &Offset,
 981                                       SDValue &Opc) {
 982   if (N.getOpcode() == ISD::SUB) {
 983     // X - C  is canonicalize to X + -C, no need to handle it here.
 984     Base = N.getOperand(0);
 985     Offset = N.getOperand(1);
 986     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 987                                     MVT::i32);
 988     return true;
 989   }
 990
 991   if (!CurDAG->isBaseWithConstantOffset(N)) {
 992     Base = N;
 993     if (N.getOpcode() == ISD::FrameIndex) {
 994       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 995       Base = CurDAG->getTargetFrameIndex(
 996           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 997     }
 998     Offset = CurDAG->getRegister(0, MVT::i32);
 999     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1000                                     MVT::i32);
1001     return true;
1002   }
1003
1004   // If the RHS is +/- imm8, fold into addr mode.
1005   int RHSC;
1006   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1007                               -256 + 1, 256, RHSC)) { // 8 bits.
1008     Base = N.getOperand(0);
1009     if (Base.getOpcode() == ISD::FrameIndex) {
1010       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1011       Base = CurDAG->getTargetFrameIndex(
1012           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1013     }
1014     Offset = CurDAG->getRegister(0, MVT::i32);
1015
1016     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1017     if (RHSC < 0) {
1018       AddSub = ARM_AM::sub;
1019       RHSC = -RHSC;
1020     }
1021     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1022                                     MVT::i32);
1023     return true;
1024   }
1025
1026   Base = N.getOperand(0);
1027   Offset = N.getOperand(1);
1028   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1029                                   MVT::i32);
1030   return true;
1031 }
1032
1033 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1034                                             SDValue &Offset, SDValue &Opc) {
1035   unsigned Opcode = Op->getOpcode();
1036   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1037     ? cast<LoadSDNode>(Op)->getAddressingMode()
1038     : cast<StoreSDNode>(Op)->getAddressingMode();
1039   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1040     ? ARM_AM::add : ARM_AM::sub;
1041   int Val;
1042   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1043     Offset = CurDAG->getRegister(0, MVT::i32);
1044     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1045                                     MVT::i32);
1046     return true;
1047   }
1048
1049   Offset = N;
1050   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1051                                   MVT::i32);
1052   return true;
1053 }
1054
1055 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1056                                       SDValue &Base, SDValue &Offset) {
1057   if (!CurDAG->isBaseWithConstantOffset(N)) {
1058     Base = N;
1059     if (N.getOpcode() == ISD::FrameIndex) {
1060       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1061       Base = CurDAG->getTargetFrameIndex(
1062           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1063     } else if (N.getOpcode() == ARMISD::Wrapper &&
1064                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1065                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1066                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1067       Base = N.getOperand(0);
1068     }
1069     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1070                                        SDLoc(N), MVT::i32);
1071     return true;
1072   }
1073
1074   // If the RHS is +/- imm8, fold into addr mode.
1075   int RHSC;
1076   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1077                               -256 + 1, 256, RHSC)) {
1078     Base = N.getOperand(0);
1079     if (Base.getOpcode() == ISD::FrameIndex) {
1080       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1081       Base = CurDAG->getTargetFrameIndex(
1082           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1083     }
1084
1085     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1086     if (RHSC < 0) {
1087       AddSub = ARM_AM::sub;
1088       RHSC = -RHSC;
1089     }
1090     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1091                                        SDLoc(N), MVT::i32);
1092     return true;
1093   }
1094
1095   Base = N;
1096   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1097                                      SDLoc(N), MVT::i32);
1098   return true;
1099 }
1100
1101 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1102                                       SDValue &Align) {
1103   Addr = N;
1104
1105   unsigned Alignment = 0;
1106
1107   MemSDNode *MemN = cast<MemSDNode>(Parent);
1108
1109   if (isa<LSBaseSDNode>(MemN) ||
1110       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1111         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1112        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1113     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1114     // The maximum alignment is equal to the memory size being referenced.
1115     unsigned MMOAlign = MemN->getAlignment();
1116     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1117     if (MMOAlign >= MemSize && MemSize > 1)
1118       Alignment = MemSize;
1119   } else {
1120     // All other uses of addrmode6 are for intrinsics.  For now just record
1121     // the raw alignment value; it will be refined later based on the legal
1122     // alignment operands for the intrinsic.
1123     Alignment = MemN->getAlignment();
1124   }
1125
1126   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1127   return true;
1128 }
1129
1130 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1131                                             SDValue &Offset) {
1132   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1133   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1134   if (AM != ISD::POST_INC)
1135     return false;
1136   Offset = N;
1137   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1138     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1139       Offset = CurDAG->getRegister(0, MVT::i32);
1140   }
1141   return true;
1142 }
1143
1144 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1145                                        SDValue &Offset, SDValue &Label) {
1146   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1147     Offset = N.getOperand(0);
1148     SDValue N1 = N.getOperand(1);
1149     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1150                                       SDLoc(N), MVT::i32);
1151     return true;
1152   }
1153
1154   return false;
1155 }
1156
1157
1158 //===----------------------------------------------------------------------===//
1159 //                         Thumb Addressing Modes
1160 //===----------------------------------------------------------------------===//
1161
1162 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1163                                             SDValue &Base, SDValue &Offset){
1164   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1165     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1166     if (!NC || !NC->isNullValue())
1167       return false;
1168
1169     Base = Offset = N;
1170     return true;
1171   }
1172
1173   Base = N.getOperand(0);
1174   Offset = N.getOperand(1);
1175   return true;
1176 }
1177
1178 bool
1179 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1180                                           SDValue &Base, SDValue &OffImm) {
1181   if (!CurDAG->isBaseWithConstantOffset(N)) {
1182     if (N.getOpcode() == ISD::ADD) {
1183       return false; // We want to select register offset instead
1184     } else if (N.getOpcode() == ARMISD::Wrapper &&
1185         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1186         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1187         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1188         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1189       Base = N.getOperand(0);
1190     } else {
1191       Base = N;
1192     }
1193
1194     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1195     return true;
1196   }
1197
1198   // If the RHS is + imm5 * scale, fold into addr mode.
1199   int RHSC;
1200   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1201     Base = N.getOperand(0);
1202     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1203     return true;
1204   }
1205
1206   // Offset is too large, so use register offset instead.
1207   return false;
1208 }
1209
1210 bool
1211 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1212                                            SDValue &OffImm) {
1213   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1214 }
1215
1216 bool
1217 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1218                                            SDValue &OffImm) {
1219   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1220 }
1221
1222 bool
1223 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1224                                            SDValue &OffImm) {
1225   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1226 }
1227
1228 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1229                                             SDValue &Base, SDValue &OffImm) {
1230   if (N.getOpcode() == ISD::FrameIndex) {
1231     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1232     // Only multiples of 4 are allowed for the offset, so the frame object
1233     // alignment must be at least 4.
1234     MachineFrameInfo &MFI = MF->getFrameInfo();
1235     if (MFI.getObjectAlignment(FI) < 4)
1236       MFI.setObjectAlignment(FI, 4);
1237     Base = CurDAG->getTargetFrameIndex(
1238         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1239     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1240     return true;
1241   }
1242
1243   if (!CurDAG->isBaseWithConstantOffset(N))
1244     return false;
1245
1246   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1247   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1248       (LHSR && LHSR->getReg() == ARM::SP)) {
1249     // If the RHS is + imm8 * scale, fold into addr mode.
1250     int RHSC;
1251     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1252       Base = N.getOperand(0);
1253       if (Base.getOpcode() == ISD::FrameIndex) {
1254         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1255         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1256         // indexed by the LHS must be 4-byte aligned.
1257         MachineFrameInfo &MFI = MF->getFrameInfo();
1258         if (MFI.getObjectAlignment(FI) < 4)
1259           MFI.setObjectAlignment(FI, 4);
1260         Base = CurDAG->getTargetFrameIndex(
1261             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1262       }
1263       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1264       return true;
1265     }
1266   }
1267
1268   return false;
1269 }
1270
1271
1272 //===----------------------------------------------------------------------===//
1273 //                        Thumb 2 Addressing Modes
1274 //===----------------------------------------------------------------------===//
1275
1276
1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1278                                             SDValue &Base, SDValue &OffImm) {
1279   // Match simple R + imm12 operands.
1280
1281   // Base only.
1282   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1283       !CurDAG->isBaseWithConstantOffset(N)) {
1284     if (N.getOpcode() == ISD::FrameIndex) {
1285       // Match frame index.
1286       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1287       Base = CurDAG->getTargetFrameIndex(
1288           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1289       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290       return true;
1291     }
1292
1293     if (N.getOpcode() == ARMISD::Wrapper &&
1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1295         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1296         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1297       Base = N.getOperand(0);
1298       if (Base.getOpcode() == ISD::TargetConstantPool)
1299         return false;  // We want to select t2LDRpci instead.
1300     } else
1301       Base = N;
1302     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1303     return true;
1304   }
1305
1306   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1307     if (SelectT2AddrModeImm8(N, Base, OffImm))
1308       // Let t2LDRi8 handle (R - imm8).
1309       return false;
1310
1311     int RHSC = (int)RHS->getZExtValue();
1312     if (N.getOpcode() == ISD::SUB)
1313       RHSC = -RHSC;
1314
1315     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1316       Base   = N.getOperand(0);
1317       if (Base.getOpcode() == ISD::FrameIndex) {
1318         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1319         Base = CurDAG->getTargetFrameIndex(
1320             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1321       }
1322       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1323       return true;
1324     }
1325   }
1326
1327   // Base only.
1328   Base = N;
1329   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1330   return true;
1331 }
1332
1333 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1334                                            SDValue &Base, SDValue &OffImm) {
1335   // Match simple R - imm8 operands.
1336   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1337       !CurDAG->isBaseWithConstantOffset(N))
1338     return false;
1339
1340   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1341     int RHSC = (int)RHS->getSExtValue();
1342     if (N.getOpcode() == ISD::SUB)
1343       RHSC = -RHSC;
1344
1345     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1346       Base = N.getOperand(0);
1347       if (Base.getOpcode() == ISD::FrameIndex) {
1348         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1349         Base = CurDAG->getTargetFrameIndex(
1350             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1351       }
1352       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1353       return true;
1354     }
1355   }
1356
1357   return false;
1358 }
1359
1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1361                                                  SDValue &OffImm){
1362   unsigned Opcode = Op->getOpcode();
1363   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1364     ? cast<LoadSDNode>(Op)->getAddressingMode()
1365     : cast<StoreSDNode>(Op)->getAddressingMode();
1366   int RHSC;
1367   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1368     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1369       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1370       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1371     return true;
1372   }
1373
1374   return false;
1375 }
1376
/// Match a Thumb2 register-offset address of the form Base + (OffReg << ShImm).
///
/// Returns false when \p N is neither an ADD nor a base-with-constant-offset
/// node, or when its right-hand constant falls in the ranges left to the
/// immediate forms. Otherwise fills in \p Base, \p OffReg and \p ShImm and
/// returns true. May rewrite a multiply-by-constant offset in the DAG to
/// expose a shift.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shift amounts below 4 are folded, and only when profitable;
      // otherwise the shift node itself stays as the offset register.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    // NOTE(review): the literal 3 is presumably the maximum shift to extract,
    // matching the ShAmt < 4 limit above -- confirm against
    // canExtractShiftFromMul.
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // The handle tracks OffReg across the replacement so the updated value
      // can be read back afterwards.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1436
1437 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1438                                                 SDValue &OffImm) {
1439   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1440   // instructions.
1441   Base = N;
1442   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1443
1444   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1445     return true;
1446
1447   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1448   if (!RHS)
1449     return true;
1450
1451   uint32_t RHSC = (int)RHS->getZExtValue();
1452   if (RHSC > 1020 || RHSC % 4 != 0)
1453     return true;
1454
1455   Base = N.getOperand(0);
1456   if (Base.getOpcode() == ISD::FrameIndex) {
1457     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1458     Base = CurDAG->getTargetFrameIndex(
1459         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1460   }
1461
1462   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1463   return true;
1464 }
1465
1466 //===--------------------------------------------------------------------===//
1467
1468 /// getAL - Returns a ARMCC::AL immediate node.
1469 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1470   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1471 }
1472
1473 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1474   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1475   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1476   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1477 }
1478
1479 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1480   LoadSDNode *LD = cast<LoadSDNode>(N);
1481   ISD::MemIndexedMode AM = LD->getAddressingMode();
1482   if (AM == ISD::UNINDEXED)
1483     return false;
1484
1485   EVT LoadedVT = LD->getMemoryVT();
1486   SDValue Offset, AMOpc;
1487   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1488   unsigned Opcode = 0;
1489   bool Match = false;
1490   if (LoadedVT == MVT::i32 && isPre &&
1491       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1492     Opcode = ARM::LDR_PRE_IMM;
1493     Match = true;
1494   } else if (LoadedVT == MVT::i32 && !isPre &&
1495       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1496     Opcode = ARM::LDR_POST_IMM;
1497     Match = true;
1498   } else if (LoadedVT == MVT::i32 &&
1499       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1500     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1501     Match = true;
1502
1503   } else if (LoadedVT == MVT::i16 &&
1504              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1505     Match = true;
1506     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1507       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1508       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1509   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1510     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1511       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1512         Match = true;
1513         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1514       }
1515     } else {
1516       if (isPre &&
1517           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1518         Match = true;
1519         Opcode = ARM::LDRB_PRE_IMM;
1520       } else if (!isPre &&
1521                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1522         Match = true;
1523         Opcode = ARM::LDRB_POST_IMM;
1524       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1525         Match = true;
1526         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1527       }
1528     }
1529   }
1530
1531   if (Match) {
1532     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1533       SDValue Chain = LD->getChain();
1534       SDValue Base = LD->getBasePtr();
1535       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1536                        CurDAG->getRegister(0, MVT::i32), Chain };
1537       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1538                                            MVT::Other, Ops);
1539       transferMemOperands(N, New);
1540       ReplaceNode(N, New);
1541       return true;
1542     } else {
1543       SDValue Chain = LD->getChain();
1544       SDValue Base = LD->getBasePtr();
1545       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1546                        CurDAG->getRegister(0, MVT::i32), Chain };
1547       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1548                                            MVT::Other, Ops);
1549       transferMemOperands(N, New);
1550       ReplaceNode(N, New);
1551       return true;
1552     }
1553   }
1554
1555   return false;
1556 }
1557
1558 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1559   LoadSDNode *LD = cast<LoadSDNode>(N);
1560   EVT LoadedVT = LD->getMemoryVT();
1561   ISD::MemIndexedMode AM = LD->getAddressingMode();
1562   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1563       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1564     return false;
1565
1566   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1567   if (!COffs || COffs->getZExtValue() != 4)
1568     return false;
1569
1570   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1571   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1572   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1573   // ISel.
1574   SDValue Chain = LD->getChain();
1575   SDValue Base = LD->getBasePtr();
1576   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1577                    CurDAG->getRegister(0, MVT::i32), Chain };
1578   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1579                                        MVT::i32, MVT::Other, Ops);
1580   transferMemOperands(N, New);
1581   ReplaceNode(N, New);
1582   return true;
1583 }
1584
1585 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1586   LoadSDNode *LD = cast<LoadSDNode>(N);
1587   ISD::MemIndexedMode AM = LD->getAddressingMode();
1588   if (AM == ISD::UNINDEXED)
1589     return false;
1590
1591   EVT LoadedVT = LD->getMemoryVT();
1592   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1593   SDValue Offset;
1594   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1595   unsigned Opcode = 0;
1596   bool Match = false;
1597   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1598     switch (LoadedVT.getSimpleVT().SimpleTy) {
1599     case MVT::i32:
1600       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1601       break;
1602     case MVT::i16:
1603       if (isSExtLd)
1604         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1605       else
1606         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1607       break;
1608     case MVT::i8:
1609     case MVT::i1:
1610       if (isSExtLd)
1611         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1612       else
1613         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1614       break;
1615     default:
1616       return false;
1617     }
1618     Match = true;
1619   }
1620
1621   if (Match) {
1622     SDValue Chain = LD->getChain();
1623     SDValue Base = LD->getBasePtr();
1624     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1625                      CurDAG->getRegister(0, MVT::i32), Chain };
1626     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1627                                          MVT::Other, Ops);
1628     transferMemOperands(N, New);
1629     ReplaceNode(N, New);
1630     return true;
1631   }
1632
1633   return false;
1634 }
1635
1636 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1637 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1638   SDLoc dl(V0.getNode());
1639   SDValue RegClass =
1640     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1641   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1642   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1643   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1644   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1645 }
1646
1647 /// \brief Form a D register from a pair of S registers.
1648 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1649   SDLoc dl(V0.getNode());
1650   SDValue RegClass =
1651     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1652   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1653   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1654   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1655   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1656 }
1657
1658 /// \brief Form a quad register from a pair of D registers.
1659 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1660   SDLoc dl(V0.getNode());
1661   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1662                                                MVT::i32);
1663   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1664   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1665   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1666   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667 }
1668
1669 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1670 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1671   SDLoc dl(V0.getNode());
1672   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1673                                                MVT::i32);
1674   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1675   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1676   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1677   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1678 }
1679
1680 /// \brief Form 4 consecutive S registers.
1681 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1682                                    SDValue V2, SDValue V3) {
1683   SDLoc dl(V0.getNode());
1684   SDValue RegClass =
1685     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1686   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1687   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1688   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1689   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1690   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1691                                     V2, SubReg2, V3, SubReg3 };
1692   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1693 }
1694
1695 /// \brief Form 4 consecutive D registers.
1696 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1697                                    SDValue V2, SDValue V3) {
1698   SDLoc dl(V0.getNode());
1699   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1700                                                MVT::i32);
1701   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1702   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1703   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1704   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1705   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1706                                     V2, SubReg2, V3, SubReg3 };
1707   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1708 }
1709
1710 /// \brief Form 4 consecutive Q registers.
1711 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1712                                    SDValue V2, SDValue V3) {
1713   SDLoc dl(V0.getNode());
1714   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1715                                                MVT::i32);
1716   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1717   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1718   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1719   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1720   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1721                                     V2, SubReg2, V3, SubReg3 };
1722   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1723 }
1724
1725 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1726 /// of a NEON VLD or VST instruction.  The supported values depend on the
1727 /// number of registers being loaded.
1728 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1729                                        unsigned NumVecs, bool is64BitVector) {
1730   unsigned NumRegs = NumVecs;
1731   if (!is64BitVector && NumVecs < 3)
1732     NumRegs *= 2;
1733
1734   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1735   if (Alignment >= 32 && NumRegs == 4)
1736     Alignment = 32;
1737   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1738     Alignment = 16;
1739   else if (Alignment >= 8)
1740     Alignment = 8;
1741   else
1742     Alignment = 0;
1743
1744   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1745 }
1746
1747 static bool isVLDfixed(unsigned Opc)
1748 {
1749   switch (Opc) {
1750   default: return false;
1751   case ARM::VLD1d8wb_fixed : return true;
1752   case ARM::VLD1d16wb_fixed : return true;
1753   case ARM::VLD1d64Qwb_fixed : return true;
1754   case ARM::VLD1d32wb_fixed : return true;
1755   case ARM::VLD1d64wb_fixed : return true;
1756   case ARM::VLD1d64TPseudoWB_fixed : return true;
1757   case ARM::VLD1d64QPseudoWB_fixed : return true;
1758   case ARM::VLD1q8wb_fixed : return true;
1759   case ARM::VLD1q16wb_fixed : return true;
1760   case ARM::VLD1q32wb_fixed : return true;
1761   case ARM::VLD1q64wb_fixed : return true;
1762   case ARM::VLD1DUPd8wb_fixed : return true;
1763   case ARM::VLD1DUPd16wb_fixed : return true;
1764   case ARM::VLD1DUPd32wb_fixed : return true;
1765   case ARM::VLD1DUPq8wb_fixed : return true;
1766   case ARM::VLD1DUPq16wb_fixed : return true;
1767   case ARM::VLD1DUPq32wb_fixed : return true;
1768   case ARM::VLD2d8wb_fixed : return true;
1769   case ARM::VLD2d16wb_fixed : return true;
1770   case ARM::VLD2d32wb_fixed : return true;
1771   case ARM::VLD2q8PseudoWB_fixed : return true;
1772   case ARM::VLD2q16PseudoWB_fixed : return true;
1773   case ARM::VLD2q32PseudoWB_fixed : return true;
1774   case ARM::VLD2DUPd8wb_fixed : return true;
1775   case ARM::VLD2DUPd16wb_fixed : return true;
1776   case ARM::VLD2DUPd32wb_fixed : return true;
1777   }
1778 }
1779
1780 static bool isVSTfixed(unsigned Opc)
1781 {
1782   switch (Opc) {
1783   default: return false;
1784   case ARM::VST1d8wb_fixed : return true;
1785   case ARM::VST1d16wb_fixed : return true;
1786   case ARM::VST1d32wb_fixed : return true;
1787   case ARM::VST1d64wb_fixed : return true;
1788   case ARM::VST1q8wb_fixed : return true;
1789   case ARM::VST1q16wb_fixed : return true;
1790   case ARM::VST1q32wb_fixed : return true;
1791   case ARM::VST1q64wb_fixed : return true;
1792   case ARM::VST1d64TPseudoWB_fixed : return true;
1793   case ARM::VST1d64QPseudoWB_fixed : return true;
1794   case ARM::VST2d8wb_fixed : return true;
1795   case ARM::VST2d16wb_fixed : return true;
1796   case ARM::VST2d32wb_fixed : return true;
1797   case ARM::VST2q8PseudoWB_fixed : return true;
1798   case ARM::VST2q16PseudoWB_fixed : return true;
1799   case ARM::VST2q32PseudoWB_fixed : return true;
1800   }
1801 }
1802
1803 // Get the register stride update opcode of a VLD/VST instruction that
1804 // is otherwise equivalent to the given fixed stride updating instruction.
1805 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1806   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1807     && "Incorrect fixed stride updating instruction.");
1808   switch (Opc) {
1809   default: break;
1810   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1811   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1812   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1813   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1814   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1815   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1816   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1817   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1818   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1819   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1820   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1821   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1822   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1823   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1824   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1825   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1826   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1827   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1828
1829   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1830   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1831   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1832   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1833   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1834   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1835   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1836   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1837   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1838   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1839
1840   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1841   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1842   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1843   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1844   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1845   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1846
1847   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1848   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1849   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1850   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1851   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1852   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1853
1854   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1855   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1856   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1857   }
1858   return Opc; // If not one we handle, return it unchanged.
1859 }
1860
1861 /// Returns true if the given increment is a Constant known to be equal to the
1862 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1863 /// be used.
1864 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1865   auto C = dyn_cast<ConstantSDNode>(Inc);
1866   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1867 }
1868
1869 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1870                                 const uint16_t *DOpcodes,
1871                                 const uint16_t *QOpcodes0,
1872                                 const uint16_t *QOpcodes1) {
1873   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1874   SDLoc dl(N);
1875
1876   SDValue MemAddr, Align;
1877   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1878   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1879     return;
1880
1881   SDValue Chain = N->getOperand(0);
1882   EVT VT = N->getValueType(0);
1883   bool is64BitVector = VT.is64BitVector();
1884   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1885
1886   unsigned OpcodeIndex;
1887   switch (VT.getSimpleVT().SimpleTy) {
1888   default: llvm_unreachable("unhandled vld type");
1889     // Double-register operations:
1890   case MVT::v8i8:  OpcodeIndex = 0; break;
1891   case MVT::v4i16: OpcodeIndex = 1; break;
1892   case MVT::v2f32:
1893   case MVT::v2i32: OpcodeIndex = 2; break;
1894   case MVT::v1i64: OpcodeIndex = 3; break;
1895     // Quad-register operations:
1896   case MVT::v16i8: OpcodeIndex = 0; break;
1897   case MVT::v8i16: OpcodeIndex = 1; break;
1898   case MVT::v4f32:
1899   case MVT::v4i32: OpcodeIndex = 2; break;
1900   case MVT::v2f64:
1901   case MVT::v2i64: OpcodeIndex = 3;
1902     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1903     break;
1904   }
1905
1906   EVT ResTy;
1907   if (NumVecs == 1)
1908     ResTy = VT;
1909   else {
1910     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1911     if (!is64BitVector)
1912       ResTyElts *= 2;
1913     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1914   }
1915   std::vector<EVT> ResTys;
1916   ResTys.push_back(ResTy);
1917   if (isUpdating)
1918     ResTys.push_back(MVT::i32);
1919   ResTys.push_back(MVT::Other);
1920
1921   SDValue Pred = getAL(CurDAG, dl);
1922   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1923   SDNode *VLd;
1924   SmallVector<SDValue, 7> Ops;
1925
1926   // Double registers and VLD1/VLD2 quad registers are directly supported.
1927   if (is64BitVector || NumVecs <= 2) {
1928     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1929                     QOpcodes0[OpcodeIndex]);
1930     Ops.push_back(MemAddr);
1931     Ops.push_back(Align);
1932     if (isUpdating) {
1933       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1934       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1935       // case entirely when the rest are updated to that form, too.
1936       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1937       if ((NumVecs <= 2) && !IsImmUpdate)
1938         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1939       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1940       // check for that explicitly too. Horribly hacky, but temporary.
1941       if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
1942         Ops.push_back(IsImmUpdate ? Reg0 : Inc);
1943     }
1944     Ops.push_back(Pred);
1945     Ops.push_back(Reg0);
1946     Ops.push_back(Chain);
1947     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1948
1949   } else {
1950     // Otherwise, quad registers are loaded with two separate instructions,
1951     // where one loads the even registers and the other loads the odd registers.
1952     EVT AddrTy = MemAddr.getValueType();
1953
1954     // Load the even subregs.  This is always an updating load, so that it
1955     // provides the address to the second load for the odd subregs.
1956     SDValue ImplDef =
1957       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1958     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1959     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1960                                           ResTy, AddrTy, MVT::Other, OpsA);
1961     Chain = SDValue(VLdA, 2);
1962
1963     // Load the odd subregs.
1964     Ops.push_back(SDValue(VLdA, 1));
1965     Ops.push_back(Align);
1966     if (isUpdating) {
1967       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1968       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1969              "only constant post-increment update allowed for VLD3/4");
1970       (void)Inc;
1971       Ops.push_back(Reg0);
1972     }
1973     Ops.push_back(SDValue(VLdA, 0));
1974     Ops.push_back(Pred);
1975     Ops.push_back(Reg0);
1976     Ops.push_back(Chain);
1977     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1978   }
1979
1980   // Transfer memoperands.
1981   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1982   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1983   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1984
1985   if (NumVecs == 1) {
1986     ReplaceNode(N, VLd);
1987     return;
1988   }
1989
1990   // Extract out the subregisters.
1991   SDValue SuperReg = SDValue(VLd, 0);
1992   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1993                     ARM::qsub_3 == ARM::qsub_0 + 3,
1994                 "Unexpected subreg numbering");
1995   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1996   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1997     ReplaceUses(SDValue(N, Vec),
1998                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1999   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2000   if (isUpdating)
2001     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2002   CurDAG->RemoveDeadNode(N);
2003 }
2004
2005 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2006                                 const uint16_t *DOpcodes,
2007                                 const uint16_t *QOpcodes0,
2008                                 const uint16_t *QOpcodes1) {
2009   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2010   SDLoc dl(N);
2011
2012   SDValue MemAddr, Align;
2013   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2014   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2015   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2016     return;
2017
2018   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2019   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2020
2021   SDValue Chain = N->getOperand(0);
2022   EVT VT = N->getOperand(Vec0Idx).getValueType();
2023   bool is64BitVector = VT.is64BitVector();
2024   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2025
2026   unsigned OpcodeIndex;
2027   switch (VT.getSimpleVT().SimpleTy) {
2028   default: llvm_unreachable("unhandled vst type");
2029     // Double-register operations:
2030   case MVT::v8i8:  OpcodeIndex = 0; break;
2031   case MVT::v4i16: OpcodeIndex = 1; break;
2032   case MVT::v2f32:
2033   case MVT::v2i32: OpcodeIndex = 2; break;
2034   case MVT::v1i64: OpcodeIndex = 3; break;
2035     // Quad-register operations:
2036   case MVT::v16i8: OpcodeIndex = 0; break;
2037   case MVT::v8i16: OpcodeIndex = 1; break;
2038   case MVT::v4f32:
2039   case MVT::v4i32: OpcodeIndex = 2; break;
2040   case MVT::v2f64:
2041   case MVT::v2i64: OpcodeIndex = 3;
2042     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
2043     break;
2044   }
2045
2046   std::vector<EVT> ResTys;
2047   if (isUpdating)
2048     ResTys.push_back(MVT::i32);
2049   ResTys.push_back(MVT::Other);
2050
2051   SDValue Pred = getAL(CurDAG, dl);
2052   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2053   SmallVector<SDValue, 7> Ops;
2054
2055   // Double registers and VST1/VST2 quad registers are directly supported.
2056   if (is64BitVector || NumVecs <= 2) {
2057     SDValue SrcReg;
2058     if (NumVecs == 1) {
2059       SrcReg = N->getOperand(Vec0Idx);
2060     } else if (is64BitVector) {
2061       // Form a REG_SEQUENCE to force register allocation.
2062       SDValue V0 = N->getOperand(Vec0Idx + 0);
2063       SDValue V1 = N->getOperand(Vec0Idx + 1);
2064       if (NumVecs == 2)
2065         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2066       else {
2067         SDValue V2 = N->getOperand(Vec0Idx + 2);
2068         // If it's a vst3, form a quad D-register and leave the last part as
2069         // an undef.
2070         SDValue V3 = (NumVecs == 3)
2071           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2072           : N->getOperand(Vec0Idx + 3);
2073         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2074       }
2075     } else {
2076       // Form a QQ register.
2077       SDValue Q0 = N->getOperand(Vec0Idx);
2078       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2079       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2080     }
2081
2082     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2083                     QOpcodes0[OpcodeIndex]);
2084     Ops.push_back(MemAddr);
2085     Ops.push_back(Align);
2086     if (isUpdating) {
2087       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2088       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2089       // case entirely when the rest are updated to that form, too.
2090       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2091       if (NumVecs <= 2 && !IsImmUpdate)
2092         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2093       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2094       // check for that explicitly too. Horribly hacky, but temporary.
2095       if  (!IsImmUpdate)
2096         Ops.push_back(Inc);
2097       else if (NumVecs > 2 && !isVSTfixed(Opc))
2098         Ops.push_back(Reg0);
2099     }
2100     Ops.push_back(SrcReg);
2101     Ops.push_back(Pred);
2102     Ops.push_back(Reg0);
2103     Ops.push_back(Chain);
2104     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2105
2106     // Transfer memoperands.
2107     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2108
2109     ReplaceNode(N, VSt);
2110     return;
2111   }
2112
2113   // Otherwise, quad registers are stored with two separate instructions,
2114   // where one stores the even registers and the other stores the odd registers.
2115
2116   // Form the QQQQ REG_SEQUENCE.
2117   SDValue V0 = N->getOperand(Vec0Idx + 0);
2118   SDValue V1 = N->getOperand(Vec0Idx + 1);
2119   SDValue V2 = N->getOperand(Vec0Idx + 2);
2120   SDValue V3 = (NumVecs == 3)
2121     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2122     : N->getOperand(Vec0Idx + 3);
2123   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2124
2125   // Store the even D registers.  This is always an updating store, so that it
2126   // provides the address to the second store for the odd subregs.
2127   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2128   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2129                                         MemAddr.getValueType(),
2130                                         MVT::Other, OpsA);
2131   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2132   Chain = SDValue(VStA, 1);
2133
2134   // Store the odd D registers.
2135   Ops.push_back(SDValue(VStA, 0));
2136   Ops.push_back(Align);
2137   if (isUpdating) {
2138     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2139     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2140            "only constant post-increment update allowed for VST3/4");
2141     (void)Inc;
2142     Ops.push_back(Reg0);
2143   }
2144   Ops.push_back(RegSeq);
2145   Ops.push_back(Pred);
2146   Ops.push_back(Reg0);
2147   Ops.push_back(Chain);
2148   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2149                                         Ops);
2150   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2151   ReplaceNode(N, VStB);
2152 }
2153
2154 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2155                                       unsigned NumVecs,
2156                                       const uint16_t *DOpcodes,
2157                                       const uint16_t *QOpcodes) {
2158   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2159   SDLoc dl(N);
2160
2161   SDValue MemAddr, Align;
2162   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2163   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2164   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2165     return;
2166
2167   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2168   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2169
2170   SDValue Chain = N->getOperand(0);
2171   unsigned Lane =
2172     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2173   EVT VT = N->getOperand(Vec0Idx).getValueType();
2174   bool is64BitVector = VT.is64BitVector();
2175
2176   unsigned Alignment = 0;
2177   if (NumVecs != 3) {
2178     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2179     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2180     if (Alignment > NumBytes)
2181       Alignment = NumBytes;
2182     if (Alignment < 8 && Alignment < NumBytes)
2183       Alignment = 0;
2184     // Alignment must be a power of two; make sure of that.
2185     Alignment = (Alignment & -Alignment);
2186     if (Alignment == 1)
2187       Alignment = 0;
2188   }
2189   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2190
2191   unsigned OpcodeIndex;
2192   switch (VT.getSimpleVT().SimpleTy) {
2193   default: llvm_unreachable("unhandled vld/vst lane type");
2194     // Double-register operations:
2195   case MVT::v8i8:  OpcodeIndex = 0; break;
2196   case MVT::v4i16: OpcodeIndex = 1; break;
2197   case MVT::v2f32:
2198   case MVT::v2i32: OpcodeIndex = 2; break;
2199     // Quad-register operations:
2200   case MVT::v8i16: OpcodeIndex = 0; break;
2201   case MVT::v4f32:
2202   case MVT::v4i32: OpcodeIndex = 1; break;
2203   }
2204
2205   std::vector<EVT> ResTys;
2206   if (IsLoad) {
2207     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2208     if (!is64BitVector)
2209       ResTyElts *= 2;
2210     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2211                                       MVT::i64, ResTyElts));
2212   }
2213   if (isUpdating)
2214     ResTys.push_back(MVT::i32);
2215   ResTys.push_back(MVT::Other);
2216
2217   SDValue Pred = getAL(CurDAG, dl);
2218   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2219
2220   SmallVector<SDValue, 8> Ops;
2221   Ops.push_back(MemAddr);
2222   Ops.push_back(Align);
2223   if (isUpdating) {
2224     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2225     bool IsImmUpdate =
2226         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2227     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2228   }
2229
2230   SDValue SuperReg;
2231   SDValue V0 = N->getOperand(Vec0Idx + 0);
2232   SDValue V1 = N->getOperand(Vec0Idx + 1);
2233   if (NumVecs == 2) {
2234     if (is64BitVector)
2235       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2236     else
2237       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2238   } else {
2239     SDValue V2 = N->getOperand(Vec0Idx + 2);
2240     SDValue V3 = (NumVecs == 3)
2241       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2242       : N->getOperand(Vec0Idx + 3);
2243     if (is64BitVector)
2244       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2245     else
2246       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2247   }
2248   Ops.push_back(SuperReg);
2249   Ops.push_back(getI32Imm(Lane, dl));
2250   Ops.push_back(Pred);
2251   Ops.push_back(Reg0);
2252   Ops.push_back(Chain);
2253
2254   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2255                                   QOpcodes[OpcodeIndex]);
2256   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2257   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2258   if (!IsLoad) {
2259     ReplaceNode(N, VLdLn);
2260     return;
2261   }
2262
2263   // Extract the subregisters.
2264   SuperReg = SDValue(VLdLn, 0);
2265   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2266                     ARM::qsub_3 == ARM::qsub_0 + 3,
2267                 "Unexpected subreg numbering");
2268   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2269   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2270     ReplaceUses(SDValue(N, Vec),
2271                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2272   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2273   if (isUpdating)
2274     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2275   CurDAG->RemoveDeadNode(N);
2276 }
2277
2278 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2279                                    const uint16_t *DOpcodes,
2280                                    const uint16_t *QOpcodes) {
2281   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2282   SDLoc dl(N);
2283
2284   SDValue MemAddr, Align;
2285   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2286     return;
2287
2288   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2289   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2290
2291   SDValue Chain = N->getOperand(0);
2292   EVT VT = N->getValueType(0);
2293
2294   unsigned Alignment = 0;
2295   if (NumVecs != 3) {
2296     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2297     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2298     if (Alignment > NumBytes)
2299       Alignment = NumBytes;
2300     if (Alignment < 8 && Alignment < NumBytes)
2301       Alignment = 0;
2302     // Alignment must be a power of two; make sure of that.
2303     Alignment = (Alignment & -Alignment);
2304     if (Alignment == 1)
2305       Alignment = 0;
2306   }
2307   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2308
2309   unsigned Opc;
2310   switch (VT.getSimpleVT().SimpleTy) {
2311   default: llvm_unreachable("unhandled vld-dup type");
2312   case MVT::v8i8:  Opc = DOpcodes[0]; break;
2313   case MVT::v16i8: Opc = QOpcodes[0]; break;
2314   case MVT::v4i16: Opc = DOpcodes[1]; break;
2315   case MVT::v8i16: Opc = QOpcodes[1]; break;
2316   case MVT::v2f32:
2317   case MVT::v2i32: Opc = DOpcodes[2]; break;
2318   case MVT::v4f32:
2319   case MVT::v4i32: Opc = QOpcodes[2]; break;
2320   }
2321
2322   SDValue Pred = getAL(CurDAG, dl);
2323   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2324   SmallVector<SDValue, 6> Ops;
2325   Ops.push_back(MemAddr);
2326   Ops.push_back(Align);
2327   if (isUpdating) {
2328     // fixed-stride update instructions don't have an explicit writeback
2329     // operand. It's implicit in the opcode itself.
2330     SDValue Inc = N->getOperand(2);
2331     bool IsImmUpdate =
2332         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2333     if (NumVecs <= 2 && !IsImmUpdate)
2334       Opc = getVLDSTRegisterUpdateOpcode(Opc);
2335     if (!IsImmUpdate)
2336       Ops.push_back(Inc);
2337     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2338     else if (NumVecs > 2)
2339       Ops.push_back(Reg0);
2340   }
2341   Ops.push_back(Pred);
2342   Ops.push_back(Reg0);
2343   Ops.push_back(Chain);
2344
2345   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2346   std::vector<EVT> ResTys;
2347   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2348   if (isUpdating)
2349     ResTys.push_back(MVT::i32);
2350   ResTys.push_back(MVT::Other);
2351   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2352   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2353
2354   // Extract the subregisters.
2355   if (NumVecs == 1) {
2356     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2357   } else {
2358     SDValue SuperReg = SDValue(VLdDup, 0);
2359     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2360     unsigned SubIdx = ARM::dsub_0;
2361     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2362       ReplaceUses(SDValue(N, Vec),
2363                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2364   }
2365   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2366   if (isUpdating)
2367     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2368   CurDAG->RemoveDeadNode(N);
2369 }
2370
2371 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2372   if (!Subtarget->hasV6T2Ops())
2373     return false;
2374
2375   unsigned Opc = isSigned
2376     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2377     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2378   SDLoc dl(N);
2379
2380   // For unsigned extracts, check for a shift right and mask
2381   unsigned And_imm = 0;
2382   if (N->getOpcode() == ISD::AND) {
2383     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2384
2385       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2386       if (And_imm & (And_imm + 1))
2387         return false;
2388
2389       unsigned Srl_imm = 0;
2390       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2391                                 Srl_imm)) {
2392         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2393
2394         // Note: The width operand is encoded as width-1.
2395         unsigned Width = countTrailingOnes(And_imm) - 1;
2396         unsigned LSB = Srl_imm;
2397
2398         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2399
2400         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2401           // It's cheaper to use a right shift to extract the top bits.
2402           if (Subtarget->isThumb()) {
2403             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2404             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2405                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2406                               getAL(CurDAG, dl), Reg0, Reg0 };
2407             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2408             return true;
2409           }
2410
2411           // ARM models shift instructions as MOVsi with shifter operand.
2412           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2413           SDValue ShOpc =
2414             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2415                                       MVT::i32);
2416           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2417                             getAL(CurDAG, dl), Reg0, Reg0 };
2418           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2419           return true;
2420         }
2421
2422         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2423                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2424                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2425                           getAL(CurDAG, dl), Reg0 };
2426         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2427         return true;
2428       }
2429     }
2430     return false;
2431   }
2432
2433   // Otherwise, we're looking for a shift of a shift
2434   unsigned Shl_imm = 0;
2435   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2436     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2437     unsigned Srl_imm = 0;
2438     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2439       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2440       // Note: The width operand is encoded as width-1.
2441       unsigned Width = 32 - Srl_imm - 1;
2442       int LSB = Srl_imm - Shl_imm;
2443       if (LSB < 0)
2444         return false;
2445       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2446       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2447                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2448                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2449                         getAL(CurDAG, dl), Reg0 };
2450       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2451       return true;
2452     }
2453   }
2454
2455   // Or we are looking for a shift of an and, with a mask operand
2456   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2457       isShiftedMask_32(And_imm)) {
2458     unsigned Srl_imm = 0;
2459     unsigned LSB = countTrailingZeros(And_imm);
2460     // Shift must be the same as the ands lsb
2461     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2462       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2463       unsigned MSB = 31 - countLeadingZeros(And_imm);
2464       // Note: The width operand is encoded as width-1.
2465       unsigned Width = MSB - LSB;
2466       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2467       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2468                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2469                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2470                         getAL(CurDAG, dl), Reg0 };
2471       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2472       return true;
2473     }
2474   }
2475
2476   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2477     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2478     unsigned LSB = 0;
2479     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2480         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2481       return false;
2482
2483     if (LSB + Width > 32)
2484       return false;
2485
2486     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2487     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2488                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2489                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2490                       getAL(CurDAG, dl), Reg0 };
2491     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2492     return true;
2493   }
2494
2495   return false;
2496 }
2497
2498 /// Target-specific DAG combining for ISD::XOR.
2499 /// Target-independent combining lowers SELECT_CC nodes of the form
2500 /// select_cc setg[ge] X,  0,  X, -X
2501 /// select_cc setgt    X, -1,  X, -X
2502 /// select_cc setl[te] X,  0, -X,  X
2503 /// select_cc setlt    X,  1, -X,  X
2504 /// which represent Integer ABS into:
2505 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2506 /// ARM instruction selection detects the latter and matches it to
2507 /// ARM::ABS or ARM::t2ABS machine node.
2508 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2509   SDValue XORSrc0 = N->getOperand(0);
2510   SDValue XORSrc1 = N->getOperand(1);
2511   EVT VT = N->getValueType(0);
2512
2513   if (Subtarget->isThumb1Only())
2514     return false;
2515
2516   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2517     return false;
2518
2519   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2520   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2521   SDValue SRASrc0 = XORSrc1.getOperand(0);
2522   SDValue SRASrc1 = XORSrc1.getOperand(1);
2523   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2524   EVT XType = SRASrc0.getValueType();
2525   unsigned Size = XType.getSizeInBits() - 1;
2526
2527   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2528       XType.isInteger() && SRAConstant != nullptr &&
2529       Size == SRAConstant->getZExtValue()) {
2530     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2531     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2532     return true;
2533   }
2534
2535   return false;
2536 }
2537
2538 /// We've got special pseudo-instructions for these
2539 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2540   unsigned Opcode;
2541   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2542   if (MemTy == MVT::i8)
2543     Opcode = ARM::CMP_SWAP_8;
2544   else if (MemTy == MVT::i16)
2545     Opcode = ARM::CMP_SWAP_16;
2546   else if (MemTy == MVT::i32)
2547     Opcode = ARM::CMP_SWAP_32;
2548   else
2549     llvm_unreachable("Unknown AtomicCmpSwap type");
2550
2551   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2552                    N->getOperand(0)};
2553   SDNode *CmpSwap = CurDAG->getMachineNode(
2554       Opcode, SDLoc(N),
2555       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2556
2557   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2558   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2559   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2560
2561   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2562   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2563   CurDAG->RemoveDeadNode(N);
2564 }
2565
2566 static Optional<std::pair<unsigned, unsigned>>
2567 getContiguousRangeOfSetBits(const APInt &A) {
2568   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2569   unsigned LastOne = A.countTrailingZeros();
2570   if (A.countPopulation() != (FirstOne - LastOne + 1))
2571     return Optional<std::pair<unsigned,unsigned>>();
2572   return std::make_pair(FirstOne, LastOne);
2573 }
2574
2575 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2576   assert(N->getOpcode() == ARMISD::CMPZ);
2577   SwitchEQNEToPLMI = false;
2578
2579   if (!Subtarget->isThumb())
2580     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2581     // LSR don't exist as standalone instructions - they need the barrel shifter.
2582     return;
2583
2584   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2585   SDValue And = N->getOperand(0);
2586   if (!And->hasOneUse())
2587     return;
2588
2589   SDValue Zero = N->getOperand(1);
2590   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2591       And->getOpcode() != ISD::AND)
2592     return;
2593   SDValue X = And.getOperand(0);
2594   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2595
2596   if (!C || !X->hasOneUse())
2597     return;
2598   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2599   if (!Range)
2600     return;
2601
2602   // There are several ways to lower this:
2603   SDNode *NewN;
2604   SDLoc dl(N);
2605
2606   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2607     if (Subtarget->isThumb2()) {
2608       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2609       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2610                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2611                         CurDAG->getRegister(0, MVT::i32) };
2612       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2613     } else {
2614       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2615                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2616                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2617       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2618     }
2619   };
2620
2621   if (Range->second == 0) {
2622     //  1. Mask includes the LSB -> Simply shift the top N bits off
2623     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2624     ReplaceNode(And.getNode(), NewN);
2625   } else if (Range->first == 31) {
2626     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2627     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2628     ReplaceNode(And.getNode(), NewN);
2629   } else if (Range->first == Range->second) {
2630     //  3. Only one bit is set. We can shift this into the sign bit and use a
2631     //     PL/MI comparison.
2632     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2633     ReplaceNode(And.getNode(), NewN);
2634
2635     SwitchEQNEToPLMI = true;
2636   } else if (!Subtarget->hasV6T2Ops()) {
2637     //  4. Do a double shift to clear bottom and top bits, but only in
2638     //     thumb-1 mode as in thumb-2 we can use UBFX.
2639     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2640     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2641                      Range->second + (31 - Range->first));
2642     ReplaceNode(And.getNode(), NewN);
2643   }
2644
2645 }
2646
2647 void ARMDAGToDAGISel::Select(SDNode *N) {
2648   SDLoc dl(N);
2649
2650   if (N->isMachineOpcode()) {
2651     N->setNodeId(-1);
2652     return;   // Already selected.
2653   }
2654
2655   switch (N->getOpcode()) {
2656   default: break;
2657   case ISD::WRITE_REGISTER:
2658     if (tryWriteRegister(N))
2659       return;
2660     break;
2661   case ISD::READ_REGISTER:
2662     if (tryReadRegister(N))
2663       return;
2664     break;
2665   case ISD::INLINEASM:
2666     if (tryInlineAsm(N))
2667       return;
2668     break;
2669   case ISD::XOR:
2670     // Select special operations if XOR node forms integer ABS pattern
2671     if (tryABSOp(N))
2672       return;
2673     // Other cases are autogenerated.
2674     break;
2675   case ISD::Constant: {
2676     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2677     // If we can't materialize the constant we need to use a literal pool
2678     if (ConstantMaterializationCost(Val) > 2) {
2679       SDValue CPIdx = CurDAG->getTargetConstantPool(
2680           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2681           TLI->getPointerTy(CurDAG->getDataLayout()));
2682
2683       SDNode *ResNode;
2684       if (Subtarget->isThumb()) {
2685         SDValue Ops[] = {
2686           CPIdx,
2687           getAL(CurDAG, dl),
2688           CurDAG->getRegister(0, MVT::i32),
2689           CurDAG->getEntryNode()
2690         };
2691         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2692                                          Ops);
2693       } else {
2694         SDValue Ops[] = {
2695           CPIdx,
2696           CurDAG->getTargetConstant(0, dl, MVT::i32),
2697           getAL(CurDAG, dl),
2698           CurDAG->getRegister(0, MVT::i32),
2699           CurDAG->getEntryNode()
2700         };
2701         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2702                                          Ops);
2703       }
2704       // Annotate the Node with memory operand information so that MachineInstr
2705       // queries work properly. This e.g. gives the register allocation the
2706       // required information for rematerialization.
2707       MachineFunction& MF = CurDAG->getMachineFunction();
2708       MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
2709       MemOp[0] = MF.getMachineMemOperand(
2710           MachinePointerInfo::getConstantPool(MF),
2711           MachineMemOperand::MOLoad, 4, 4);
2712
2713       cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
2714
2715       ReplaceNode(N, ResNode);
2716       return;
2717     }
2718
2719     // Other cases are autogenerated.
2720     break;
2721   }
2722   case ISD::FrameIndex: {
2723     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2724     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2725     SDValue TFI = CurDAG->getTargetFrameIndex(
2726         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2727     if (Subtarget->isThumb1Only()) {
2728       // Set the alignment of the frame object to 4, to avoid having to generate
2729       // more than one ADD
2730       MachineFrameInfo &MFI = MF->getFrameInfo();
2731       if (MFI.getObjectAlignment(FI) < 4)
2732         MFI.setObjectAlignment(FI, 4);
2733       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2734                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2735       return;
2736     } else {
2737       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2738                       ARM::t2ADDri : ARM::ADDri);
2739       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2740                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2741                         CurDAG->getRegister(0, MVT::i32) };
2742       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2743       return;
2744     }
2745   }
2746   case ISD::SRL:
2747     if (tryV6T2BitfieldExtractOp(N, false))
2748       return;
2749     break;
2750   case ISD::SIGN_EXTEND_INREG:
2751   case ISD::SRA:
2752     if (tryV6T2BitfieldExtractOp(N, true))
2753       return;
2754     break;
2755   case ISD::MUL:
2756     if (Subtarget->isThumb1Only())
2757       break;
2758     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2759       unsigned RHSV = C->getZExtValue();
2760       if (!RHSV) break;
2761       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2762         unsigned ShImm = Log2_32(RHSV-1);
2763         if (ShImm >= 32)
2764           break;
2765         SDValue V = N->getOperand(0);
2766         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2767         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2768         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2769         if (Subtarget->isThumb()) {
2770           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2771           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2772           return;
2773         } else {
2774           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2775                             Reg0 };
2776           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2777           return;
2778         }
2779       }
2780       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2781         unsigned ShImm = Log2_32(RHSV+1);
2782         if (ShImm >= 32)
2783           break;
2784         SDValue V = N->getOperand(0);
2785         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2786         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2787         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2788         if (Subtarget->isThumb()) {
2789           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2790           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2791           return;
2792         } else {
2793           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2794                             Reg0 };
2795           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2796           return;
2797         }
2798       }
2799     }
2800     break;
2801   case ISD::AND: {
2802     // Check for unsigned bitfield extract
2803     if (tryV6T2BitfieldExtractOp(N, false))
2804       return;
2805
2806     // If an immediate is used in an AND node, it is possible that the immediate
2807     // can be more optimally materialized when negated. If this is the case we
2808     // can negate the immediate and use a BIC instead.
2809     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2810     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2811       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2812
2813       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2814       // immediate can be negated and fit in the immediate operand of
2815       // a t2BIC, don't do any manual transform here as this can be
2816       // handled by the generic ISel machinery.
2817       bool PreferImmediateEncoding =
2818         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2819       if (!PreferImmediateEncoding &&
2820           ConstantMaterializationCost(Imm) >
2821               ConstantMaterializationCost(~Imm)) {
2822         // The current immediate costs more to materialize than a negated
2823         // immediate, so negate the immediate and use a BIC.
2824         SDValue NewImm =
2825           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2826         // If the new constant didn't exist before, reposition it in the topological
2827         // ordering so it is just before N. Otherwise, don't touch its location.
2828         if (NewImm->getNodeId() == -1)
2829           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2830
2831         if (!Subtarget->hasThumb2()) {
2832           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2833                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2834                            CurDAG->getRegister(0, MVT::i32)};
2835           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2836           return;
2837         } else {
2838           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2839                            CurDAG->getRegister(0, MVT::i32),
2840                            CurDAG->getRegister(0, MVT::i32)};
2841           ReplaceNode(N,
2842                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2843           return;
2844         }
2845       }
2846     }
2847
2848     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2849     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2850     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2851     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2852     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2853     EVT VT = N->getValueType(0);
2854     if (VT != MVT::i32)
2855       break;
2856     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2857       ? ARM::t2MOVTi16
2858       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2859     if (!Opc)
2860       break;
2861     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2862     N1C = dyn_cast<ConstantSDNode>(N1);
2863     if (!N1C)
2864       break;
2865     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2866       SDValue N2 = N0.getOperand(1);
2867       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2868       if (!N2C)
2869         break;
2870       unsigned N1CVal = N1C->getZExtValue();
2871       unsigned N2CVal = N2C->getZExtValue();
2872       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2873           (N1CVal & 0xffffU) == 0xffffU &&
2874           (N2CVal & 0xffffU) == 0x0U) {
2875         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2876                                                   dl, MVT::i32);
2877         SDValue Ops[] = { N0.getOperand(0), Imm16,
2878                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2879         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2880         return;
2881       }
2882     }
2883
2884     break;
2885   }
2886   case ARMISD::UMAAL: {
2887     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2888     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2889                       N->getOperand(2), N->getOperand(3),
2890                       getAL(CurDAG, dl),
2891                       CurDAG->getRegister(0, MVT::i32) };
2892     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2893     return;
2894   }
2895   case ARMISD::UMLAL:{
2896     if (Subtarget->isThumb()) {
2897       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2898                         N->getOperand(3), getAL(CurDAG, dl),
2899                         CurDAG->getRegister(0, MVT::i32)};
2900       ReplaceNode(
2901           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2902       return;
2903     }else{
2904       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2905                         N->getOperand(3), getAL(CurDAG, dl),
2906                         CurDAG->getRegister(0, MVT::i32),
2907                         CurDAG->getRegister(0, MVT::i32) };
2908       ReplaceNode(N, CurDAG->getMachineNode(
2909                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2910                          MVT::i32, MVT::i32, Ops));
2911       return;
2912     }
2913   }
2914   case ARMISD::SMLAL:{
2915     if (Subtarget->isThumb()) {
2916       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2917                         N->getOperand(3), getAL(CurDAG, dl),
2918                         CurDAG->getRegister(0, MVT::i32)};
2919       ReplaceNode(
2920           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2921       return;
2922     }else{
2923       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2924                         N->getOperand(3), getAL(CurDAG, dl),
2925                         CurDAG->getRegister(0, MVT::i32),
2926                         CurDAG->getRegister(0, MVT::i32) };
2927       ReplaceNode(N, CurDAG->getMachineNode(
2928                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2929                          MVT::i32, MVT::i32, Ops));
2930       return;
2931     }
2932   }
2933   case ARMISD::SUBE: {
2934     if (!Subtarget->hasV6Ops())
2935       break;
2936     // Look for a pattern to match SMMLS
2937     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2938     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2939         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2940         !SDValue(N, 1).use_empty())
2941       break;
2942
2943     if (Subtarget->isThumb())
2944       assert(Subtarget->hasThumb2() &&
2945              "This pattern should not be generated for Thumb");
2946
2947     SDValue SmulLoHi = N->getOperand(1);
2948     SDValue Subc = N->getOperand(2);
2949     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2950
2951     if (!Zero || Zero->getZExtValue() != 0 ||
2952         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2953         N->getOperand(1) != SmulLoHi.getValue(1) ||
2954         N->getOperand(2) != Subc.getValue(1))
2955       break;
2956
2957     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2958     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2959                       N->getOperand(0), getAL(CurDAG, dl),
2960                       CurDAG->getRegister(0, MVT::i32) };
2961     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2962     return;
2963   }
2964   case ISD::LOAD: {
2965     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2966       if (tryT2IndexedLoad(N))
2967         return;
2968     } else if (Subtarget->isThumb()) {
2969       if (tryT1IndexedLoad(N))
2970         return;
2971     } else if (tryARMIndexedLoad(N))
2972       return;
2973     // Other cases are autogenerated.
2974     break;
2975   }
2976   case ARMISD::BRCOND: {
2977     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2978     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2979     // Pattern complexity = 6  cost = 1  size = 0
2980
2981     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2982     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2983     // Pattern complexity = 6  cost = 1  size = 0
2984
2985     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2986     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2987     // Pattern complexity = 6  cost = 1  size = 0
2988
2989     unsigned Opc = Subtarget->isThumb() ?
2990       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2991     SDValue Chain = N->getOperand(0);
2992     SDValue N1 = N->getOperand(1);
2993     SDValue N2 = N->getOperand(2);
2994     SDValue N3 = N->getOperand(3);
2995     SDValue InFlag = N->getOperand(4);
2996     assert(N1.getOpcode() == ISD::BasicBlock);
2997     assert(N2.getOpcode() == ISD::Constant);
2998     assert(N3.getOpcode() == ISD::Register);
2999
3000     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3001
3002     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3003       bool SwitchEQNEToPLMI;
3004       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3005       InFlag = N->getOperand(4);
3006
3007       if (SwitchEQNEToPLMI) {
3008         switch ((ARMCC::CondCodes)CC) {
3009         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3010         case ARMCC::NE:
3011           CC = (unsigned)ARMCC::MI;
3012           break;
3013         case ARMCC::EQ:
3014           CC = (unsigned)ARMCC::PL;
3015           break;
3016         }
3017       }
3018     }
3019
3020     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3021     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3022     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3023                                              MVT::Glue, Ops);
3024     Chain = SDValue(ResNode, 0);
3025     if (N->getNumValues() == 2) {
3026       InFlag = SDValue(ResNode, 1);
3027       ReplaceUses(SDValue(N, 1), InFlag);
3028     }
3029     ReplaceUses(SDValue(N, 0),
3030                 SDValue(Chain.getNode(), Chain.getResNo()));
3031     CurDAG->RemoveDeadNode(N);
3032     return;
3033   }
3034
3035   case ARMISD::CMPZ: {
3036     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3037     //   This allows us to avoid materializing the expensive negative constant.
3038     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3039     //   for its glue output.
3040     SDValue X = N->getOperand(0);
3041     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3042     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3043       int64_t Addend = -C->getSExtValue();
3044
3045       SDNode *Add = nullptr;
3046       // ADDS can be better than CMN if the immediate fits in a
3047       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3048       // Outside that range we can just use a CMN which is 32-bit but has a
3049       // 12-bit immediate range.
3050       if (Addend < 1<<8) {
3051         if (Subtarget->isThumb2()) {
3052           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3053                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3054                             CurDAG->getRegister(0, MVT::i32) };
3055           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3056         } else {
3057           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3058           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3059                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3060                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3061           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3062         }
3063       }
3064       if (Add) {
3065         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3066         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3067       }
3068     }
3069     // Other cases are autogenerated.
3070     break;
3071   }
3072
3073   case ARMISD::CMOV: {
3074     SDValue InFlag = N->getOperand(4);
3075
3076     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3077       bool SwitchEQNEToPLMI;
3078       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3079
3080       if (SwitchEQNEToPLMI) {
3081         SDValue ARMcc = N->getOperand(2);
3082         ARMCC::CondCodes CC =
3083           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3084
3085         switch (CC) {
3086         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3087         case ARMCC::NE:
3088           CC = ARMCC::MI;
3089           break;
3090         case ARMCC::EQ:
3091           CC = ARMCC::PL;
3092           break;
3093         }
3094         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3095         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3096                          N->getOperand(3), N->getOperand(4)};
3097         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3098       }
3099
3100     }
3101     // Other cases are autogenerated.
3102     break;
3103   }
3104
3105   case ARMISD::VZIP: {
3106     unsigned Opc = 0;
3107     EVT VT = N->getValueType(0);
3108     switch (VT.getSimpleVT().SimpleTy) {
3109     default: return;
3110     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3111     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3112     case MVT::v2f32:
3113     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3114     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3115     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3116     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3117     case MVT::v4f32:
3118     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3119     }
3120     SDValue Pred = getAL(CurDAG, dl);
3121     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3122     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3123     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3124     return;
3125   }
3126   case ARMISD::VUZP: {
3127     unsigned Opc = 0;
3128     EVT VT = N->getValueType(0);
3129     switch (VT.getSimpleVT().SimpleTy) {
3130     default: return;
3131     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3132     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3133     case MVT::v2f32:
3134     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3135     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3136     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3137     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3138     case MVT::v4f32:
3139     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3140     }
3141     SDValue Pred = getAL(CurDAG, dl);
3142     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3143     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3144     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3145     return;
3146   }
3147   case ARMISD::VTRN: {
3148     unsigned Opc = 0;
3149     EVT VT = N->getValueType(0);
3150     switch (VT.getSimpleVT().SimpleTy) {
3151     default: return;
3152     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3153     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3154     case MVT::v2f32:
3155     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3156     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3157     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3158     case MVT::v4f32:
3159     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3160     }
3161     SDValue Pred = getAL(CurDAG, dl);
3162     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3163     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3164     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3165     return;
3166   }
3167   case ARMISD::BUILD_VECTOR: {
3168     EVT VecVT = N->getValueType(0);
3169     EVT EltVT = VecVT.getVectorElementType();
3170     unsigned NumElts = VecVT.getVectorNumElements();
3171     if (EltVT == MVT::f64) {
3172       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3173       ReplaceNode(
3174           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3175       return;
3176     }
3177     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3178     if (NumElts == 2) {
3179       ReplaceNode(
3180           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3181       return;
3182     }
3183     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3184     ReplaceNode(N,
3185                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3186                                     N->getOperand(2), N->getOperand(3)));
3187     return;
3188   }
3189
3190   case ARMISD::VLD1DUP: {
3191     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3192                                          ARM::VLD1DUPd32 };
3193     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3194                                          ARM::VLD1DUPq32 };
3195     SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3196     return;
3197   }
3198
3199   case ARMISD::VLD2DUP: {
3200     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3201                                         ARM::VLD2DUPd32 };
3202     SelectVLDDup(N, false, 2, Opcodes);
3203     return;
3204   }
3205
3206   case ARMISD::VLD3DUP: {
3207     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3208                                         ARM::VLD3DUPd16Pseudo,
3209                                         ARM::VLD3DUPd32Pseudo };
3210     SelectVLDDup(N, false, 3, Opcodes);
3211     return;
3212   }
3213
3214   case ARMISD::VLD4DUP: {
3215     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3216                                         ARM::VLD4DUPd16Pseudo,
3217                                         ARM::VLD4DUPd32Pseudo };
3218     SelectVLDDup(N, false, 4, Opcodes);
3219     return;
3220   }
3221
3222   case ARMISD::VLD1DUP_UPD: {
3223     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3224                                          ARM::VLD1DUPd16wb_fixed,
3225                                          ARM::VLD1DUPd32wb_fixed };
3226     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3227                                          ARM::VLD1DUPq16wb_fixed,
3228                                          ARM::VLD1DUPq32wb_fixed };
3229     SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3230     return;
3231   }
3232
3233   case ARMISD::VLD2DUP_UPD: {
3234     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3235                                         ARM::VLD2DUPd16wb_fixed,
3236                                         ARM::VLD2DUPd32wb_fixed };
3237     SelectVLDDup(N, true, 2, Opcodes);
3238     return;
3239   }
3240
3241   case ARMISD::VLD3DUP_UPD: {
3242     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3243                                         ARM::VLD3DUPd16Pseudo_UPD,
3244                                         ARM::VLD3DUPd32Pseudo_UPD };
3245     SelectVLDDup(N, true, 3, Opcodes);
3246     return;
3247   }
3248
3249   case ARMISD::VLD4DUP_UPD: {
3250     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3251                                         ARM::VLD4DUPd16Pseudo_UPD,
3252                                         ARM::VLD4DUPd32Pseudo_UPD };
3253     SelectVLDDup(N, true, 4, Opcodes);
3254     return;
3255   }
3256
3257   case ARMISD::VLD1_UPD: {
3258     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3259                                          ARM::VLD1d16wb_fixed,
3260                                          ARM::VLD1d32wb_fixed,
3261                                          ARM::VLD1d64wb_fixed };
3262     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3263                                          ARM::VLD1q16wb_fixed,
3264                                          ARM::VLD1q32wb_fixed,
3265                                          ARM::VLD1q64wb_fixed };
3266     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3267     return;
3268   }
3269
3270   case ARMISD::VLD2_UPD: {
3271     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3272                                          ARM::VLD2d16wb_fixed,
3273                                          ARM::VLD2d32wb_fixed,
3274                                          ARM::VLD1q64wb_fixed};
3275     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3276                                          ARM::VLD2q16PseudoWB_fixed,
3277                                          ARM::VLD2q32PseudoWB_fixed };
3278     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3279     return;
3280   }
3281
3282   case ARMISD::VLD3_UPD: {
3283     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3284                                          ARM::VLD3d16Pseudo_UPD,
3285                                          ARM::VLD3d32Pseudo_UPD,
3286                                          ARM::VLD1d64TPseudoWB_fixed};
3287     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3288                                           ARM::VLD3q16Pseudo_UPD,
3289                                           ARM::VLD3q32Pseudo_UPD };
3290     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3291                                           ARM::VLD3q16oddPseudo_UPD,
3292                                           ARM::VLD3q32oddPseudo_UPD };
3293     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3294     return;
3295   }
3296
3297   case ARMISD::VLD4_UPD: {
3298     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3299                                          ARM::VLD4d16Pseudo_UPD,
3300                                          ARM::VLD4d32Pseudo_UPD,
3301                                          ARM::VLD1d64QPseudoWB_fixed};
3302     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3303                                           ARM::VLD4q16Pseudo_UPD,
3304                                           ARM::VLD4q32Pseudo_UPD };
3305     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3306                                           ARM::VLD4q16oddPseudo_UPD,
3307                                           ARM::VLD4q32oddPseudo_UPD };
3308     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3309     return;
3310   }
3311
3312   case ARMISD::VLD2LN_UPD: {
3313     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3314                                          ARM::VLD2LNd16Pseudo_UPD,
3315                                          ARM::VLD2LNd32Pseudo_UPD };
3316     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3317                                          ARM::VLD2LNq32Pseudo_UPD };
3318     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3319     return;
3320   }
3321
3322   case ARMISD::VLD3LN_UPD: {
3323     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3324                                          ARM::VLD3LNd16Pseudo_UPD,
3325                                          ARM::VLD3LNd32Pseudo_UPD };
3326     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3327                                          ARM::VLD3LNq32Pseudo_UPD };
3328     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3329     return;
3330   }
3331
3332   case ARMISD::VLD4LN_UPD: {
3333     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3334                                          ARM::VLD4LNd16Pseudo_UPD,
3335                                          ARM::VLD4LNd32Pseudo_UPD };
3336     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3337                                          ARM::VLD4LNq32Pseudo_UPD };
3338     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3339     return;
3340   }
3341
3342   case ARMISD::VST1_UPD: {
3343     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3344                                          ARM::VST1d16wb_fixed,
3345                                          ARM::VST1d32wb_fixed,
3346                                          ARM::VST1d64wb_fixed };
3347     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3348                                          ARM::VST1q16wb_fixed,
3349                                          ARM::VST1q32wb_fixed,
3350                                          ARM::VST1q64wb_fixed };
3351     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3352     return;
3353   }
3354
3355   case ARMISD::VST2_UPD: {
3356     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3357                                          ARM::VST2d16wb_fixed,
3358                                          ARM::VST2d32wb_fixed,
3359                                          ARM::VST1q64wb_fixed};
3360     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3361                                          ARM::VST2q16PseudoWB_fixed,
3362                                          ARM::VST2q32PseudoWB_fixed };
3363     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3364     return;
3365   }
3366
3367   case ARMISD::VST3_UPD: {
3368     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3369                                          ARM::VST3d16Pseudo_UPD,
3370                                          ARM::VST3d32Pseudo_UPD,
3371                                          ARM::VST1d64TPseudoWB_fixed};
3372     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3373                                           ARM::VST3q16Pseudo_UPD,
3374                                           ARM::VST3q32Pseudo_UPD };
3375     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3376                                           ARM::VST3q16oddPseudo_UPD,
3377                                           ARM::VST3q32oddPseudo_UPD };
3378     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3379     return;
3380   }
3381
3382   case ARMISD::VST4_UPD: {
3383     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3384                                          ARM::VST4d16Pseudo_UPD,
3385                                          ARM::VST4d32Pseudo_UPD,
3386                                          ARM::VST1d64QPseudoWB_fixed};
3387     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3388                                           ARM::VST4q16Pseudo_UPD,
3389                                           ARM::VST4q32Pseudo_UPD };
3390     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3391                                           ARM::VST4q16oddPseudo_UPD,
3392                                           ARM::VST4q32oddPseudo_UPD };
3393     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3394     return;
3395   }
3396
3397   case ARMISD::VST2LN_UPD: {
3398     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3399                                          ARM::VST2LNd16Pseudo_UPD,
3400                                          ARM::VST2LNd32Pseudo_UPD };
3401     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3402                                          ARM::VST2LNq32Pseudo_UPD };
3403     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3404     return;
3405   }
3406
3407   case ARMISD::VST3LN_UPD: {
3408     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3409                                          ARM::VST3LNd16Pseudo_UPD,
3410                                          ARM::VST3LNd32Pseudo_UPD };
3411     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3412                                          ARM::VST3LNq32Pseudo_UPD };
3413     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3414     return;
3415   }
3416
3417   case ARMISD::VST4LN_UPD: {
3418     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3419                                          ARM::VST4LNd16Pseudo_UPD,
3420                                          ARM::VST4LNd32Pseudo_UPD };
3421     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3422                                          ARM::VST4LNq32Pseudo_UPD };
3423     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3424     return;
3425   }
3426
3427   case ISD::INTRINSIC_VOID:
3428   case ISD::INTRINSIC_W_CHAIN: {
3429     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3430     switch (IntNo) {
3431     default:
3432       break;
3433
3434     case Intrinsic::arm_mrrc:
3435     case Intrinsic::arm_mrrc2: {
3436       SDLoc dl(N);
3437       SDValue Chain = N->getOperand(0);
3438       unsigned Opc;
3439
3440       if (Subtarget->isThumb())
3441         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3442       else
3443         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3444
3445       SmallVector<SDValue, 5> Ops;
3446       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3447       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3448       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3449
3450       // The ARM-mode mrrc2 instruction cannot be predicated: the top 4 bits of the
3451       // encoded instruction are always '1111'. Assembly language may still specify
3452       // AL as a predicate on mrrc2, but it makes no difference to the encoding.
3453       if (Opc != ARM::MRRC2) {
3454         Ops.push_back(getAL(CurDAG, dl));
3455         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3456       }
3457
3458       Ops.push_back(Chain);
3459
3460       // Writes to two registers.
3461       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3462
3463       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3464       return;
3465     }
3466     case Intrinsic::arm_ldaexd:
3467     case Intrinsic::arm_ldrexd: {
3468       SDLoc dl(N);
3469       SDValue Chain = N->getOperand(0);
3470       SDValue MemAddr = N->getOperand(2);
3471       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3472
3473       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3474       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3475                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3476
3477       // arm_ldrexd returns an i64 value in {i32, i32}
3478       std::vector<EVT> ResTys;
3479       if (isThumb) {
3480         ResTys.push_back(MVT::i32);
3481         ResTys.push_back(MVT::i32);
3482       } else
3483         ResTys.push_back(MVT::Untyped);
3484       ResTys.push_back(MVT::Other);
3485
3486       // Place arguments in the right order.
3487       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3488                        CurDAG->getRegister(0, MVT::i32), Chain};
3489       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3490       // Transfer memoperands.
3491       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3492       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3493       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3494
3495       // Remap uses.
3496       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3497       if (!SDValue(N, 0).use_empty()) {
3498         SDValue Result;
3499         if (isThumb)
3500           Result = SDValue(Ld, 0);
3501         else {
3502           SDValue SubRegIdx =
3503             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3504           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3505               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3506           Result = SDValue(ResNode,0);
3507         }
3508         ReplaceUses(SDValue(N, 0), Result);
3509       }
3510       if (!SDValue(N, 1).use_empty()) {
3511         SDValue Result;
3512         if (isThumb)
3513           Result = SDValue(Ld, 1);
3514         else {
3515           SDValue SubRegIdx =
3516             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3517           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3518               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3519           Result = SDValue(ResNode,0);
3520         }
3521         ReplaceUses(SDValue(N, 1), Result);
3522       }
3523       ReplaceUses(SDValue(N, 2), OutChain);
3524       CurDAG->RemoveDeadNode(N);
3525       return;
3526     }
3527     case Intrinsic::arm_stlexd:
3528     case Intrinsic::arm_strexd: {
3529       SDLoc dl(N);
3530       SDValue Chain = N->getOperand(0);
3531       SDValue Val0 = N->getOperand(2);
3532       SDValue Val1 = N->getOperand(3);
3533       SDValue MemAddr = N->getOperand(4);
3534
3535       // Store exclusive double returns an i32 value which is the return status
3536       // of the issued store.
3537       const EVT ResTys[] = {MVT::i32, MVT::Other};
3538
3539       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3540       // Place arguments in the right order.
3541       SmallVector<SDValue, 7> Ops;
3542       if (isThumb) {
3543         Ops.push_back(Val0);
3544         Ops.push_back(Val1);
3545       } else
3546         // arm_strexd uses GPRPair.
3547         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3548       Ops.push_back(MemAddr);
3549       Ops.push_back(getAL(CurDAG, dl));
3550       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3551       Ops.push_back(Chain);
3552
3553       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3554       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3555                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3556
3557       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3558       // Transfer memoperands.
3559       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3560       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3561       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3562
3563       ReplaceNode(N, St);
3564       return;
3565     }
3566
3567     case Intrinsic::arm_neon_vld1: {
3568       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3569                                            ARM::VLD1d32, ARM::VLD1d64 };
3570       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3571                                            ARM::VLD1q32, ARM::VLD1q64};
3572       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3573       return;
3574     }
3575
3576     case Intrinsic::arm_neon_vld2: {
3577       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3578                                            ARM::VLD2d32, ARM::VLD1q64 };
3579       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3580                                            ARM::VLD2q32Pseudo };
3581       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3582       return;
3583     }
3584
3585     case Intrinsic::arm_neon_vld3: {
3586       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3587                                            ARM::VLD3d16Pseudo,
3588                                            ARM::VLD3d32Pseudo,
3589                                            ARM::VLD1d64TPseudo };
3590       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3591                                             ARM::VLD3q16Pseudo_UPD,
3592                                             ARM::VLD3q32Pseudo_UPD };
3593       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3594                                             ARM::VLD3q16oddPseudo,
3595                                             ARM::VLD3q32oddPseudo };
3596       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3597       return;
3598     }
3599
3600     case Intrinsic::arm_neon_vld4: {
3601       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3602                                            ARM::VLD4d16Pseudo,
3603                                            ARM::VLD4d32Pseudo,
3604                                            ARM::VLD1d64QPseudo };
3605       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3606                                             ARM::VLD4q16Pseudo_UPD,
3607                                             ARM::VLD4q32Pseudo_UPD };
3608       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3609                                             ARM::VLD4q16oddPseudo,
3610                                             ARM::VLD4q32oddPseudo };
3611       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3612       return;
3613     }
3614
3615     case Intrinsic::arm_neon_vld2lane: {
3616       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3617                                            ARM::VLD2LNd16Pseudo,
3618                                            ARM::VLD2LNd32Pseudo };
3619       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3620                                            ARM::VLD2LNq32Pseudo };
3621       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3622       return;
3623     }
3624
3625     case Intrinsic::arm_neon_vld3lane: {
3626       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3627                                            ARM::VLD3LNd16Pseudo,
3628                                            ARM::VLD3LNd32Pseudo };
3629       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3630                                            ARM::VLD3LNq32Pseudo };
3631       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3632       return;
3633     }
3634
3635     case Intrinsic::arm_neon_vld4lane: {
3636       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3637                                            ARM::VLD4LNd16Pseudo,
3638                                            ARM::VLD4LNd32Pseudo };
3639       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3640                                            ARM::VLD4LNq32Pseudo };
3641       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3642       return;
3643     }
3644
3645     case Intrinsic::arm_neon_vst1: {
3646       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3647                                            ARM::VST1d32, ARM::VST1d64 };
3648       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3649                                            ARM::VST1q32, ARM::VST1q64 };
3650       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3651       return;
3652     }
3653
3654     case Intrinsic::arm_neon_vst2: {
3655       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3656                                            ARM::VST2d32, ARM::VST1q64 };
3657       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3658                                            ARM::VST2q32Pseudo };
3659       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3660       return;
3661     }
3662
3663     case Intrinsic::arm_neon_vst3: {
3664       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3665                                            ARM::VST3d16Pseudo,
3666                                            ARM::VST3d32Pseudo,
3667                                            ARM::VST1d64TPseudo };
3668       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3669                                             ARM::VST3q16Pseudo_UPD,
3670                                             ARM::VST3q32Pseudo_UPD };
3671       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3672                                             ARM::VST3q16oddPseudo,
3673                                             ARM::VST3q32oddPseudo };
3674       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3675       return;
3676     }
3677
3678     case Intrinsic::arm_neon_vst4: {
3679       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3680                                            ARM::VST4d16Pseudo,
3681                                            ARM::VST4d32Pseudo,
3682                                            ARM::VST1d64QPseudo };
3683       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3684                                             ARM::VST4q16Pseudo_UPD,
3685                                             ARM::VST4q32Pseudo_UPD };
3686       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3687                                             ARM::VST4q16oddPseudo,
3688                                             ARM::VST4q32oddPseudo };
3689       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3690       return;
3691     }
3692
3693     case Intrinsic::arm_neon_vst2lane: {
3694       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3695                                            ARM::VST2LNd16Pseudo,
3696                                            ARM::VST2LNd32Pseudo };
3697       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3698                                            ARM::VST2LNq32Pseudo };
3699       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3700       return;
3701     }
3702
3703     case Intrinsic::arm_neon_vst3lane: {
3704       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3705                                            ARM::VST3LNd16Pseudo,
3706                                            ARM::VST3LNd32Pseudo };
3707       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3708                                            ARM::VST3LNq32Pseudo };
3709       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3710       return;
3711     }
3712
3713     case Intrinsic::arm_neon_vst4lane: {
3714       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3715                                            ARM::VST4LNd16Pseudo,
3716                                            ARM::VST4LNd32Pseudo };
3717       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3718                                            ARM::VST4LNq32Pseudo };
3719       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3720       return;
3721     }
3722     }
3723     break;
3724   }
3725
3726   case ISD::ATOMIC_CMP_SWAP:
3727     SelectCMP_SWAP(N);
3728     return;
3729   }
3730
3731   SelectCode(N);
3732 }
3733
3734 // Inspect a register string of the form
3735 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3736 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3737 // and obtain the integer operands from them, adding these operands to the
3738 // provided vector.
3739 static void getIntOperandsFromRegisterString(StringRef RegString,
3740                                              SelectionDAG *CurDAG,
3741                                              const SDLoc &DL,
3742                                              std::vector<SDValue> &Ops) {
3743   SmallVector<StringRef, 5> Fields;
3744   RegString.split(Fields, ':');
3745
3746   if (Fields.size() > 1) {
3747     bool AllIntFields = true;
3748
3749     for (StringRef Field : Fields) {
3750       // Need to trim out leading 'cp' characters and get the integer field.
3751       unsigned IntField;
3752       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3753       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3754     }
3755
3756     assert(AllIntFields &&
3757             "Unexpected non-integer value in special register string.");
3758   }
3759 }
3760
3761 // Maps a Banked Register string to its mask value. The mask value returned is
3762 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3763 // mask operand, which expresses which register is to be used, e.g. r8, and in
3764 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3765 // was invalid.
3766 static inline int getBankedRegisterMask(StringRef RegString) {
3767   return StringSwitch<int>(RegString.lower())
3768           .Case("r8_usr", 0x00)
3769           .Case("r9_usr", 0x01)
3770           .Case("r10_usr", 0x02)
3771           .Case("r11_usr", 0x03)
3772           .Case("r12_usr", 0x04)
3773           .Case("sp_usr", 0x05)
3774           .Case("lr_usr", 0x06)
3775           .Case("r8_fiq", 0x08)
3776           .Case("r9_fiq", 0x09)
3777           .Case("r10_fiq", 0x0a)
3778           .Case("r11_fiq", 0x0b)
3779           .Case("r12_fiq", 0x0c)
3780           .Case("sp_fiq", 0x0d)
3781           .Case("lr_fiq", 0x0e)
3782           .Case("lr_irq", 0x10)
3783           .Case("sp_irq", 0x11)
3784           .Case("lr_svc", 0x12)
3785           .Case("sp_svc", 0x13)
3786           .Case("lr_abt", 0x14)
3787           .Case("sp_abt", 0x15)
3788           .Case("lr_und", 0x16)
3789           .Case("sp_und", 0x17)
3790           .Case("lr_mon", 0x1c)
3791           .Case("sp_mon", 0x1d)
3792           .Case("elr_hyp", 0x1e)
3793           .Case("sp_hyp", 0x1f)
3794           .Case("spsr_fiq", 0x2e)
3795           .Case("spsr_irq", 0x30)
3796           .Case("spsr_svc", 0x32)
3797           .Case("spsr_abt", 0x34)
3798           .Case("spsr_und", 0x36)
3799           .Case("spsr_mon", 0x3c)
3800           .Case("spsr_hyp", 0x3e)
3801           .Default(-1);
3802 }
3803
3804 // Maps a MClass special register string to its value for use in the
3805 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3806 // Returns -1 to signify that the string was invalid.
3807 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3808   return StringSwitch<int>(RegString.lower())
3809           .Case("apsr", 0x0)
3810           .Case("iapsr", 0x1)
3811           .Case("eapsr", 0x2)
3812           .Case("xpsr", 0x3)
3813           .Case("ipsr", 0x5)
3814           .Case("epsr", 0x6)
3815           .Case("iepsr", 0x7)
3816           .Case("msp", 0x8)
3817           .Case("psp", 0x9)
3818           .Case("primask", 0x10)
3819           .Case("basepri", 0x11)
3820           .Case("basepri_max", 0x12)
3821           .Case("faultmask", 0x13)
3822           .Case("control", 0x14)
3823           .Case("msplim", 0x0a)
3824           .Case("psplim", 0x0b)
3825           .Case("sp", 0x18)
3826           .Default(-1);
3827 }
3828
3829 // The flags here are common to those allowed for apsr in the A class cores and
3830 // those allowed for the special registers in the M class cores. Returns a
3831 // value representing which flags were present, -1 if invalid.
3832 static inline int getMClassFlagsMask(StringRef Flags) {
3833   return StringSwitch<int>(Flags)
3834           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3835                          // correct when flags are not permitted
3836           .Case("g", 0x1)
3837           .Case("nzcvq", 0x2)
3838           .Case("nzcvqg", 0x3)
3839           .Default(-1);
3840 }
3841
3842 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3843                                  const ARMSubtarget *Subtarget) {
3844   // Ensure that the register (without flags) was a valid M Class special
3845   // register.
3846   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3847   if (SYSmvalue == -1)
3848     return -1;
3849
3850   // basepri, basepri_max and faultmask are only valid for V7m.
3851   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3852     return -1;
3853
3854   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3855     Flags = "";
3856     SYSmvalue |= 0x80;
3857   }
3858
3859   if (!Subtarget->has8MSecExt() &&
3860       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3861     return -1;
3862
3863   if (!Subtarget->hasV8MMainlineOps() &&
3864       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3865        SYSmvalue == 0x93))
3866     return -1;
3867
3868   // If it was a read then we won't be expecting flags and so at this point
3869   // we can return the mask.
3870   if (IsRead) {
3871     if (Flags.empty())
3872       return SYSmvalue;
3873     else
3874       return -1;
3875   }
3876
3877   // We know we are now handling a write so need to get the mask for the flags.
3878   int Mask = getMClassFlagsMask(Flags);
3879
3880   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3881   // shouldn't have flags present.
3882   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3883     return -1;
3884
3885   // The _g and _nzcvqg versions are only valid if the DSP extension is
3886   // available.
3887   if (!Subtarget->hasDSP() && (Mask & 0x1))
3888     return -1;
3889
3890   // The register was valid so need to put the mask in the correct place
3891   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3892   // construct the operand for the instruction node.
3893   return SYSmvalue | Mask << 10;
3894 }
3895
3896 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3897   // The mask operand contains the special register (R Bit) in bit 4, whether
3898   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3899   // bits 3-0 contains the fields to be accessed in the special register, set by
3900   // the flags provided with the register.
3901   int Mask = 0;
3902   if (Reg == "apsr") {
3903     // The flags permitted for apsr are the same flags that are allowed in
3904     // M class registers. We get the flag value and then shift the flags into
3905     // the correct place to combine with the mask.
3906     Mask = getMClassFlagsMask(Flags);
3907     if (Mask == -1)
3908       return -1;
3909     return Mask << 2;
3910   }
3911
3912   if (Reg != "cpsr" && Reg != "spsr") {
3913     return -1;
3914   }
3915
3916   // This is the same as if the flags were "fc"
3917   if (Flags.empty() || Flags == "all")
3918     return Mask | 0x9;
3919
3920   // Inspect the supplied flags string and set the bits in the mask for
3921   // the relevant and valid flags allowed for cpsr and spsr.
3922   for (char Flag : Flags) {
3923     int FlagVal;
3924     switch (Flag) {
3925       case 'c':
3926         FlagVal = 0x1;
3927         break;
3928       case 'x':
3929         FlagVal = 0x2;
3930         break;
3931       case 's':
3932         FlagVal = 0x4;
3933         break;
3934       case 'f':
3935         FlagVal = 0x8;
3936         break;
3937       default:
3938         FlagVal = 0;
3939     }
3940
3941     // This avoids allowing strings where the same flag bit appears twice.
3942     if (!FlagVal || (Mask & FlagVal))
3943       return -1;
3944     Mask |= FlagVal;
3945   }
3946
3947   // If the register is spsr then we need to set the R bit.
3948   if (Reg == "spsr")
3949     Mask |= 0x10;
3950
3951   return Mask;
3952 }
3953
3954 // Lower the read_register intrinsic to ARM specific DAG nodes
3955 // using the supplied metadata string to select the instruction node to use
3956 // and the registers/masks to construct as operands for the node.
3957 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3958   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3959   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3960   bool IsThumb2 = Subtarget->isThumb2();
3961   SDLoc DL(N);
3962
3963   std::vector<SDValue> Ops;
3964   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3965
3966   if (!Ops.empty()) {
3967     // If the special register string was constructed of fields (as defined
3968     // in the ACLE) then need to lower to MRC node (32 bit) or
3969     // MRRC node(64 bit), we can make the distinction based on the number of
3970     // operands we have.
3971     unsigned Opcode;
3972     SmallVector<EVT, 3> ResTypes;
3973     if (Ops.size() == 5){
3974       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3975       ResTypes.append({ MVT::i32, MVT::Other });
3976     } else {
3977       assert(Ops.size() == 3 &&
3978               "Invalid number of fields in special register string.");
3979       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3980       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3981     }
3982
3983     Ops.push_back(getAL(CurDAG, DL));
3984     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3985     Ops.push_back(N->getOperand(0));
3986     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3987     return true;
3988   }
3989
3990   std::string SpecialReg = RegString->getString().lower();
3991
3992   int BankedReg = getBankedRegisterMask(SpecialReg);
3993   if (BankedReg != -1) {
3994     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3995             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3996             N->getOperand(0) };
3997     ReplaceNode(
3998         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3999                                   DL, MVT::i32, MVT::Other, Ops));
4000     return true;
4001   }
4002
4003   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4004   // corresponding to the register that is being read from. So we switch on the
4005   // string to find which opcode we need to use.
4006   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4007                     .Case("fpscr", ARM::VMRS)
4008                     .Case("fpexc", ARM::VMRS_FPEXC)
4009                     .Case("fpsid", ARM::VMRS_FPSID)
4010                     .Case("mvfr0", ARM::VMRS_MVFR0)
4011                     .Case("mvfr1", ARM::VMRS_MVFR1)
4012                     .Case("mvfr2", ARM::VMRS_MVFR2)
4013                     .Case("fpinst", ARM::VMRS_FPINST)
4014                     .Case("fpinst2", ARM::VMRS_FPINST2)
4015                     .Default(0);
4016
4017   // If an opcode was found then we can lower the read to a VFP instruction.
4018   if (Opcode) {
4019     if (!Subtarget->hasVFP2())
4020       return false;
4021     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4022       return false;
4023
4024     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4025             N->getOperand(0) };
4026     ReplaceNode(N,
4027                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4028     return true;
4029   }
4030
4031   // If the target is M Class then need to validate that the register string
4032   // is an acceptable value, so check that a mask can be constructed from the
4033   // string.
4034   if (Subtarget->isMClass()) {
4035     StringRef Flags = "", Reg = SpecialReg;
4036     if (Reg.endswith("_ns")) {
4037       Flags = "ns";
4038       Reg = Reg.drop_back(3);
4039     }
4040
4041     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4042     if (SYSmValue == -1)
4043       return false;
4044
4045     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4046                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4047                       N->getOperand(0) };
4048     ReplaceNode(
4049         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4050     return true;
4051   }
4052
4053   // Here we know the target is not M Class so we need to check if it is one
4054   // of the remaining possible values which are apsr, cpsr or spsr.
4055   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4056     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4057             N->getOperand(0) };
4058     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4059                                           DL, MVT::i32, MVT::Other, Ops));
4060     return true;
4061   }
4062
4063   if (SpecialReg == "spsr") {
4064     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4065             N->getOperand(0) };
4066     ReplaceNode(
4067         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4068                                   MVT::i32, MVT::Other, Ops));
4069     return true;
4070   }
4071
4072   return false;
4073 }
4074
4075 // Lower the write_register intrinsic to ARM specific DAG nodes
4076 // using the supplied metadata string to select the instruction node to use
4077 // and the registers/masks to use in the nodes
4078 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4079   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4080   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4081   bool IsThumb2 = Subtarget->isThumb2();
4082   SDLoc DL(N);
4083
4084   std::vector<SDValue> Ops;
4085   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4086
4087   if (!Ops.empty()) {
4088     // If the special register string was constructed of fields (as defined
4089     // in the ACLE) then need to lower to MCR node (32 bit) or
4090     // MCRR node(64 bit), we can make the distinction based on the number of
4091     // operands we have.
4092     unsigned Opcode;
4093     if (Ops.size() == 5) {
4094       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4095       Ops.insert(Ops.begin()+2, N->getOperand(2));
4096     } else {
4097       assert(Ops.size() == 3 &&
4098               "Invalid number of fields in special register string.");
4099       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4100       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4101       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4102     }
4103
4104     Ops.push_back(getAL(CurDAG, DL));
4105     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4106     Ops.push_back(N->getOperand(0));
4107
4108     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4109     return true;
4110   }
4111
4112   std::string SpecialReg = RegString->getString().lower();
4113   int BankedReg = getBankedRegisterMask(SpecialReg);
4114   if (BankedReg != -1) {
4115     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4116             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4117             N->getOperand(0) };
4118     ReplaceNode(
4119         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4120                                   DL, MVT::Other, Ops));
4121     return true;
4122   }
4123
4124   // The VFP registers are written to by creating SelectionDAG nodes with
4125   // opcodes corresponding to the register that is being written. So we switch
4126   // on the string to find which opcode we need to use.
4127   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4128                     .Case("fpscr", ARM::VMSR)
4129                     .Case("fpexc", ARM::VMSR_FPEXC)
4130                     .Case("fpsid", ARM::VMSR_FPSID)
4131                     .Case("fpinst", ARM::VMSR_FPINST)
4132                     .Case("fpinst2", ARM::VMSR_FPINST2)
4133                     .Default(0);
4134
4135   if (Opcode) {
4136     if (!Subtarget->hasVFP2())
4137       return false;
4138     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4139             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4140     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4141     return true;
4142   }
4143
4144   std::pair<StringRef, StringRef> Fields;
4145   Fields = StringRef(SpecialReg).rsplit('_');
4146   std::string Reg = Fields.first.str();
4147   StringRef Flags = Fields.second;
4148
4149   // If the target was M Class then need to validate the special register value
4150   // and retrieve the mask for use in the instruction node.
4151   if (Subtarget->isMClass()) {
4152     // basepri_max gets split so need to correct Reg and Flags.
4153     if (SpecialReg == "basepri_max") {
4154       Reg = SpecialReg;
4155       Flags = "";
4156     }
4157     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4158     if (SYSmValue == -1)
4159       return false;
4160
4161     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4162                       N->getOperand(2), getAL(CurDAG, DL),
4163                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4164     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4165     return true;
4166   }
4167
4168   // We then check to see if a valid mask can be constructed for one of the
4169   // register string values permitted for the A and R class cores. These values
4170   // are apsr, spsr and cpsr; these are also valid on older cores.
4171   int Mask = getARClassRegisterMask(Reg, Flags);
4172   if (Mask != -1) {
4173     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4174             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4175             N->getOperand(0) };
4176     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4177                                           DL, MVT::Other, Ops));
4178     return true;
4179   }
4180
4181   return false;
4182 }
4183
// Rewrite an inline asm node so that every register operand made of two GPRs
// (either constrained to the GPR register class, or tied to a def that was
// already rewritten) is replaced by a single GPRPair virtual register.
// Returns true if at least one operand was rewritten and N was replaced by a
// new INLINEASM node; returns false, leaving N in place, otherwise.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If N has a glued node, its glue is the last operand; remember it so it can
  // be re-appended (or replaced) once the operand list has been rebuilt.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // One entry per operand group that has registers, recording whether that
  // group was rewritten to a GPRPair. Indexed by DefIdx for tied uses.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Operands before Op_FirstOperand are copied through unchanged.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Each operand group starts with a constant flag word; anything else is
    // simply copied through.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register uses and defs are candidates for pairing.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Rewrite only two-register groups that are either constrained to the GPR
    // class or tied to a def that has already been turned into a GPRPair.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    // The two operands following the flag word are the registers of the group.
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user: keep all of its operands except the
      // last one, which is replaced by the glue result of the final copy.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // The rebuilt asm node is chained and glued to the new copy.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Build a flag word describing a single register of the same kind,
      // either tied to the matching def or constrained to the GPRPair class.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Build the replacement node from the rewritten operand list and swap it in
  // for N.
  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4347
4348
4349 bool ARMDAGToDAGISel::
4350 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4351                              std::vector<SDValue> &OutOps) {
4352   switch(ConstraintID) {
4353   default:
4354     llvm_unreachable("Unexpected asm memory constraint");
4355   case InlineAsm::Constraint_i:
4356     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4357     //        be an immediate and not a memory constraint.
4358     LLVM_FALLTHROUGH;
4359   case InlineAsm::Constraint_m:
4360   case InlineAsm::Constraint_o:
4361   case InlineAsm::Constraint_Q:
4362   case InlineAsm::Constraint_Um:
4363   case InlineAsm::Constraint_Un:
4364   case InlineAsm::Constraint_Uq:
4365   case InlineAsm::Constraint_Us:
4366   case InlineAsm::Constraint_Ut:
4367   case InlineAsm::Constraint_Uv:
4368   case InlineAsm::Constraint_Uy:
4369     // Require the address to be in a register.  That is safe for all ARM
4370     // variants and it is hard to do anything much smarter without knowing
4371     // how the operand is used.
4372     OutOps.push_back(Op);
4373     return false;
4374   }
4375   return true;
4376 }
4377
4378 /// createARMISelDag - This pass converts a legalized DAG into a
4379 /// ARM-specific DAG, ready for instruction scheduling.
4380 ///
4381 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4382                                      CodeGenOpt::Level OptLevel) {
4383   return new ARMDAGToDAGISel(TM, OptLevel);
4384 }