contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Target/TargetLowering.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 enum AddrMode2Type {
  53   AM2_BASE, // Simple AM2 (+-imm12)
  54   AM2_SHOP  // Shifter-op AM2
  55 };
  56
  57 class ARMDAGToDAGISel : public SelectionDAGISel {
  58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  59   /// make the right decision when generating code for different targets.
  60   const ARMSubtarget *Subtarget;
  61
  62 public:
  63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  64       : SelectionDAGISel(tm, OptLevel) {}
  65
  66   bool runOnMachineFunction(MachineFunction &MF) override {
  67     // Reset the subtarget each time through.
  68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  69     SelectionDAGISel::runOnMachineFunction(MF);
  70     return true;
  71   }
  72
  73   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  74
  75   void PreprocessISelDAG() override;
  76
  77   /// getI32Imm - Return a target constant of type i32 with the specified
  78   /// value.
  79   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  80     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  81   }
  82
  83   void Select(SDNode *N) override;
  84
  85   bool hasNoVMLxHazardUse(SDNode *N) const;
  86   bool isShifterOpProfitable(const SDValue &Shift,
  87                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  88   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  89                                SDValue &B, SDValue &C,
  90                                bool CheckProfitability = true);
  91   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  92                                SDValue &B, bool CheckProfitability = true);
  93   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B, SDValue &C) {
  95     // Don't apply the profitability check
  96     return SelectRegShifterOperand(N, A, B, C, false);
  97   }
  98   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  99                                     SDValue &B) {
 100     // Don't apply the profitability check
 101     return SelectImmShifterOperand(N, A, B, false);
 102   }
 103
 104   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 105   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 106
 107   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 108                                       SDValue &Offset, SDValue &Opc);
 109   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 110                            SDValue &Opc) {
 111     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 112   }
 113
 114   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 115                            SDValue &Opc) {
 116     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 117   }
 118
 119   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 120                        SDValue &Opc) {
 121     SelectAddrMode2Worker(N, Base, Offset, Opc);
 122 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 123     // This always matches one way or another.
 124     return true;
 125   }
 126
 127   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 128     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 129     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 130     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 131     return true;
 132   }
 133
 134   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 135                              SDValue &Offset, SDValue &Opc);
 136   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 137                              SDValue &Offset, SDValue &Opc);
 138   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 139                              SDValue &Offset, SDValue &Opc);
 140   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 141   bool SelectAddrMode3(SDValue N, SDValue &Base,
 142                        SDValue &Offset, SDValue &Opc);
 143   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode5(SDValue N, SDValue &Base,
 146                        SDValue &Offset);
 147   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 148   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 149
 150   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 151
 152   // Thumb Addressing Modes:
 153   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 154   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 155                                 SDValue &OffImm);
 156   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 157                                  SDValue &OffImm);
 158   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 159                                  SDValue &OffImm);
 160   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 161                                  SDValue &OffImm);
 162   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 163
 164   // Thumb 2 Addressing Modes:
 165   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 166   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 167                             SDValue &OffImm);
 168   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 169                                  SDValue &OffImm);
 170   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 171                              SDValue &OffReg, SDValue &ShImm);
 172   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 173
 174   inline bool is_so_imm(unsigned Imm) const {
 175     return ARM_AM::getSOImmVal(Imm) != -1;
 176   }
 177
 178   inline bool is_so_imm_not(unsigned Imm) const {
 179     return ARM_AM::getSOImmVal(~Imm) != -1;
 180   }
 181
 182   inline bool is_t2_so_imm(unsigned Imm) const {
 183     return ARM_AM::getT2SOImmVal(Imm) != -1;
 184   }
 185
 186   inline bool is_t2_so_imm_not(unsigned Imm) const {
 187     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 188   }
 189
 190   // Include the pieces autogenerated from the target description.
 191 #include "ARMGenDAGISel.inc"
 192
 193 private:
 194   void transferMemOperands(SDNode *Src, SDNode *Dst);
 195
 196   /// Indexed (pre/post inc/dec) load matching code for ARM.
 197   bool tryARMIndexedLoad(SDNode *N);
 198   bool tryT1IndexedLoad(SDNode *N);
 199   bool tryT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 207                  const uint16_t *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 215                  const uint16_t *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 221                        unsigned NumVecs, const uint16_t *DOpcodes,
 222                        const uint16_t *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *DOpcodes,
 229                     const uint16_t *QOpcodes = nullptr);
 230
 231   /// Try to select SBFX/UBFX instructions for ARM.
 232   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 233
 234   // Select special operations if node forms integer ABS pattern
 235   bool tryABSOp(SDNode *N);
 236
 237   bool tryReadRegister(SDNode *N);
 238   bool tryWriteRegister(SDNode *N);
 239
 240   bool tryInlineAsm(SDNode *N);
 241
 242   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 243
 244   void SelectCMP_SWAP(SDNode *N);
 245
 246   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 247   /// inline asm expressions.
 248   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 249                                     std::vector<SDValue> &OutOps) override;
 250
 251   // Form pairs of consecutive R, S, D, or Q registers.
 252   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 253   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 254   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 255   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 256
 257   // Form sequences of 4 consecutive S, D, or Q registers.
 258   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 259   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 260   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 261
 262   // Get the alignment operand for a NEON VLD or VST instruction.
 263   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 264                         bool is64BitVector);
 265
 266   /// Returns the number of instructions required to materialize the given
 267   /// constant in a register, or 3 if a literal pool load is needed.
 268   unsigned ConstantMaterializationCost(unsigned Val) const;
 269
 270   /// Checks if N is a multiplication by a constant where we can extract out a
 271   /// power of two from the constant so that it can be used in a shift, but only
 272   /// if it simplifies the materialization of the constant. Returns true if it
 273   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 274   /// out and to NewMulConst the new constant to be multiplied by.
 275   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 276                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 277
 278   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 279   /// selected when N would have been selected.
 280   void replaceDAGValue(const SDValue &N, SDValue M);
 281 };
 282 }
 283
 284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 285 /// operand. If so Imm will receive the 32-bit value.
 286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 287   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 288     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 289     return true;
 290   }
 291   return false;
 292 }
 293
 294 // isInt32Immediate - This method tests to see if a constant operand.
 295 // If so Imm will receive the 32 bit value.
 296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 297   return isInt32Immediate(N.getNode(), Imm);
 298 }
 299
 300 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 301 // opcode and that it has a immediate integer right operand.
 302 // If so Imm will receive the 32 bit value.
 303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 304   return N->getOpcode() == Opc &&
 305          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 306 }
 307
 308 /// \brief Check whether a particular node is a constant value representable as
 309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 310 ///
 311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 312 static bool isScaledConstantInRange(SDValue Node, int Scale,
 313                                     int RangeMin, int RangeMax,
 314                                     int &ScaledConstant) {
 315   assert(Scale > 0 && "Invalid scale!");
 316
 317   // Check that this is a constant.
 318   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 319   if (!C)
 320     return false;
 321
 322   ScaledConstant = (int) C->getZExtValue();
 323   if ((ScaledConstant % Scale) != 0)
 324     return false;
 325
 326   ScaledConstant /= Scale;
 327   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 328 }
 329
 330 void ARMDAGToDAGISel::PreprocessISelDAG() {
 331   if (!Subtarget->hasV6T2Ops())
 332     return;
 333
 334   bool isThumb2 = Subtarget->isThumb();
 335   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 336        E = CurDAG->allnodes_end(); I != E; ) {
 337     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 338
 339     if (N->getOpcode() != ISD::ADD)
 340       continue;
 341
 342     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 343     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 344     // trailing zeros, e.g. 1020.
 345     // Transform the expression to
 346     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 347     // of trailing zeros of c2. The left shift would be folded as an shifter
 348     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 349     // node (UBFX).
 350
 351     SDValue N0 = N->getOperand(0);
 352     SDValue N1 = N->getOperand(1);
 353     unsigned And_imm = 0;
 354     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 355       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 356         std::swap(N0, N1);
 357     }
 358     if (!And_imm)
 359       continue;
 360
 361     // Check if the AND mask is an immediate of the form: 000.....1111111100
 362     unsigned TZ = countTrailingZeros(And_imm);
 363     if (TZ != 1 && TZ != 2)
 364       // Be conservative here. Shifter operands aren't always free. e.g. On
 365       // Swift, left shifter operand of 1 / 2 for free but others are not.
 366       // e.g.
 367       //  ubfx   r3, r1, #16, #8
 368       //  ldr.w  r3, [r0, r3, lsl #2]
 369       // vs.
 370       //  mov.w  r9, #1020
 371       //  and.w  r2, r9, r1, lsr #14
 372       //  ldr    r2, [r0, r2]
 373       continue;
 374     And_imm >>= TZ;
 375     if (And_imm & (And_imm + 1))
 376       continue;
 377
 378     // Look for (and (srl X, c1), c2).
 379     SDValue Srl = N1.getOperand(0);
 380     unsigned Srl_imm = 0;
 381     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 382         (Srl_imm <= 2))
 383       continue;
 384
 385     // Make sure first operand is not a shifter operand which would prevent
 386     // folding of the left shift.
 387     SDValue CPTmp0;
 388     SDValue CPTmp1;
 389     SDValue CPTmp2;
 390     if (isThumb2) {
 391       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 392         continue;
 393     } else {
 394       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 395           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 396         continue;
 397     }
 398
 399     // Now make the transformation.
 400     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 401                           Srl.getOperand(0),
 402                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 403                                               MVT::i32));
 404     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 405                          Srl,
 406                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 407     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 408                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 409     CurDAG->UpdateNodeOperands(N, N0, N1);
 410   }
 411 }
 412
 413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 415 /// least on current ARM implementations) which should be avoidded.
 416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 417   if (OptLevel == CodeGenOpt::None)
 418     return true;
 419
 420   if (!Subtarget->hasVMLxHazards())
 421     return true;
 422
 423   if (!N->hasOneUse())
 424     return false;
 425
 426   SDNode *Use = *N->use_begin();
 427   if (Use->getOpcode() == ISD::CopyToReg)
 428     return true;
 429   if (Use->isMachineOpcode()) {
 430     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 431         CurDAG->getSubtarget().getInstrInfo());
 432
 433     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 434     if (MCID.mayStore())
 435       return true;
 436     unsigned Opcode = MCID.getOpcode();
 437     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 438       return true;
 439     // vmlx feeding into another vmlx. We actually want to unfold
 440     // the use later in the MLxExpansion pass. e.g.
 441     // vmla
 442     // vmla (stall 8 cycles)
 443     //
 444     // vmul (5 cycles)
 445     // vadd (5 cycles)
 446     // vmla
 447     // This adds up to about 18 - 19 cycles.
 448     //
 449     // vmla
 450     // vmul (stall 4 cycles)
 451     // vadd adds up to about 14 cycles.
 452     return TII->isFpMLxInstruction(Opcode);
 453   }
 454
 455   return false;
 456 }
 457
 458 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 459                                             ARM_AM::ShiftOpc ShOpcVal,
 460                                             unsigned ShAmt) {
 461   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 462     return true;
 463   if (Shift.hasOneUse())
 464     return true;
 465   // R << 2 is free.
 466   return ShOpcVal == ARM_AM::lsl &&
 467          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 468 }
 469
 470 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 471   if (Subtarget->isThumb()) {
 472     if (Val <= 255) return 1;                               // MOV
 473     if (Subtarget->hasV6T2Ops() &&
 474         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 475       return 1; // MOVW
 476     if (Val <= 510) return 2;                               // MOV + ADDi8
 477     if (~Val <= 255) return 2;                              // MOV + MVN
 478     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 479   } else {
 480     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 481     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 482     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 483     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 484   }
 485   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 486   return 3; // Literal pool load
 487 }
 488
 489 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 490                                              unsigned MaxShift,
 491                                              unsigned &PowerOfTwo,
 492                                              SDValue &NewMulConst) const {
 493   assert(N.getOpcode() == ISD::MUL);
 494   assert(MaxShift > 0);
 495
 496   // If the multiply is used in more than one place then changing the constant
 497   // will make other uses incorrect, so don't.
 498   if (!N.hasOneUse()) return false;
 499   // Check if the multiply is by a constant
 500   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 501   if (!MulConst) return false;
 502   // If the constant is used in more than one place then modifying it will mean
 503   // we need to materialize two constants instead of one, which is a bad idea.
 504   if (!MulConst->hasOneUse()) return false;
 505   unsigned MulConstVal = MulConst->getZExtValue();
 506   if (MulConstVal == 0) return false;
 507
 508   // Find the largest power of 2 that MulConstVal is a multiple of
 509   PowerOfTwo = MaxShift;
 510   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 511     --PowerOfTwo;
 512     if (PowerOfTwo == 0) return false;
 513   }
 514
 515   // Only optimise if the new cost is better
 516   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 517   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 518   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 519   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 520   return NewCost < OldCost;
 521 }
 522
 523 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 524   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 525   CurDAG->ReplaceAllUsesWith(N, M);
 526 }
 527
 528 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 529                                               SDValue &BaseReg,
 530                                               SDValue &Opc,
 531                                               bool CheckProfitability) {
 532   if (DisableShifterOp)
 533     return false;
 534
 535   // If N is a multiply-by-constant and it's profitable to extract a shift and
 536   // use it in a shifted operand do so.
 537   if (N.getOpcode() == ISD::MUL) {
 538     unsigned PowerOfTwo = 0;
 539     SDValue NewMulConst;
 540     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 541       HandleSDNode Handle(N);
 542       SDLoc Loc(N);
 543       replaceDAGValue(N.getOperand(1), NewMulConst);
 544       BaseReg = Handle.getValue();
 545       Opc = CurDAG->getTargetConstant(
 546           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 547       return true;
 548     }
 549   }
 550
 551   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 552
 553   // Don't match base register only case. That is matched to a separate
 554   // lower complexity pattern with explicit register operand.
 555   if (ShOpcVal == ARM_AM::no_shift) return false;
 556
 557   BaseReg = N.getOperand(0);
 558   unsigned ShImmVal = 0;
 559   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 560   if (!RHS) return false;
 561   ShImmVal = RHS->getZExtValue() & 31;
 562   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 563                                   SDLoc(N), MVT::i32);
 564   return true;
 565 }
 566
 567 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 568                                               SDValue &BaseReg,
 569                                               SDValue &ShReg,
 570                                               SDValue &Opc,
 571                                               bool CheckProfitability) {
 572   if (DisableShifterOp)
 573     return false;
 574
 575   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 576
 577   // Don't match base register only case. That is matched to a separate
 578   // lower complexity pattern with explicit register operand.
 579   if (ShOpcVal == ARM_AM::no_shift) return false;
 580
 581   BaseReg = N.getOperand(0);
 582   unsigned ShImmVal = 0;
 583   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 584   if (RHS) return false;
 585
 586   ShReg = N.getOperand(1);
 587   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 588     return false;
 589   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 590                                   SDLoc(N), MVT::i32);
 591   return true;
 592 }
 593
 594
 595 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 596                                           SDValue &Base,
 597                                           SDValue &OffImm) {
 598   // Match simple R + imm12 operands.
 599
 600   // Base only.
 601   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 602       !CurDAG->isBaseWithConstantOffset(N)) {
 603     if (N.getOpcode() == ISD::FrameIndex) {
 604       // Match frame index.
 605       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 606       Base = CurDAG->getTargetFrameIndex(
 607           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 608       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 609       return true;
 610     }
 611
 612     if (N.getOpcode() == ARMISD::Wrapper &&
 613         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 614         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 615         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 616       Base = N.getOperand(0);
 617     } else
 618       Base = N;
 619     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 620     return true;
 621   }
 622
 623   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 624     int RHSC = (int)RHS->getSExtValue();
 625     if (N.getOpcode() == ISD::SUB)
 626       RHSC = -RHSC;
 627
 628     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 629       Base   = N.getOperand(0);
 630       if (Base.getOpcode() == ISD::FrameIndex) {
 631         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 632         Base = CurDAG->getTargetFrameIndex(
 633             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 634       }
 635       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 636       return true;
 637     }
 638   }
 639
 640   // Base only.
 641   Base = N;
 642   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 643   return true;
 644 }
 645
 646
 647
 648 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 649                                       SDValue &Opc) {
 650   if (N.getOpcode() == ISD::MUL &&
 651       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 652     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 653       // X * [3,5,9] -> X + X * [2,4,8] etc.
 654       int RHSC = (int)RHS->getZExtValue();
 655       if (RHSC & 1) {
 656         RHSC = RHSC & ~1;
 657         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 658         if (RHSC < 0) {
 659           AddSub = ARM_AM::sub;
 660           RHSC = - RHSC;
 661         }
 662         if (isPowerOf2_32(RHSC)) {
 663           unsigned ShAmt = Log2_32(RHSC);
 664           Base = Offset = N.getOperand(0);
 665           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 666                                                             ARM_AM::lsl),
 667                                           SDLoc(N), MVT::i32);
 668           return true;
 669         }
 670       }
 671     }
 672   }
 673
 674   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 675       // ISD::OR that is equivalent to an ISD::ADD.
 676       !CurDAG->isBaseWithConstantOffset(N))
 677     return false;
 678
 679   // Leave simple R +/- imm12 operands for LDRi12
 680   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 681     int RHSC;
 682     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 683                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 684       return false;
 685   }
 686
 687   // Otherwise this is R +/- [possibly shifted] R.
 688   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 689   ARM_AM::ShiftOpc ShOpcVal =
 690     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 691   unsigned ShAmt = 0;
 692
 693   Base   = N.getOperand(0);
 694   Offset = N.getOperand(1);
 695
 696   if (ShOpcVal != ARM_AM::no_shift) {
 697     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 698     // it.
 699     if (ConstantSDNode *Sh =
 700            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 701       ShAmt = Sh->getZExtValue();
 702       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 703         Offset = N.getOperand(1).getOperand(0);
 704       else {
 705         ShAmt = 0;
 706         ShOpcVal = ARM_AM::no_shift;
 707       }
 708     } else {
 709       ShOpcVal = ARM_AM::no_shift;
 710     }
 711   }
 712
 713   // Try matching (R shl C) + (R).
 714   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 715       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 716         N.getOperand(0).hasOneUse())) {
 717     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 718     if (ShOpcVal != ARM_AM::no_shift) {
 719       // Check to see if the RHS of the shift is a constant, if not, we can't
 720       // fold it.
 721       if (ConstantSDNode *Sh =
 722           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 723         ShAmt = Sh->getZExtValue();
 724         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 725           Offset = N.getOperand(0).getOperand(0);
 726           Base = N.getOperand(1);
 727         } else {
 728           ShAmt = 0;
 729           ShOpcVal = ARM_AM::no_shift;
 730         }
 731       } else {
 732         ShOpcVal = ARM_AM::no_shift;
 733       }
 734     }
 735   }
 736
 737   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 738   // and use it in a shifted operand do so.
 739   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 740     unsigned PowerOfTwo = 0;
 741     SDValue NewMulConst;
 742     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 743       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 744       ShAmt = PowerOfTwo;
 745       ShOpcVal = ARM_AM::lsl;
 746     }
 747   }
 748
 749   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 750                                   SDLoc(N), MVT::i32);
 751   return true;
 752 }
 753
 754
 755 //-----
 756
 757 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 758                                                      SDValue &Base,
 759                                                      SDValue &Offset,
 760                                                      SDValue &Opc) {
 761   if (N.getOpcode() == ISD::MUL &&
 762       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 763     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 764       // X * [3,5,9] -> X + X * [2,4,8] etc.
 765       int RHSC = (int)RHS->getZExtValue();
 766       if (RHSC & 1) {
 767         RHSC = RHSC & ~1;
 768         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 769         if (RHSC < 0) {
 770           AddSub = ARM_AM::sub;
 771           RHSC = - RHSC;
 772         }
 773         if (isPowerOf2_32(RHSC)) {
 774           unsigned ShAmt = Log2_32(RHSC);
 775           Base = Offset = N.getOperand(0);
 776           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 777                                                             ARM_AM::lsl),
 778                                           SDLoc(N), MVT::i32);
 779           return AM2_SHOP;
 780         }
 781       }
 782     }
 783   }
 784
 785   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 786       // ISD::OR that is equivalent to an ADD.
 787       !CurDAG->isBaseWithConstantOffset(N)) {
 788     Base = N;
 789     if (N.getOpcode() == ISD::FrameIndex) {
 790       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 791       Base = CurDAG->getTargetFrameIndex(
 792           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 793     } else if (N.getOpcode() == ARMISD::Wrapper &&
 794                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 795                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 796                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 797       Base = N.getOperand(0);
 798     }
 799     Offset = CurDAG->getRegister(0, MVT::i32);
 800     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 801                                                       ARM_AM::no_shift),
 802                                     SDLoc(N), MVT::i32);
 803     return AM2_BASE;
 804   }
 805
 806   // Match simple R +/- imm12 operands.
 807   if (N.getOpcode() != ISD::SUB) {
 808     int RHSC;
 809     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 810                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 811       Base = N.getOperand(0);
 812       if (Base.getOpcode() == ISD::FrameIndex) {
 813         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 814         Base = CurDAG->getTargetFrameIndex(
 815             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 816       }
 817       Offset = CurDAG->getRegister(0, MVT::i32);
 818
 819       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 820       if (RHSC < 0) {
 821         AddSub = ARM_AM::sub;
 822         RHSC = - RHSC;
 823       }
 824       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 825                                                         ARM_AM::no_shift),
 826                                       SDLoc(N), MVT::i32);
 827       return AM2_BASE;
 828     }
 829   }
 830
 831   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 832     // Compute R +/- (R << N) and reuse it.
 833     Base = N;
 834     Offset = CurDAG->getRegister(0, MVT::i32);
 835     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 836                                                       ARM_AM::no_shift),
 837                                     SDLoc(N), MVT::i32);
 838     return AM2_BASE;
 839   }
 840
 841   // Otherwise this is R +/- [possibly shifted] R.
 842   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 843   ARM_AM::ShiftOpc ShOpcVal =
 844     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 845   unsigned ShAmt = 0;
 846
 847   Base   = N.getOperand(0);
 848   Offset = N.getOperand(1);
 849
 850   if (ShOpcVal != ARM_AM::no_shift) {
 851     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 852     // it.
 853     if (ConstantSDNode *Sh =
 854            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 855       ShAmt = Sh->getZExtValue();
 856       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 857         Offset = N.getOperand(1).getOperand(0);
 858       else {
 859         ShAmt = 0;
 860         ShOpcVal = ARM_AM::no_shift;
 861       }
 862     } else {
 863       ShOpcVal = ARM_AM::no_shift;
 864     }
 865   }
 866
 867   // Try matching (R shl C) + (R).
 868   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 869       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 870         N.getOperand(0).hasOneUse())) {
 871     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 872     if (ShOpcVal != ARM_AM::no_shift) {
 873       // Check to see if the RHS of the shift is a constant, if not, we can't
 874       // fold it.
 875       if (ConstantSDNode *Sh =
 876           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 877         ShAmt = Sh->getZExtValue();
 878         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 879           Offset = N.getOperand(0).getOperand(0);
 880           Base = N.getOperand(1);
 881         } else {
 882           ShAmt = 0;
 883           ShOpcVal = ARM_AM::no_shift;
 884         }
 885       } else {
 886         ShOpcVal = ARM_AM::no_shift;
 887       }
 888     }
 889   }
 890
 891   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 892                                   SDLoc(N), MVT::i32);
 893   return AM2_SHOP;
 894 }
 895
 896 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 897                                             SDValue &Offset, SDValue &Opc) {
 898   unsigned Opcode = Op->getOpcode();
 899   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 900     ? cast<LoadSDNode>(Op)->getAddressingMode()
 901     : cast<StoreSDNode>(Op)->getAddressingMode();
 902   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 903     ? ARM_AM::add : ARM_AM::sub;
 904   int Val;
 905   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 906     return false;
 907
 908   Offset = N;
 909   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 910   unsigned ShAmt = 0;
 911   if (ShOpcVal != ARM_AM::no_shift) {
 912     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 913     // it.
 914     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 915       ShAmt = Sh->getZExtValue();
 916       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 917         Offset = N.getOperand(0);
 918       else {
 919         ShAmt = 0;
 920         ShOpcVal = ARM_AM::no_shift;
 921       }
 922     } else {
 923       ShOpcVal = ARM_AM::no_shift;
 924     }
 925   }
 926
 927   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 928                                   SDLoc(N), MVT::i32);
 929   return true;
 930 }
 931
 932 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 933                                             SDValue &Offset, SDValue &Opc) {
 934   unsigned Opcode = Op->getOpcode();
 935   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 936     ? cast<LoadSDNode>(Op)->getAddressingMode()
 937     : cast<StoreSDNode>(Op)->getAddressingMode();
 938   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 939     ? ARM_AM::add : ARM_AM::sub;
 940   int Val;
 941   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 942     if (AddSub == ARM_AM::sub) Val *= -1;
 943     Offset = CurDAG->getRegister(0, MVT::i32);
 944     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 945     return true;
 946   }
 947
 948   return false;
 949 }
 950
 951
 952 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 953                                             SDValue &Offset, SDValue &Opc) {
 954   unsigned Opcode = Op->getOpcode();
 955   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 956     ? cast<LoadSDNode>(Op)->getAddressingMode()
 957     : cast<StoreSDNode>(Op)->getAddressingMode();
 958   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 959     ? ARM_AM::add : ARM_AM::sub;
 960   int Val;
 961   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 962     Offset = CurDAG->getRegister(0, MVT::i32);
 963     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 964                                                       ARM_AM::no_shift),
 965                                     SDLoc(Op), MVT::i32);
 966     return true;
 967   }
 968
 969   return false;
 970 }
 971
 972 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 973   Base = N;
 974   return true;
 975 }
 976
 977 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 978                                       SDValue &Base, SDValue &Offset,
 979                                       SDValue &Opc) {
 980   if (N.getOpcode() == ISD::SUB) {
 981     // X - C  is canonicalize to X + -C, no need to handle it here.
 982     Base = N.getOperand(0);
 983     Offset = N.getOperand(1);
 984     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 985                                     MVT::i32);
 986     return true;
 987   }
 988
 989   if (!CurDAG->isBaseWithConstantOffset(N)) {
 990     Base = N;
 991     if (N.getOpcode() == ISD::FrameIndex) {
 992       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 993       Base = CurDAG->getTargetFrameIndex(
 994           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 995     }
 996     Offset = CurDAG->getRegister(0, MVT::i32);
 997     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 998                                     MVT::i32);
 999     return true;
1000   }
1001
1002   // If the RHS is +/- imm8, fold into addr mode.
1003   int RHSC;
1004   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1005                               -256 + 1, 256, RHSC)) { // 8 bits.
1006     Base = N.getOperand(0);
1007     if (Base.getOpcode() == ISD::FrameIndex) {
1008       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1009       Base = CurDAG->getTargetFrameIndex(
1010           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1011     }
1012     Offset = CurDAG->getRegister(0, MVT::i32);
1013
1014     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1015     if (RHSC < 0) {
1016       AddSub = ARM_AM::sub;
1017       RHSC = -RHSC;
1018     }
1019     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1020                                     MVT::i32);
1021     return true;
1022   }
1023
1024   Base = N.getOperand(0);
1025   Offset = N.getOperand(1);
1026   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1027                                   MVT::i32);
1028   return true;
1029 }
1030
1031 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1032                                             SDValue &Offset, SDValue &Opc) {
1033   unsigned Opcode = Op->getOpcode();
1034   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1035     ? cast<LoadSDNode>(Op)->getAddressingMode()
1036     : cast<StoreSDNode>(Op)->getAddressingMode();
1037   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1038     ? ARM_AM::add : ARM_AM::sub;
1039   int Val;
1040   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1041     Offset = CurDAG->getRegister(0, MVT::i32);
1042     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1043                                     MVT::i32);
1044     return true;
1045   }
1046
1047   Offset = N;
1048   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1049                                   MVT::i32);
1050   return true;
1051 }
1052
1053 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1054                                       SDValue &Base, SDValue &Offset) {
1055   if (!CurDAG->isBaseWithConstantOffset(N)) {
1056     Base = N;
1057     if (N.getOpcode() == ISD::FrameIndex) {
1058       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1059       Base = CurDAG->getTargetFrameIndex(
1060           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1061     } else if (N.getOpcode() == ARMISD::Wrapper &&
1062                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1063                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1064                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1065       Base = N.getOperand(0);
1066     }
1067     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1068                                        SDLoc(N), MVT::i32);
1069     return true;
1070   }
1071
1072   // If the RHS is +/- imm8, fold into addr mode.
1073   int RHSC;
1074   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1075                               -256 + 1, 256, RHSC)) {
1076     Base = N.getOperand(0);
1077     if (Base.getOpcode() == ISD::FrameIndex) {
1078       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1079       Base = CurDAG->getTargetFrameIndex(
1080           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1081     }
1082
1083     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1084     if (RHSC < 0) {
1085       AddSub = ARM_AM::sub;
1086       RHSC = -RHSC;
1087     }
1088     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1089                                        SDLoc(N), MVT::i32);
1090     return true;
1091   }
1092
1093   Base = N;
1094   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1095                                      SDLoc(N), MVT::i32);
1096   return true;
1097 }
1098
1099 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1100                                       SDValue &Align) {
1101   Addr = N;
1102
1103   unsigned Alignment = 0;
1104
1105   MemSDNode *MemN = cast<MemSDNode>(Parent);
1106
1107   if (isa<LSBaseSDNode>(MemN) ||
1108       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1109         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1110        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1111     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1112     // The maximum alignment is equal to the memory size being referenced.
1113     unsigned MMOAlign = MemN->getAlignment();
1114     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1115     if (MMOAlign >= MemSize && MemSize > 1)
1116       Alignment = MemSize;
1117   } else {
1118     // All other uses of addrmode6 are for intrinsics.  For now just record
1119     // the raw alignment value; it will be refined later based on the legal
1120     // alignment operands for the intrinsic.
1121     Alignment = MemN->getAlignment();
1122   }
1123
1124   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1125   return true;
1126 }
1127
1128 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1129                                             SDValue &Offset) {
1130   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1131   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1132   if (AM != ISD::POST_INC)
1133     return false;
1134   Offset = N;
1135   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1136     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1137       Offset = CurDAG->getRegister(0, MVT::i32);
1138   }
1139   return true;
1140 }
1141
1142 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1143                                        SDValue &Offset, SDValue &Label) {
1144   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1145     Offset = N.getOperand(0);
1146     SDValue N1 = N.getOperand(1);
1147     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1148                                       SDLoc(N), MVT::i32);
1149     return true;
1150   }
1151
1152   return false;
1153 }
1154
1155
1156 //===----------------------------------------------------------------------===//
1157 //                         Thumb Addressing Modes
1158 //===----------------------------------------------------------------------===//
1159
1160 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1161                                             SDValue &Base, SDValue &Offset){
1162   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1163     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1164     if (!NC || !NC->isNullValue())
1165       return false;
1166
1167     Base = Offset = N;
1168     return true;
1169   }
1170
1171   Base = N.getOperand(0);
1172   Offset = N.getOperand(1);
1173   return true;
1174 }
1175
1176 bool
1177 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1178                                           SDValue &Base, SDValue &OffImm) {
1179   if (!CurDAG->isBaseWithConstantOffset(N)) {
1180     if (N.getOpcode() == ISD::ADD) {
1181       return false; // We want to select register offset instead
1182     } else if (N.getOpcode() == ARMISD::Wrapper &&
1183         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1184         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1185         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1186         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1187       Base = N.getOperand(0);
1188     } else {
1189       Base = N;
1190     }
1191
1192     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1193     return true;
1194   }
1195
1196   // If the RHS is + imm5 * scale, fold into addr mode.
1197   int RHSC;
1198   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1199     Base = N.getOperand(0);
1200     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1201     return true;
1202   }
1203
1204   // Offset is too large, so use register offset instead.
1205   return false;
1206 }
1207
1208 bool
1209 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1210                                            SDValue &OffImm) {
1211   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1212 }
1213
1214 bool
1215 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1216                                            SDValue &OffImm) {
1217   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1218 }
1219
1220 bool
1221 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1222                                            SDValue &OffImm) {
1223   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1224 }
1225
1226 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1227                                             SDValue &Base, SDValue &OffImm) {
1228   if (N.getOpcode() == ISD::FrameIndex) {
1229     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1230     // Only multiples of 4 are allowed for the offset, so the frame object
1231     // alignment must be at least 4.
1232     MachineFrameInfo &MFI = MF->getFrameInfo();
1233     if (MFI.getObjectAlignment(FI) < 4)
1234       MFI.setObjectAlignment(FI, 4);
1235     Base = CurDAG->getTargetFrameIndex(
1236         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1237     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1238     return true;
1239   }
1240
1241   if (!CurDAG->isBaseWithConstantOffset(N))
1242     return false;
1243
1244   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1245   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1246       (LHSR && LHSR->getReg() == ARM::SP)) {
1247     // If the RHS is + imm8 * scale, fold into addr mode.
1248     int RHSC;
1249     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1250       Base = N.getOperand(0);
1251       if (Base.getOpcode() == ISD::FrameIndex) {
1252         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1254         // indexed by the LHS must be 4-byte aligned.
1255         MachineFrameInfo &MFI = MF->getFrameInfo();
1256         if (MFI.getObjectAlignment(FI) < 4)
1257           MFI.setObjectAlignment(FI, 4);
1258         Base = CurDAG->getTargetFrameIndex(
1259             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1260       }
1261       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1262       return true;
1263     }
1264   }
1265
1266   return false;
1267 }
1268
1269
1270 //===----------------------------------------------------------------------===//
1271 //                        Thumb 2 Addressing Modes
1272 //===----------------------------------------------------------------------===//
1273
1274
1275 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1276                                             SDValue &Base, SDValue &OffImm) {
1277   // Match simple R + imm12 operands.
1278
1279   // Base only.
1280   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1281       !CurDAG->isBaseWithConstantOffset(N)) {
1282     if (N.getOpcode() == ISD::FrameIndex) {
1283       // Match frame index.
1284       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1285       Base = CurDAG->getTargetFrameIndex(
1286           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1287       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1288       return true;
1289     }
1290
1291     if (N.getOpcode() == ARMISD::Wrapper &&
1292         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1293         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1295       Base = N.getOperand(0);
1296       if (Base.getOpcode() == ISD::TargetConstantPool)
1297         return false;  // We want to select t2LDRpci instead.
1298     } else
1299       Base = N;
1300     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1301     return true;
1302   }
1303
1304   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1305     if (SelectT2AddrModeImm8(N, Base, OffImm))
1306       // Let t2LDRi8 handle (R - imm8).
1307       return false;
1308
1309     int RHSC = (int)RHS->getZExtValue();
1310     if (N.getOpcode() == ISD::SUB)
1311       RHSC = -RHSC;
1312
1313     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1314       Base   = N.getOperand(0);
1315       if (Base.getOpcode() == ISD::FrameIndex) {
1316         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1317         Base = CurDAG->getTargetFrameIndex(
1318             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1319       }
1320       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1321       return true;
1322     }
1323   }
1324
1325   // Base only.
1326   Base = N;
1327   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1328   return true;
1329 }
1330
1331 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1332                                            SDValue &Base, SDValue &OffImm) {
1333   // Match simple R - imm8 operands.
1334   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1335       !CurDAG->isBaseWithConstantOffset(N))
1336     return false;
1337
1338   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1339     int RHSC = (int)RHS->getSExtValue();
1340     if (N.getOpcode() == ISD::SUB)
1341       RHSC = -RHSC;
1342
1343     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1344       Base = N.getOperand(0);
1345       if (Base.getOpcode() == ISD::FrameIndex) {
1346         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1347         Base = CurDAG->getTargetFrameIndex(
1348             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1349       }
1350       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1351       return true;
1352     }
1353   }
1354
1355   return false;
1356 }
1357
1358 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1359                                                  SDValue &OffImm){
1360   unsigned Opcode = Op->getOpcode();
1361   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1362     ? cast<LoadSDNode>(Op)->getAddressingMode()
1363     : cast<StoreSDNode>(Op)->getAddressingMode();
1364   int RHSC;
1365   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1366     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1367       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1368       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1369     return true;
1370   }
1371
1372   return false;
1373 }
1374
1375 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1376                                             SDValue &Base,
1377                                             SDValue &OffReg, SDValue &ShImm) {
1378   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1379   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1380     return false;
1381
1382   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1383   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1384     int RHSC = (int)RHS->getZExtValue();
1385     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1386       return false;
1387     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1388       return false;
1389   }
1390
1391   // Look for (R + R) or (R + (R << [1,2,3])).
1392   unsigned ShAmt = 0;
1393   Base   = N.getOperand(0);
1394   OffReg = N.getOperand(1);
1395
1396   // Swap if it is ((R << c) + R).
1397   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1398   if (ShOpcVal != ARM_AM::lsl) {
1399     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1400     if (ShOpcVal == ARM_AM::lsl)
1401       std::swap(Base, OffReg);
1402   }
1403
1404   if (ShOpcVal == ARM_AM::lsl) {
1405     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1406     // it.
1407     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1408       ShAmt = Sh->getZExtValue();
1409       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1410         OffReg = OffReg.getOperand(0);
1411       else {
1412         ShAmt = 0;
1413       }
1414     }
1415   }
1416
1417   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1418   // and use it in a shifted operand do so.
1419   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1420     unsigned PowerOfTwo = 0;
1421     SDValue NewMulConst;
1422     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1423       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1424       ShAmt = PowerOfTwo;
1425     }
1426   }
1427
1428   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1429
1430   return true;
1431 }
1432
1433 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1434                                                 SDValue &OffImm) {
1435   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1436   // instructions.
1437   Base = N;
1438   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1439
1440   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1441     return true;
1442
1443   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1444   if (!RHS)
1445     return true;
1446
1447   uint32_t RHSC = (int)RHS->getZExtValue();
1448   if (RHSC > 1020 || RHSC % 4 != 0)
1449     return true;
1450
1451   Base = N.getOperand(0);
1452   if (Base.getOpcode() == ISD::FrameIndex) {
1453     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1454     Base = CurDAG->getTargetFrameIndex(
1455         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1456   }
1457
1458   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1459   return true;
1460 }
1461
1462 //===--------------------------------------------------------------------===//
1463
1464 /// getAL - Returns a ARMCC::AL immediate node.
1465 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1466   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1467 }
1468
1469 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1470   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1471   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1472   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1473 }
1474
1475 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1476   LoadSDNode *LD = cast<LoadSDNode>(N);
1477   ISD::MemIndexedMode AM = LD->getAddressingMode();
1478   if (AM == ISD::UNINDEXED)
1479     return false;
1480
1481   EVT LoadedVT = LD->getMemoryVT();
1482   SDValue Offset, AMOpc;
1483   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1484   unsigned Opcode = 0;
1485   bool Match = false;
1486   if (LoadedVT == MVT::i32 && isPre &&
1487       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1488     Opcode = ARM::LDR_PRE_IMM;
1489     Match = true;
1490   } else if (LoadedVT == MVT::i32 && !isPre &&
1491       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1492     Opcode = ARM::LDR_POST_IMM;
1493     Match = true;
1494   } else if (LoadedVT == MVT::i32 &&
1495       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1496     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1497     Match = true;
1498
1499   } else if (LoadedVT == MVT::i16 &&
1500              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1501     Match = true;
1502     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1503       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1504       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1505   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1506     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1507       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1508         Match = true;
1509         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1510       }
1511     } else {
1512       if (isPre &&
1513           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1514         Match = true;
1515         Opcode = ARM::LDRB_PRE_IMM;
1516       } else if (!isPre &&
1517                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1518         Match = true;
1519         Opcode = ARM::LDRB_POST_IMM;
1520       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1521         Match = true;
1522         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1523       }
1524     }
1525   }
1526
1527   if (Match) {
1528     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1529       SDValue Chain = LD->getChain();
1530       SDValue Base = LD->getBasePtr();
1531       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1532                        CurDAG->getRegister(0, MVT::i32), Chain };
1533       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1534                                            MVT::Other, Ops);
1535       transferMemOperands(N, New);
1536       ReplaceNode(N, New);
1537       return true;
1538     } else {
1539       SDValue Chain = LD->getChain();
1540       SDValue Base = LD->getBasePtr();
1541       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1542                        CurDAG->getRegister(0, MVT::i32), Chain };
1543       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1544                                            MVT::Other, Ops);
1545       transferMemOperands(N, New);
1546       ReplaceNode(N, New);
1547       return true;
1548     }
1549   }
1550
1551   return false;
1552 }
1553
1554 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1555   LoadSDNode *LD = cast<LoadSDNode>(N);
1556   EVT LoadedVT = LD->getMemoryVT();
1557   ISD::MemIndexedMode AM = LD->getAddressingMode();
1558   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1559       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1560     return false;
1561
1562   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1563   if (!COffs || COffs->getZExtValue() != 4)
1564     return false;
1565
1566   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1567   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1568   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1569   // ISel.
1570   SDValue Chain = LD->getChain();
1571   SDValue Base = LD->getBasePtr();
1572   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1573                    CurDAG->getRegister(0, MVT::i32), Chain };
1574   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1575                                        MVT::i32, MVT::Other, Ops);
1576   transferMemOperands(N, New);
1577   ReplaceNode(N, New);
1578   return true;
1579 }
1580
1581 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1582   LoadSDNode *LD = cast<LoadSDNode>(N);
1583   ISD::MemIndexedMode AM = LD->getAddressingMode();
1584   if (AM == ISD::UNINDEXED)
1585     return false;
1586
1587   EVT LoadedVT = LD->getMemoryVT();
1588   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1589   SDValue Offset;
1590   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1591   unsigned Opcode = 0;
1592   bool Match = false;
1593   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1594     switch (LoadedVT.getSimpleVT().SimpleTy) {
1595     case MVT::i32:
1596       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1597       break;
1598     case MVT::i16:
1599       if (isSExtLd)
1600         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1601       else
1602         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1603       break;
1604     case MVT::i8:
1605     case MVT::i1:
1606       if (isSExtLd)
1607         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1608       else
1609         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1610       break;
1611     default:
1612       return false;
1613     }
1614     Match = true;
1615   }
1616
1617   if (Match) {
1618     SDValue Chain = LD->getChain();
1619     SDValue Base = LD->getBasePtr();
1620     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1621                      CurDAG->getRegister(0, MVT::i32), Chain };
1622     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1623                                          MVT::Other, Ops);
1624     transferMemOperands(N, New);
1625     ReplaceNode(N, New);
1626     return true;
1627   }
1628
1629   return false;
1630 }
1631
1632 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1633 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1634   SDLoc dl(V0.getNode());
1635   SDValue RegClass =
1636     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1637   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1638   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1639   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1640   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1641 }
1642
1643 /// \brief Form a D register from a pair of S registers.
1644 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1645   SDLoc dl(V0.getNode());
1646   SDValue RegClass =
1647     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1648   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1649   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1650   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1651   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1652 }
1653
1654 /// \brief Form a quad register from a pair of D registers.
1655 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1656   SDLoc dl(V0.getNode());
1657   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1658                                                MVT::i32);
1659   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1660   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1661   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1662   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1663 }
1664
1665 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1666 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1667   SDLoc dl(V0.getNode());
1668   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1669                                                MVT::i32);
1670   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1671   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1672   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1673   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1674 }
1675
1676 /// \brief Form 4 consecutive S registers.
1677 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1678                                    SDValue V2, SDValue V3) {
1679   SDLoc dl(V0.getNode());
1680   SDValue RegClass =
1681     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1682   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1683   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1684   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1685   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1686   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1687                                     V2, SubReg2, V3, SubReg3 };
1688   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1689 }
1690
1691 /// \brief Form 4 consecutive D registers.
1692 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1693                                    SDValue V2, SDValue V3) {
1694   SDLoc dl(V0.getNode());
1695   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1696                                                MVT::i32);
1697   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1698   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1699   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1700   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1701   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1702                                     V2, SubReg2, V3, SubReg3 };
1703   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1704 }
1705
1706 /// \brief Form 4 consecutive Q registers.
1707 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1708                                    SDValue V2, SDValue V3) {
1709   SDLoc dl(V0.getNode());
1710   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1711                                                MVT::i32);
1712   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1713   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1714   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1715   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1716   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1717                                     V2, SubReg2, V3, SubReg3 };
1718   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1719 }
1720
1721 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1722 /// of a NEON VLD or VST instruction.  The supported values depend on the
1723 /// number of registers being loaded.
1724 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1725                                        unsigned NumVecs, bool is64BitVector) {
1726   unsigned NumRegs = NumVecs;
1727   if (!is64BitVector && NumVecs < 3)
1728     NumRegs *= 2;
1729
1730   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1731   if (Alignment >= 32 && NumRegs == 4)
1732     Alignment = 32;
1733   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1734     Alignment = 16;
1735   else if (Alignment >= 8)
1736     Alignment = 8;
1737   else
1738     Alignment = 0;
1739
1740   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1741 }
1742
1743 static bool isVLDfixed(unsigned Opc)
1744 {
1745   switch (Opc) {
1746   default: return false;
1747   case ARM::VLD1d8wb_fixed : return true;
1748   case ARM::VLD1d16wb_fixed : return true;
1749   case ARM::VLD1d64Qwb_fixed : return true;
1750   case ARM::VLD1d32wb_fixed : return true;
1751   case ARM::VLD1d64wb_fixed : return true;
1752   case ARM::VLD1d64TPseudoWB_fixed : return true;
1753   case ARM::VLD1d64QPseudoWB_fixed : return true;
1754   case ARM::VLD1q8wb_fixed : return true;
1755   case ARM::VLD1q16wb_fixed : return true;
1756   case ARM::VLD1q32wb_fixed : return true;
1757   case ARM::VLD1q64wb_fixed : return true;
1758   case ARM::VLD1DUPd8wb_fixed : return true;
1759   case ARM::VLD1DUPd16wb_fixed : return true;
1760   case ARM::VLD1DUPd32wb_fixed : return true;
1761   case ARM::VLD1DUPq8wb_fixed : return true;
1762   case ARM::VLD1DUPq16wb_fixed : return true;
1763   case ARM::VLD1DUPq32wb_fixed : return true;
1764   case ARM::VLD2d8wb_fixed : return true;
1765   case ARM::VLD2d16wb_fixed : return true;
1766   case ARM::VLD2d32wb_fixed : return true;
1767   case ARM::VLD2q8PseudoWB_fixed : return true;
1768   case ARM::VLD2q16PseudoWB_fixed : return true;
1769   case ARM::VLD2q32PseudoWB_fixed : return true;
1770   case ARM::VLD2DUPd8wb_fixed : return true;
1771   case ARM::VLD2DUPd16wb_fixed : return true;
1772   case ARM::VLD2DUPd32wb_fixed : return true;
1773   }
1774 }
1775
1776 static bool isVSTfixed(unsigned Opc)
1777 {
1778   switch (Opc) {
1779   default: return false;
1780   case ARM::VST1d8wb_fixed : return true;
1781   case ARM::VST1d16wb_fixed : return true;
1782   case ARM::VST1d32wb_fixed : return true;
1783   case ARM::VST1d64wb_fixed : return true;
1784   case ARM::VST1q8wb_fixed : return true;
1785   case ARM::VST1q16wb_fixed : return true;
1786   case ARM::VST1q32wb_fixed : return true;
1787   case ARM::VST1q64wb_fixed : return true;
1788   case ARM::VST1d64TPseudoWB_fixed : return true;
1789   case ARM::VST1d64QPseudoWB_fixed : return true;
1790   case ARM::VST2d8wb_fixed : return true;
1791   case ARM::VST2d16wb_fixed : return true;
1792   case ARM::VST2d32wb_fixed : return true;
1793   case ARM::VST2q8PseudoWB_fixed : return true;
1794   case ARM::VST2q16PseudoWB_fixed : return true;
1795   case ARM::VST2q32PseudoWB_fixed : return true;
1796   }
1797 }
1798
1799 // Get the register stride update opcode of a VLD/VST instruction that
1800 // is otherwise equivalent to the given fixed stride updating instruction.
1801 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1802   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1803     && "Incorrect fixed stride updating instruction.");
1804   switch (Opc) {
1805   default: break;
1806   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1807   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1808   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1809   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1810   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1811   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1812   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1813   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1814   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1815   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1816   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1817   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1818   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1819   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1820   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1821   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1822   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1823   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1824
1825   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1826   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1827   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1828   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1829   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1830   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1831   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1832   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1833   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1834   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1835
1836   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1837   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1838   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1839   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1840   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1841   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1842
1843   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1844   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1845   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1846   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1847   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1848   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1849
1850   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1851   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1852   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1853   }
1854   return Opc; // If not one we handle, return it unchanged.
1855 }
1856
1857 /// Returns true if the given increment is a Constant known to be equal to the
1858 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1859 /// be used.
1860 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1861   auto C = dyn_cast<ConstantSDNode>(Inc);
1862   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1863 }
1864
1865 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1866                                 const uint16_t *DOpcodes,
1867                                 const uint16_t *QOpcodes0,
1868                                 const uint16_t *QOpcodes1) {
1869   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1870   SDLoc dl(N);
1871
1872   SDValue MemAddr, Align;
1873   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1874   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1875     return;
1876
1877   SDValue Chain = N->getOperand(0);
1878   EVT VT = N->getValueType(0);
1879   bool is64BitVector = VT.is64BitVector();
1880   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1881
1882   unsigned OpcodeIndex;
1883   switch (VT.getSimpleVT().SimpleTy) {
1884   default: llvm_unreachable("unhandled vld type");
1885     // Double-register operations:
1886   case MVT::v8i8:  OpcodeIndex = 0; break;
1887   case MVT::v4i16: OpcodeIndex = 1; break;
1888   case MVT::v2f32:
1889   case MVT::v2i32: OpcodeIndex = 2; break;
1890   case MVT::v1i64: OpcodeIndex = 3; break;
1891     // Quad-register operations:
1892   case MVT::v16i8: OpcodeIndex = 0; break;
1893   case MVT::v8i16: OpcodeIndex = 1; break;
1894   case MVT::v4f32:
1895   case MVT::v4i32: OpcodeIndex = 2; break;
1896   case MVT::v2f64:
1897   case MVT::v2i64: OpcodeIndex = 3;
1898     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1899     break;
1900   }
1901
1902   EVT ResTy;
1903   if (NumVecs == 1)
1904     ResTy = VT;
1905   else {
1906     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1907     if (!is64BitVector)
1908       ResTyElts *= 2;
1909     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1910   }
1911   std::vector<EVT> ResTys;
1912   ResTys.push_back(ResTy);
1913   if (isUpdating)
1914     ResTys.push_back(MVT::i32);
1915   ResTys.push_back(MVT::Other);
1916
1917   SDValue Pred = getAL(CurDAG, dl);
1918   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1919   SDNode *VLd;
1920   SmallVector<SDValue, 7> Ops;
1921
1922   // Double registers and VLD1/VLD2 quad registers are directly supported.
1923   if (is64BitVector || NumVecs <= 2) {
1924     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1925                     QOpcodes0[OpcodeIndex]);
1926     Ops.push_back(MemAddr);
1927     Ops.push_back(Align);
1928     if (isUpdating) {
1929       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1930       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1931       // case entirely when the rest are updated to that form, too.
1932       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1933       if ((NumVecs <= 2) && !IsImmUpdate)
1934         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1935       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1936       // check for that explicitly too. Horribly hacky, but temporary.
1937       if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
1938         Ops.push_back(IsImmUpdate ? Reg0 : Inc);
1939     }
1940     Ops.push_back(Pred);
1941     Ops.push_back(Reg0);
1942     Ops.push_back(Chain);
1943     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1944
1945   } else {
1946     // Otherwise, quad registers are loaded with two separate instructions,
1947     // where one loads the even registers and the other loads the odd registers.
1948     EVT AddrTy = MemAddr.getValueType();
1949
1950     // Load the even subregs.  This is always an updating load, so that it
1951     // provides the address to the second load for the odd subregs.
1952     SDValue ImplDef =
1953       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1954     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1955     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1956                                           ResTy, AddrTy, MVT::Other, OpsA);
1957     Chain = SDValue(VLdA, 2);
1958
1959     // Load the odd subregs.
1960     Ops.push_back(SDValue(VLdA, 1));
1961     Ops.push_back(Align);
1962     if (isUpdating) {
1963       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1964       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1965              "only constant post-increment update allowed for VLD3/4");
1966       (void)Inc;
1967       Ops.push_back(Reg0);
1968     }
1969     Ops.push_back(SDValue(VLdA, 0));
1970     Ops.push_back(Pred);
1971     Ops.push_back(Reg0);
1972     Ops.push_back(Chain);
1973     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1974   }
1975
1976   // Transfer memoperands.
1977   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1978   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1979   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1980
1981   if (NumVecs == 1) {
1982     ReplaceNode(N, VLd);
1983     return;
1984   }
1985
1986   // Extract out the subregisters.
1987   SDValue SuperReg = SDValue(VLd, 0);
1988   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1989                     ARM::qsub_3 == ARM::qsub_0 + 3,
1990                 "Unexpected subreg numbering");
1991   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1992   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1993     ReplaceUses(SDValue(N, Vec),
1994                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1995   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1996   if (isUpdating)
1997     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1998   CurDAG->RemoveDeadNode(N);
1999 }
2000
2001 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2002                                 const uint16_t *DOpcodes,
2003                                 const uint16_t *QOpcodes0,
2004                                 const uint16_t *QOpcodes1) {
2005   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2006   SDLoc dl(N);
2007
2008   SDValue MemAddr, Align;
2009   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2010   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2011   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2012     return;
2013
2014   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2015   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2016
2017   SDValue Chain = N->getOperand(0);
2018   EVT VT = N->getOperand(Vec0Idx).getValueType();
2019   bool is64BitVector = VT.is64BitVector();
2020   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2021
2022   unsigned OpcodeIndex;
2023   switch (VT.getSimpleVT().SimpleTy) {
2024   default: llvm_unreachable("unhandled vst type");
2025     // Double-register operations:
2026   case MVT::v8i8:  OpcodeIndex = 0; break;
2027   case MVT::v4i16: OpcodeIndex = 1; break;
2028   case MVT::v2f32:
2029   case MVT::v2i32: OpcodeIndex = 2; break;
2030   case MVT::v1i64: OpcodeIndex = 3; break;
2031     // Quad-register operations:
2032   case MVT::v16i8: OpcodeIndex = 0; break;
2033   case MVT::v8i16: OpcodeIndex = 1; break;
2034   case MVT::v4f32:
2035   case MVT::v4i32: OpcodeIndex = 2; break;
2036   case MVT::v2f64:
2037   case MVT::v2i64: OpcodeIndex = 3;
2038     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
2039     break;
2040   }
2041
2042   std::vector<EVT> ResTys;
2043   if (isUpdating)
2044     ResTys.push_back(MVT::i32);
2045   ResTys.push_back(MVT::Other);
2046
2047   SDValue Pred = getAL(CurDAG, dl);
2048   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2049   SmallVector<SDValue, 7> Ops;
2050
2051   // Double registers and VST1/VST2 quad registers are directly supported.
2052   if (is64BitVector || NumVecs <= 2) {
2053     SDValue SrcReg;
2054     if (NumVecs == 1) {
2055       SrcReg = N->getOperand(Vec0Idx);
2056     } else if (is64BitVector) {
2057       // Form a REG_SEQUENCE to force register allocation.
2058       SDValue V0 = N->getOperand(Vec0Idx + 0);
2059       SDValue V1 = N->getOperand(Vec0Idx + 1);
2060       if (NumVecs == 2)
2061         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2062       else {
2063         SDValue V2 = N->getOperand(Vec0Idx + 2);
2064         // If it's a vst3, form a quad D-register and leave the last part as
2065         // an undef.
2066         SDValue V3 = (NumVecs == 3)
2067           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2068           : N->getOperand(Vec0Idx + 3);
2069         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2070       }
2071     } else {
2072       // Form a QQ register.
2073       SDValue Q0 = N->getOperand(Vec0Idx);
2074       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2075       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2076     }
2077
2078     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2079                     QOpcodes0[OpcodeIndex]);
2080     Ops.push_back(MemAddr);
2081     Ops.push_back(Align);
2082     if (isUpdating) {
2083       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2084       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2085       // case entirely when the rest are updated to that form, too.
2086       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2087       if (NumVecs <= 2 && !IsImmUpdate)
2088         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2089       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2090       // check for that explicitly too. Horribly hacky, but temporary.
2091       if  (!IsImmUpdate)
2092         Ops.push_back(Inc);
2093       else if (NumVecs > 2 && !isVSTfixed(Opc))
2094         Ops.push_back(Reg0);
2095     }
2096     Ops.push_back(SrcReg);
2097     Ops.push_back(Pred);
2098     Ops.push_back(Reg0);
2099     Ops.push_back(Chain);
2100     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2101
2102     // Transfer memoperands.
2103     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2104
2105     ReplaceNode(N, VSt);
2106     return;
2107   }
2108
2109   // Otherwise, quad registers are stored with two separate instructions,
2110   // where one stores the even registers and the other stores the odd registers.
2111
2112   // Form the QQQQ REG_SEQUENCE.
2113   SDValue V0 = N->getOperand(Vec0Idx + 0);
2114   SDValue V1 = N->getOperand(Vec0Idx + 1);
2115   SDValue V2 = N->getOperand(Vec0Idx + 2);
2116   SDValue V3 = (NumVecs == 3)
2117     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2118     : N->getOperand(Vec0Idx + 3);
2119   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2120
2121   // Store the even D registers.  This is always an updating store, so that it
2122   // provides the address to the second store for the odd subregs.
2123   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2124   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2125                                         MemAddr.getValueType(),
2126                                         MVT::Other, OpsA);
2127   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2128   Chain = SDValue(VStA, 1);
2129
2130   // Store the odd D registers.
2131   Ops.push_back(SDValue(VStA, 0));
2132   Ops.push_back(Align);
2133   if (isUpdating) {
2134     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2135     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2136            "only constant post-increment update allowed for VST3/4");
2137     (void)Inc;
2138     Ops.push_back(Reg0);
2139   }
2140   Ops.push_back(RegSeq);
2141   Ops.push_back(Pred);
2142   Ops.push_back(Reg0);
2143   Ops.push_back(Chain);
2144   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2145                                         Ops);
2146   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2147   ReplaceNode(N, VStB);
2148 }
2149
2150 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2151                                       unsigned NumVecs,
2152                                       const uint16_t *DOpcodes,
2153                                       const uint16_t *QOpcodes) {
2154   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2155   SDLoc dl(N);
2156
2157   SDValue MemAddr, Align;
2158   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2159   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2160   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2161     return;
2162
2163   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2164   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2165
2166   SDValue Chain = N->getOperand(0);
2167   unsigned Lane =
2168     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2169   EVT VT = N->getOperand(Vec0Idx).getValueType();
2170   bool is64BitVector = VT.is64BitVector();
2171
2172   unsigned Alignment = 0;
2173   if (NumVecs != 3) {
2174     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2175     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2176     if (Alignment > NumBytes)
2177       Alignment = NumBytes;
2178     if (Alignment < 8 && Alignment < NumBytes)
2179       Alignment = 0;
2180     // Alignment must be a power of two; make sure of that.
2181     Alignment = (Alignment & -Alignment);
2182     if (Alignment == 1)
2183       Alignment = 0;
2184   }
2185   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2186
2187   unsigned OpcodeIndex;
2188   switch (VT.getSimpleVT().SimpleTy) {
2189   default: llvm_unreachable("unhandled vld/vst lane type");
2190     // Double-register operations:
2191   case MVT::v8i8:  OpcodeIndex = 0; break;
2192   case MVT::v4i16: OpcodeIndex = 1; break;
2193   case MVT::v2f32:
2194   case MVT::v2i32: OpcodeIndex = 2; break;
2195     // Quad-register operations:
2196   case MVT::v8i16: OpcodeIndex = 0; break;
2197   case MVT::v4f32:
2198   case MVT::v4i32: OpcodeIndex = 1; break;
2199   }
2200
2201   std::vector<EVT> ResTys;
2202   if (IsLoad) {
2203     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2204     if (!is64BitVector)
2205       ResTyElts *= 2;
2206     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2207                                       MVT::i64, ResTyElts));
2208   }
2209   if (isUpdating)
2210     ResTys.push_back(MVT::i32);
2211   ResTys.push_back(MVT::Other);
2212
2213   SDValue Pred = getAL(CurDAG, dl);
2214   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2215
2216   SmallVector<SDValue, 8> Ops;
2217   Ops.push_back(MemAddr);
2218   Ops.push_back(Align);
2219   if (isUpdating) {
2220     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2221     bool IsImmUpdate =
2222         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2223     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2224   }
2225
2226   SDValue SuperReg;
2227   SDValue V0 = N->getOperand(Vec0Idx + 0);
2228   SDValue V1 = N->getOperand(Vec0Idx + 1);
2229   if (NumVecs == 2) {
2230     if (is64BitVector)
2231       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2232     else
2233       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2234   } else {
2235     SDValue V2 = N->getOperand(Vec0Idx + 2);
2236     SDValue V3 = (NumVecs == 3)
2237       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2238       : N->getOperand(Vec0Idx + 3);
2239     if (is64BitVector)
2240       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2241     else
2242       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2243   }
2244   Ops.push_back(SuperReg);
2245   Ops.push_back(getI32Imm(Lane, dl));
2246   Ops.push_back(Pred);
2247   Ops.push_back(Reg0);
2248   Ops.push_back(Chain);
2249
2250   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2251                                   QOpcodes[OpcodeIndex]);
2252   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2253   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2254   if (!IsLoad) {
2255     ReplaceNode(N, VLdLn);
2256     return;
2257   }
2258
2259   // Extract the subregisters.
2260   SuperReg = SDValue(VLdLn, 0);
2261   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2262                     ARM::qsub_3 == ARM::qsub_0 + 3,
2263                 "Unexpected subreg numbering");
2264   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2265   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2266     ReplaceUses(SDValue(N, Vec),
2267                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2268   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2269   if (isUpdating)
2270     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2271   CurDAG->RemoveDeadNode(N);
2272 }
2273
2274 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2275                                    const uint16_t *DOpcodes,
2276                                    const uint16_t *QOpcodes) {
2277   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2278   SDLoc dl(N);
2279
2280   SDValue MemAddr, Align;
2281   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2282     return;
2283
2284   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2285   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2286
2287   SDValue Chain = N->getOperand(0);
2288   EVT VT = N->getValueType(0);
2289
2290   unsigned Alignment = 0;
2291   if (NumVecs != 3) {
2292     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2293     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2294     if (Alignment > NumBytes)
2295       Alignment = NumBytes;
2296     if (Alignment < 8 && Alignment < NumBytes)
2297       Alignment = 0;
2298     // Alignment must be a power of two; make sure of that.
2299     Alignment = (Alignment & -Alignment);
2300     if (Alignment == 1)
2301       Alignment = 0;
2302   }
2303   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2304
2305   unsigned Opc;
2306   switch (VT.getSimpleVT().SimpleTy) {
2307   default: llvm_unreachable("unhandled vld-dup type");
2308   case MVT::v8i8:  Opc = DOpcodes[0]; break;
2309   case MVT::v16i8: Opc = QOpcodes[0]; break;
2310   case MVT::v4i16: Opc = DOpcodes[1]; break;
2311   case MVT::v8i16: Opc = QOpcodes[1]; break;
2312   case MVT::v2f32:
2313   case MVT::v2i32: Opc = DOpcodes[2]; break;
2314   case MVT::v4f32:
2315   case MVT::v4i32: Opc = QOpcodes[2]; break;
2316   }
2317
2318   SDValue Pred = getAL(CurDAG, dl);
2319   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2320   SmallVector<SDValue, 6> Ops;
2321   Ops.push_back(MemAddr);
2322   Ops.push_back(Align);
2323   if (isUpdating) {
2324     // fixed-stride update instructions don't have an explicit writeback
2325     // operand. It's implicit in the opcode itself.
2326     SDValue Inc = N->getOperand(2);
2327     bool IsImmUpdate =
2328         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2329     if (NumVecs <= 2 && !IsImmUpdate)
2330       Opc = getVLDSTRegisterUpdateOpcode(Opc);
2331     if (!IsImmUpdate)
2332       Ops.push_back(Inc);
2333     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2334     else if (NumVecs > 2)
2335       Ops.push_back(Reg0);
2336   }
2337   Ops.push_back(Pred);
2338   Ops.push_back(Reg0);
2339   Ops.push_back(Chain);
2340
2341   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2342   std::vector<EVT> ResTys;
2343   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2344   if (isUpdating)
2345     ResTys.push_back(MVT::i32);
2346   ResTys.push_back(MVT::Other);
2347   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2348   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2349
2350   // Extract the subregisters.
2351   if (NumVecs == 1) {
2352     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2353   } else {
2354     SDValue SuperReg = SDValue(VLdDup, 0);
2355     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2356     unsigned SubIdx = ARM::dsub_0;
2357     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2358       ReplaceUses(SDValue(N, Vec),
2359                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2360   }
2361   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2362   if (isUpdating)
2363     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2364   CurDAG->RemoveDeadNode(N);
2365 }
2366
2367 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2368   if (!Subtarget->hasV6T2Ops())
2369     return false;
2370
2371   unsigned Opc = isSigned
2372     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2373     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2374   SDLoc dl(N);
2375
2376   // For unsigned extracts, check for a shift right and mask
2377   unsigned And_imm = 0;
2378   if (N->getOpcode() == ISD::AND) {
2379     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2380
2381       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2382       if (And_imm & (And_imm + 1))
2383         return false;
2384
2385       unsigned Srl_imm = 0;
2386       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2387                                 Srl_imm)) {
2388         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2389
2390         // Note: The width operand is encoded as width-1.
2391         unsigned Width = countTrailingOnes(And_imm) - 1;
2392         unsigned LSB = Srl_imm;
2393
2394         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2395
2396         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2397           // It's cheaper to use a right shift to extract the top bits.
2398           if (Subtarget->isThumb()) {
2399             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2400             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2401                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2402                               getAL(CurDAG, dl), Reg0, Reg0 };
2403             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2404             return true;
2405           }
2406
2407           // ARM models shift instructions as MOVsi with shifter operand.
2408           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2409           SDValue ShOpc =
2410             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2411                                       MVT::i32);
2412           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2413                             getAL(CurDAG, dl), Reg0, Reg0 };
2414           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2415           return true;
2416         }
2417
2418         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2419                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2420                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2421                           getAL(CurDAG, dl), Reg0 };
2422         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2423         return true;
2424       }
2425     }
2426     return false;
2427   }
2428
2429   // Otherwise, we're looking for a shift of a shift
2430   unsigned Shl_imm = 0;
2431   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2432     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2433     unsigned Srl_imm = 0;
2434     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2435       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2436       // Note: The width operand is encoded as width-1.
2437       unsigned Width = 32 - Srl_imm - 1;
2438       int LSB = Srl_imm - Shl_imm;
2439       if (LSB < 0)
2440         return false;
2441       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2442       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2443                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2444                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2445                         getAL(CurDAG, dl), Reg0 };
2446       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2447       return true;
2448     }
2449   }
2450
2451   // Or we are looking for a shift of an and, with a mask operand
2452   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2453       isShiftedMask_32(And_imm)) {
2454     unsigned Srl_imm = 0;
2455     unsigned LSB = countTrailingZeros(And_imm);
2456     // Shift must be the same as the ands lsb
2457     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2458       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2459       unsigned MSB = 31 - countLeadingZeros(And_imm);
2460       // Note: The width operand is encoded as width-1.
2461       unsigned Width = MSB - LSB;
2462       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2463       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2464                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2465                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2466                         getAL(CurDAG, dl), Reg0 };
2467       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2468       return true;
2469     }
2470   }
2471
2472   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2473     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2474     unsigned LSB = 0;
2475     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2476         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2477       return false;
2478
2479     if (LSB + Width > 32)
2480       return false;
2481
2482     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2483     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2484                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2485                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2486                       getAL(CurDAG, dl), Reg0 };
2487     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2488     return true;
2489   }
2490
2491   return false;
2492 }
2493
2494 /// Target-specific DAG combining for ISD::XOR.
2495 /// Target-independent combining lowers SELECT_CC nodes of the form
2496 /// select_cc setg[ge] X,  0,  X, -X
2497 /// select_cc setgt    X, -1,  X, -X
2498 /// select_cc setl[te] X,  0, -X,  X
2499 /// select_cc setlt    X,  1, -X,  X
2500 /// which represent Integer ABS into:
2501 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2502 /// ARM instruction selection detects the latter and matches it to
2503 /// ARM::ABS or ARM::t2ABS machine node.
2504 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2505   SDValue XORSrc0 = N->getOperand(0);
2506   SDValue XORSrc1 = N->getOperand(1);
2507   EVT VT = N->getValueType(0);
2508
2509   if (Subtarget->isThumb1Only())
2510     return false;
2511
2512   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2513     return false;
2514
2515   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2516   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2517   SDValue SRASrc0 = XORSrc1.getOperand(0);
2518   SDValue SRASrc1 = XORSrc1.getOperand(1);
2519   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2520   EVT XType = SRASrc0.getValueType();
2521   unsigned Size = XType.getSizeInBits() - 1;
2522
2523   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2524       XType.isInteger() && SRAConstant != nullptr &&
2525       Size == SRAConstant->getZExtValue()) {
2526     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2527     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2528     return true;
2529   }
2530
2531   return false;
2532 }
2533
2534 /// We've got special pseudo-instructions for these
2535 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2536   unsigned Opcode;
2537   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2538   if (MemTy == MVT::i8)
2539     Opcode = ARM::CMP_SWAP_8;
2540   else if (MemTy == MVT::i16)
2541     Opcode = ARM::CMP_SWAP_16;
2542   else if (MemTy == MVT::i32)
2543     Opcode = ARM::CMP_SWAP_32;
2544   else
2545     llvm_unreachable("Unknown AtomicCmpSwap type");
2546
2547   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2548                    N->getOperand(0)};
2549   SDNode *CmpSwap = CurDAG->getMachineNode(
2550       Opcode, SDLoc(N),
2551       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2552
2553   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2554   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2555   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2556
2557   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2558   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2559   CurDAG->RemoveDeadNode(N);
2560 }
2561
2562 static Optional<std::pair<unsigned, unsigned>>
2563 getContiguousRangeOfSetBits(const APInt &A) {
2564   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2565   unsigned LastOne = A.countTrailingZeros();
2566   if (A.countPopulation() != (FirstOne - LastOne + 1))
2567     return Optional<std::pair<unsigned,unsigned>>();
2568   return std::make_pair(FirstOne, LastOne);
2569 }
2570
2571 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2572   assert(N->getOpcode() == ARMISD::CMPZ);
2573   SwitchEQNEToPLMI = false;
2574
2575   if (!Subtarget->isThumb())
2576     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2577     // LSR don't exist as standalone instructions - they need the barrel shifter.
2578     return;
2579
2580   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2581   SDValue And = N->getOperand(0);
2582   if (!And->hasOneUse())
2583     return;
2584
2585   SDValue Zero = N->getOperand(1);
2586   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2587       And->getOpcode() != ISD::AND)
2588     return;
2589   SDValue X = And.getOperand(0);
2590   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2591
2592   if (!C || !X->hasOneUse())
2593     return;
2594   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2595   if (!Range)
2596     return;
2597
2598   // There are several ways to lower this:
2599   SDNode *NewN;
2600   SDLoc dl(N);
2601
2602   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2603     if (Subtarget->isThumb2()) {
2604       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2605       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2606                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2607                         CurDAG->getRegister(0, MVT::i32) };
2608       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2609     } else {
2610       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2611                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2612                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2613       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2614     }
2615   };
2616
2617   if (Range->second == 0) {
2618     //  1. Mask includes the LSB -> Simply shift the top N bits off
2619     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2620     ReplaceNode(And.getNode(), NewN);
2621   } else if (Range->first == 31) {
2622     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2623     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2624     ReplaceNode(And.getNode(), NewN);
2625   } else if (Range->first == Range->second) {
2626     //  3. Only one bit is set. We can shift this into the sign bit and use a
2627     //     PL/MI comparison.
2628     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2629     ReplaceNode(And.getNode(), NewN);
2630
2631     SwitchEQNEToPLMI = true;
2632   } else if (!Subtarget->hasV6T2Ops()) {
2633     //  4. Do a double shift to clear bottom and top bits, but only in
2634     //     thumb-1 mode as in thumb-2 we can use UBFX.
2635     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2636     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2637                      Range->second + (31 - Range->first));
2638     ReplaceNode(And.getNode(), NewN);
2639   }
2640
2641 }
2642
2643 void ARMDAGToDAGISel::Select(SDNode *N) {
2644   SDLoc dl(N);
2645
2646   if (N->isMachineOpcode()) {
2647     N->setNodeId(-1);
2648     return;   // Already selected.
2649   }
2650
2651   switch (N->getOpcode()) {
2652   default: break;
2653   case ISD::WRITE_REGISTER:
2654     if (tryWriteRegister(N))
2655       return;
2656     break;
2657   case ISD::READ_REGISTER:
2658     if (tryReadRegister(N))
2659       return;
2660     break;
2661   case ISD::INLINEASM:
2662     if (tryInlineAsm(N))
2663       return;
2664     break;
2665   case ISD::XOR:
2666     // Select special operations if XOR node forms integer ABS pattern
2667     if (tryABSOp(N))
2668       return;
2669     // Other cases are autogenerated.
2670     break;
2671   case ISD::Constant: {
2672     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2673     // If we can't materialize the constant we need to use a literal pool
2674     if (ConstantMaterializationCost(Val) > 2) {
2675       SDValue CPIdx = CurDAG->getTargetConstantPool(
2676           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2677           TLI->getPointerTy(CurDAG->getDataLayout()));
2678
2679       SDNode *ResNode;
2680       if (Subtarget->isThumb()) {
2681         SDValue Pred = getAL(CurDAG, dl);
2682         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2683         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2684         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2685                                          Ops);
2686       } else {
2687         SDValue Ops[] = {
2688           CPIdx,
2689           CurDAG->getTargetConstant(0, dl, MVT::i32),
2690           getAL(CurDAG, dl),
2691           CurDAG->getRegister(0, MVT::i32),
2692           CurDAG->getEntryNode()
2693         };
2694         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2695                                          Ops);
2696       }
2697       ReplaceNode(N, ResNode);
2698       return;
2699     }
2700
2701     // Other cases are autogenerated.
2702     break;
2703   }
2704   case ISD::FrameIndex: {
2705     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2706     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2707     SDValue TFI = CurDAG->getTargetFrameIndex(
2708         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2709     if (Subtarget->isThumb1Only()) {
2710       // Set the alignment of the frame object to 4, to avoid having to generate
2711       // more than one ADD
2712       MachineFrameInfo &MFI = MF->getFrameInfo();
2713       if (MFI.getObjectAlignment(FI) < 4)
2714         MFI.setObjectAlignment(FI, 4);
2715       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2716                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2717       return;
2718     } else {
2719       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2720                       ARM::t2ADDri : ARM::ADDri);
2721       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2722                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2723                         CurDAG->getRegister(0, MVT::i32) };
2724       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2725       return;
2726     }
2727   }
2728   case ISD::SRL:
2729     if (tryV6T2BitfieldExtractOp(N, false))
2730       return;
2731     break;
2732   case ISD::SIGN_EXTEND_INREG:
2733   case ISD::SRA:
2734     if (tryV6T2BitfieldExtractOp(N, true))
2735       return;
2736     break;
2737   case ISD::MUL:
2738     if (Subtarget->isThumb1Only())
2739       break;
2740     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2741       unsigned RHSV = C->getZExtValue();
2742       if (!RHSV) break;
2743       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2744         unsigned ShImm = Log2_32(RHSV-1);
2745         if (ShImm >= 32)
2746           break;
2747         SDValue V = N->getOperand(0);
2748         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2749         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2750         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2751         if (Subtarget->isThumb()) {
2752           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2753           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2754           return;
2755         } else {
2756           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2757                             Reg0 };
2758           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2759           return;
2760         }
2761       }
2762       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2763         unsigned ShImm = Log2_32(RHSV+1);
2764         if (ShImm >= 32)
2765           break;
2766         SDValue V = N->getOperand(0);
2767         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2768         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2769         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2770         if (Subtarget->isThumb()) {
2771           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2772           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2773           return;
2774         } else {
2775           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2776                             Reg0 };
2777           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2778           return;
2779         }
2780       }
2781     }
2782     break;
2783   case ISD::AND: {
2784     // Check for unsigned bitfield extract
2785     if (tryV6T2BitfieldExtractOp(N, false))
2786       return;
2787
2788     // If an immediate is used in an AND node, it is possible that the immediate
2789     // can be more optimally materialized when negated. If this is the case we
2790     // can negate the immediate and use a BIC instead.
2791     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2792     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2793       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2794
2795       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2796       // immediate can be negated and fit in the immediate operand of
2797       // a t2BIC, don't do any manual transform here as this can be
2798       // handled by the generic ISel machinery.
2799       bool PreferImmediateEncoding =
2800         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2801       if (!PreferImmediateEncoding &&
2802           ConstantMaterializationCost(Imm) >
2803               ConstantMaterializationCost(~Imm)) {
2804         // The current immediate costs more to materialize than a negated
2805         // immediate, so negate the immediate and use a BIC.
2806         SDValue NewImm =
2807           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2808         // If the new constant didn't exist before, reposition it in the topological
2809         // ordering so it is just before N. Otherwise, don't touch its location.
2810         if (NewImm->getNodeId() == -1)
2811           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2812
2813         if (!Subtarget->hasThumb2()) {
2814           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2815                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2816                            CurDAG->getRegister(0, MVT::i32)};
2817           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2818           return;
2819         } else {
2820           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2821                            CurDAG->getRegister(0, MVT::i32),
2822                            CurDAG->getRegister(0, MVT::i32)};
2823           ReplaceNode(N,
2824                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2825           return;
2826         }
2827       }
2828     }
2829
2830     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2831     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2832     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2833     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2834     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2835     EVT VT = N->getValueType(0);
2836     if (VT != MVT::i32)
2837       break;
2838     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2839       ? ARM::t2MOVTi16
2840       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2841     if (!Opc)
2842       break;
2843     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2844     N1C = dyn_cast<ConstantSDNode>(N1);
2845     if (!N1C)
2846       break;
2847     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2848       SDValue N2 = N0.getOperand(1);
2849       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2850       if (!N2C)
2851         break;
2852       unsigned N1CVal = N1C->getZExtValue();
2853       unsigned N2CVal = N2C->getZExtValue();
2854       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2855           (N1CVal & 0xffffU) == 0xffffU &&
2856           (N2CVal & 0xffffU) == 0x0U) {
2857         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2858                                                   dl, MVT::i32);
2859         SDValue Ops[] = { N0.getOperand(0), Imm16,
2860                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2861         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2862         return;
2863       }
2864     }
2865
2866     break;
2867   }
2868   case ARMISD::UMAAL: {
2869     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2870     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2871                       N->getOperand(2), N->getOperand(3),
2872                       getAL(CurDAG, dl),
2873                       CurDAG->getRegister(0, MVT::i32) };
2874     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2875     return;
2876   }
2877   case ARMISD::UMLAL:{
2878     if (Subtarget->isThumb()) {
2879       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2880                         N->getOperand(3), getAL(CurDAG, dl),
2881                         CurDAG->getRegister(0, MVT::i32)};
2882       ReplaceNode(
2883           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2884       return;
2885     }else{
2886       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2887                         N->getOperand(3), getAL(CurDAG, dl),
2888                         CurDAG->getRegister(0, MVT::i32),
2889                         CurDAG->getRegister(0, MVT::i32) };
2890       ReplaceNode(N, CurDAG->getMachineNode(
2891                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2892                          MVT::i32, MVT::i32, Ops));
2893       return;
2894     }
2895   }
2896   case ARMISD::SMLAL:{
2897     if (Subtarget->isThumb()) {
2898       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2899                         N->getOperand(3), getAL(CurDAG, dl),
2900                         CurDAG->getRegister(0, MVT::i32)};
2901       ReplaceNode(
2902           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2903       return;
2904     }else{
2905       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2906                         N->getOperand(3), getAL(CurDAG, dl),
2907                         CurDAG->getRegister(0, MVT::i32),
2908                         CurDAG->getRegister(0, MVT::i32) };
2909       ReplaceNode(N, CurDAG->getMachineNode(
2910                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2911                          MVT::i32, MVT::i32, Ops));
2912       return;
2913     }
2914   }
2915   case ARMISD::SUBE: {
2916     if (!Subtarget->hasV6Ops())
2917       break;
2918     // Look for a pattern to match SMMLS
2919     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2920     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2921         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2922         !SDValue(N, 1).use_empty())
2923       break;
2924
2925     if (Subtarget->isThumb())
2926       assert(Subtarget->hasThumb2() &&
2927              "This pattern should not be generated for Thumb");
2928
2929     SDValue SmulLoHi = N->getOperand(1);
2930     SDValue Subc = N->getOperand(2);
2931     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2932
2933     if (!Zero || Zero->getZExtValue() != 0 ||
2934         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2935         N->getOperand(1) != SmulLoHi.getValue(1) ||
2936         N->getOperand(2) != Subc.getValue(1))
2937       break;
2938
2939     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2940     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2941                       N->getOperand(0), getAL(CurDAG, dl),
2942                       CurDAG->getRegister(0, MVT::i32) };
2943     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2944     return;
2945   }
2946   case ISD::LOAD: {
2947     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2948       if (tryT2IndexedLoad(N))
2949         return;
2950     } else if (Subtarget->isThumb()) {
2951       if (tryT1IndexedLoad(N))
2952         return;
2953     } else if (tryARMIndexedLoad(N))
2954       return;
2955     // Other cases are autogenerated.
2956     break;
2957   }
2958   case ARMISD::BRCOND: {
2959     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2960     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2961     // Pattern complexity = 6  cost = 1  size = 0
2962
2963     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2964     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2965     // Pattern complexity = 6  cost = 1  size = 0
2966
2967     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2968     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2969     // Pattern complexity = 6  cost = 1  size = 0
2970
2971     unsigned Opc = Subtarget->isThumb() ?
2972       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2973     SDValue Chain = N->getOperand(0);
2974     SDValue N1 = N->getOperand(1);
2975     SDValue N2 = N->getOperand(2);
2976     SDValue N3 = N->getOperand(3);
2977     SDValue InFlag = N->getOperand(4);
2978     assert(N1.getOpcode() == ISD::BasicBlock);
2979     assert(N2.getOpcode() == ISD::Constant);
2980     assert(N3.getOpcode() == ISD::Register);
2981
2982     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2983
2984     if (InFlag.getOpcode() == ARMISD::CMPZ) {
2985       bool SwitchEQNEToPLMI;
2986       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2987       InFlag = N->getOperand(4);
2988
2989       if (SwitchEQNEToPLMI) {
2990         switch ((ARMCC::CondCodes)CC) {
2991         default: llvm_unreachable("CMPZ must be either NE or EQ!");
2992         case ARMCC::NE:
2993           CC = (unsigned)ARMCC::MI;
2994           break;
2995         case ARMCC::EQ:
2996           CC = (unsigned)ARMCC::PL;
2997           break;
2998         }
2999       }
3000     }
3001
3002     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3003     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3004     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3005                                              MVT::Glue, Ops);
3006     Chain = SDValue(ResNode, 0);
3007     if (N->getNumValues() == 2) {
3008       InFlag = SDValue(ResNode, 1);
3009       ReplaceUses(SDValue(N, 1), InFlag);
3010     }
3011     ReplaceUses(SDValue(N, 0),
3012                 SDValue(Chain.getNode(), Chain.getResNo()));
3013     CurDAG->RemoveDeadNode(N);
3014     return;
3015   }
3016
3017   case ARMISD::CMPZ: {
3018     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3019     //   This allows us to avoid materializing the expensive negative constant.
3020     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3021     //   for its glue output.
3022     SDValue X = N->getOperand(0);
3023     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3024     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3025       int64_t Addend = -C->getSExtValue();
3026
3027       SDNode *Add = nullptr;
3028       // ADDS can be better than CMN if the immediate fits in a
3029       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3030       // Outside that range we can just use a CMN which is 32-bit but has a
3031       // 12-bit immediate range.
3032       if (Addend < 1<<8) {
3033         if (Subtarget->isThumb2()) {
3034           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3035                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3036                             CurDAG->getRegister(0, MVT::i32) };
3037           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3038         } else {
3039           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3040           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3041                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3042                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3043           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3044         }
3045       }
3046       if (Add) {
3047         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3048         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3049       }
3050     }
3051     // Other cases are autogenerated.
3052     break;
3053   }
3054
3055   case ARMISD::CMOV: {
3056     SDValue InFlag = N->getOperand(4);
3057
3058     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3059       bool SwitchEQNEToPLMI;
3060       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3061
3062       if (SwitchEQNEToPLMI) {
3063         SDValue ARMcc = N->getOperand(2);
3064         ARMCC::CondCodes CC =
3065           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3066
3067         switch (CC) {
3068         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3069         case ARMCC::NE:
3070           CC = ARMCC::MI;
3071           break;
3072         case ARMCC::EQ:
3073           CC = ARMCC::PL;
3074           break;
3075         }
3076         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3077         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3078                          N->getOperand(3), N->getOperand(4)};
3079         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3080       }
3081
3082     }
3083     // Other cases are autogenerated.
3084     break;
3085   }
3086
3087   case ARMISD::VZIP: {
3088     unsigned Opc = 0;
3089     EVT VT = N->getValueType(0);
3090     switch (VT.getSimpleVT().SimpleTy) {
3091     default: return;
3092     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3093     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3094     case MVT::v2f32:
3095     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3096     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3097     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3098     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3099     case MVT::v4f32:
3100     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3101     }
3102     SDValue Pred = getAL(CurDAG, dl);
3103     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3104     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3105     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3106     return;
3107   }
3108   case ARMISD::VUZP: {
3109     unsigned Opc = 0;
3110     EVT VT = N->getValueType(0);
3111     switch (VT.getSimpleVT().SimpleTy) {
3112     default: return;
3113     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3114     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3115     case MVT::v2f32:
3116     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3117     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3118     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3119     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3120     case MVT::v4f32:
3121     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3122     }
3123     SDValue Pred = getAL(CurDAG, dl);
3124     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3125     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3126     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3127     return;
3128   }
3129   case ARMISD::VTRN: {
3130     unsigned Opc = 0;
3131     EVT VT = N->getValueType(0);
3132     switch (VT.getSimpleVT().SimpleTy) {
3133     default: return;
3134     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3135     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3136     case MVT::v2f32:
3137     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3138     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3139     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3140     case MVT::v4f32:
3141     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3142     }
3143     SDValue Pred = getAL(CurDAG, dl);
3144     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3145     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3146     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3147     return;
3148   }
3149   case ARMISD::BUILD_VECTOR: {
3150     EVT VecVT = N->getValueType(0);
3151     EVT EltVT = VecVT.getVectorElementType();
3152     unsigned NumElts = VecVT.getVectorNumElements();
3153     if (EltVT == MVT::f64) {
3154       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3155       ReplaceNode(
3156           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3157       return;
3158     }
3159     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3160     if (NumElts == 2) {
3161       ReplaceNode(
3162           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3163       return;
3164     }
3165     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3166     ReplaceNode(N,
3167                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3168                                     N->getOperand(2), N->getOperand(3)));
3169     return;
3170   }
3171
3172   case ARMISD::VLD1DUP: {
3173     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3174                                          ARM::VLD1DUPd32 };
3175     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3176                                          ARM::VLD1DUPq32 };
3177     SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3178     return;
3179   }
3180
3181   case ARMISD::VLD2DUP: {
3182     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3183                                         ARM::VLD2DUPd32 };
3184     SelectVLDDup(N, false, 2, Opcodes);
3185     return;
3186   }
3187
3188   case ARMISD::VLD3DUP: {
3189     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3190                                         ARM::VLD3DUPd16Pseudo,
3191                                         ARM::VLD3DUPd32Pseudo };
3192     SelectVLDDup(N, false, 3, Opcodes);
3193     return;
3194   }
3195
3196   case ARMISD::VLD4DUP: {
3197     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3198                                         ARM::VLD4DUPd16Pseudo,
3199                                         ARM::VLD4DUPd32Pseudo };
3200     SelectVLDDup(N, false, 4, Opcodes);
3201     return;
3202   }
3203
3204   case ARMISD::VLD1DUP_UPD: {
3205     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3206                                          ARM::VLD1DUPd16wb_fixed,
3207                                          ARM::VLD1DUPd32wb_fixed };
3208     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3209                                          ARM::VLD1DUPq16wb_fixed,
3210                                          ARM::VLD1DUPq32wb_fixed };
3211     SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3212     return;
3213   }
3214
3215   case ARMISD::VLD2DUP_UPD: {
3216     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3217                                         ARM::VLD2DUPd16wb_fixed,
3218                                         ARM::VLD2DUPd32wb_fixed };
3219     SelectVLDDup(N, true, 2, Opcodes);
3220     return;
3221   }
3222
3223   case ARMISD::VLD3DUP_UPD: {
3224     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3225                                         ARM::VLD3DUPd16Pseudo_UPD,
3226                                         ARM::VLD3DUPd32Pseudo_UPD };
3227     SelectVLDDup(N, true, 3, Opcodes);
3228     return;
3229   }
3230
3231   case ARMISD::VLD4DUP_UPD: {
3232     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3233                                         ARM::VLD4DUPd16Pseudo_UPD,
3234                                         ARM::VLD4DUPd32Pseudo_UPD };
3235     SelectVLDDup(N, true, 4, Opcodes);
3236     return;
3237   }
3238
3239   case ARMISD::VLD1_UPD: {
3240     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3241                                          ARM::VLD1d16wb_fixed,
3242                                          ARM::VLD1d32wb_fixed,
3243                                          ARM::VLD1d64wb_fixed };
3244     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3245                                          ARM::VLD1q16wb_fixed,
3246                                          ARM::VLD1q32wb_fixed,
3247                                          ARM::VLD1q64wb_fixed };
3248     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3249     return;
3250   }
3251
3252   case ARMISD::VLD2_UPD: {
3253     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3254                                          ARM::VLD2d16wb_fixed,
3255                                          ARM::VLD2d32wb_fixed,
3256                                          ARM::VLD1q64wb_fixed};
3257     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3258                                          ARM::VLD2q16PseudoWB_fixed,
3259                                          ARM::VLD2q32PseudoWB_fixed };
3260     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3261     return;
3262   }
3263
3264   case ARMISD::VLD3_UPD: {
3265     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3266                                          ARM::VLD3d16Pseudo_UPD,
3267                                          ARM::VLD3d32Pseudo_UPD,
3268                                          ARM::VLD1d64TPseudoWB_fixed};
3269     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3270                                           ARM::VLD3q16Pseudo_UPD,
3271                                           ARM::VLD3q32Pseudo_UPD };
3272     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3273                                           ARM::VLD3q16oddPseudo_UPD,
3274                                           ARM::VLD3q32oddPseudo_UPD };
3275     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3276     return;
3277   }
3278
3279   case ARMISD::VLD4_UPD: {
3280     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3281                                          ARM::VLD4d16Pseudo_UPD,
3282                                          ARM::VLD4d32Pseudo_UPD,
3283                                          ARM::VLD1d64QPseudoWB_fixed};
3284     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3285                                           ARM::VLD4q16Pseudo_UPD,
3286                                           ARM::VLD4q32Pseudo_UPD };
3287     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3288                                           ARM::VLD4q16oddPseudo_UPD,
3289                                           ARM::VLD4q32oddPseudo_UPD };
3290     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3291     return;
3292   }
3293
3294   case ARMISD::VLD2LN_UPD: {
3295     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3296                                          ARM::VLD2LNd16Pseudo_UPD,
3297                                          ARM::VLD2LNd32Pseudo_UPD };
3298     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3299                                          ARM::VLD2LNq32Pseudo_UPD };
3300     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3301     return;
3302   }
3303
3304   case ARMISD::VLD3LN_UPD: {
3305     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3306                                          ARM::VLD3LNd16Pseudo_UPD,
3307                                          ARM::VLD3LNd32Pseudo_UPD };
3308     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3309                                          ARM::VLD3LNq32Pseudo_UPD };
3310     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3311     return;
3312   }
3313
3314   case ARMISD::VLD4LN_UPD: {
3315     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3316                                          ARM::VLD4LNd16Pseudo_UPD,
3317                                          ARM::VLD4LNd32Pseudo_UPD };
3318     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3319                                          ARM::VLD4LNq32Pseudo_UPD };
3320     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3321     return;
3322   }
3323
3324   case ARMISD::VST1_UPD: {
3325     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3326                                          ARM::VST1d16wb_fixed,
3327                                          ARM::VST1d32wb_fixed,
3328                                          ARM::VST1d64wb_fixed };
3329     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3330                                          ARM::VST1q16wb_fixed,
3331                                          ARM::VST1q32wb_fixed,
3332                                          ARM::VST1q64wb_fixed };
3333     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3334     return;
3335   }
3336
3337   case ARMISD::VST2_UPD: {
3338     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3339                                          ARM::VST2d16wb_fixed,
3340                                          ARM::VST2d32wb_fixed,
3341                                          ARM::VST1q64wb_fixed};
3342     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3343                                          ARM::VST2q16PseudoWB_fixed,
3344                                          ARM::VST2q32PseudoWB_fixed };
3345     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3346     return;
3347   }
3348
3349   case ARMISD::VST3_UPD: {
3350     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3351                                          ARM::VST3d16Pseudo_UPD,
3352                                          ARM::VST3d32Pseudo_UPD,
3353                                          ARM::VST1d64TPseudoWB_fixed};
3354     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3355                                           ARM::VST3q16Pseudo_UPD,
3356                                           ARM::VST3q32Pseudo_UPD };
3357     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3358                                           ARM::VST3q16oddPseudo_UPD,
3359                                           ARM::VST3q32oddPseudo_UPD };
3360     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3361     return;
3362   }
3363
3364   case ARMISD::VST4_UPD: {
3365     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3366                                          ARM::VST4d16Pseudo_UPD,
3367                                          ARM::VST4d32Pseudo_UPD,
3368                                          ARM::VST1d64QPseudoWB_fixed};
3369     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3370                                           ARM::VST4q16Pseudo_UPD,
3371                                           ARM::VST4q32Pseudo_UPD };
3372     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3373                                           ARM::VST4q16oddPseudo_UPD,
3374                                           ARM::VST4q32oddPseudo_UPD };
3375     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3376     return;
3377   }
3378
3379   case ARMISD::VST2LN_UPD: {
3380     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3381                                          ARM::VST2LNd16Pseudo_UPD,
3382                                          ARM::VST2LNd32Pseudo_UPD };
3383     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3384                                          ARM::VST2LNq32Pseudo_UPD };
3385     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3386     return;
3387   }
3388
3389   case ARMISD::VST3LN_UPD: {
3390     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3391                                          ARM::VST3LNd16Pseudo_UPD,
3392                                          ARM::VST3LNd32Pseudo_UPD };
3393     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3394                                          ARM::VST3LNq32Pseudo_UPD };
3395     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3396     return;
3397   }
3398
3399   case ARMISD::VST4LN_UPD: {
3400     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3401                                          ARM::VST4LNd16Pseudo_UPD,
3402                                          ARM::VST4LNd32Pseudo_UPD };
3403     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3404                                          ARM::VST4LNq32Pseudo_UPD };
3405     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3406     return;
3407   }
3408
3409   case ISD::INTRINSIC_VOID:
3410   case ISD::INTRINSIC_W_CHAIN: {
3411     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3412     switch (IntNo) {
3413     default:
3414       break;
3415
3416     case Intrinsic::arm_mrrc:
3417     case Intrinsic::arm_mrrc2: {
3418       SDLoc dl(N);
3419       SDValue Chain = N->getOperand(0);
3420       unsigned Opc;
3421
3422       if (Subtarget->isThumb())
3423         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3424       else
3425         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3426
3427       SmallVector<SDValue, 5> Ops;
3428       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3429       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3430       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3431
3432       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3433       // instruction will always be '1111' but it is possible in assembly language to specify
3434       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3435       if (Opc != ARM::MRRC2) {
3436         Ops.push_back(getAL(CurDAG, dl));
3437         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3438       }
3439
3440       Ops.push_back(Chain);
3441
3442       // Writes to two registers.
3443       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3444
3445       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3446       return;
3447     }
3448     case Intrinsic::arm_ldaexd:
3449     case Intrinsic::arm_ldrexd: {
3450       SDLoc dl(N);
3451       SDValue Chain = N->getOperand(0);
3452       SDValue MemAddr = N->getOperand(2);
3453       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3454
3455       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3456       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3457                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3458
3459       // arm_ldrexd returns a i64 value in {i32, i32}
3460       std::vector<EVT> ResTys;
3461       if (isThumb) {
3462         ResTys.push_back(MVT::i32);
3463         ResTys.push_back(MVT::i32);
3464       } else
3465         ResTys.push_back(MVT::Untyped);
3466       ResTys.push_back(MVT::Other);
3467
3468       // Place arguments in the right order.
3469       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3470                        CurDAG->getRegister(0, MVT::i32), Chain};
3471       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3472       // Transfer memoperands.
3473       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3474       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3475       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3476
3477       // Remap uses.
3478       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3479       if (!SDValue(N, 0).use_empty()) {
3480         SDValue Result;
3481         if (isThumb)
3482           Result = SDValue(Ld, 0);
3483         else {
3484           SDValue SubRegIdx =
3485             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3486           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3487               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3488           Result = SDValue(ResNode,0);
3489         }
3490         ReplaceUses(SDValue(N, 0), Result);
3491       }
3492       if (!SDValue(N, 1).use_empty()) {
3493         SDValue Result;
3494         if (isThumb)
3495           Result = SDValue(Ld, 1);
3496         else {
3497           SDValue SubRegIdx =
3498             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3499           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3500               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3501           Result = SDValue(ResNode,0);
3502         }
3503         ReplaceUses(SDValue(N, 1), Result);
3504       }
3505       ReplaceUses(SDValue(N, 2), OutChain);
3506       CurDAG->RemoveDeadNode(N);
3507       return;
3508     }
3509     case Intrinsic::arm_stlexd:
3510     case Intrinsic::arm_strexd: {
3511       SDLoc dl(N);
3512       SDValue Chain = N->getOperand(0);
3513       SDValue Val0 = N->getOperand(2);
3514       SDValue Val1 = N->getOperand(3);
3515       SDValue MemAddr = N->getOperand(4);
3516
3517       // Store exclusive double return a i32 value which is the return status
3518       // of the issued store.
3519       const EVT ResTys[] = {MVT::i32, MVT::Other};
3520
3521       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3522       // Place arguments in the right order.
3523       SmallVector<SDValue, 7> Ops;
3524       if (isThumb) {
3525         Ops.push_back(Val0);
3526         Ops.push_back(Val1);
3527       } else
3528         // arm_strexd uses GPRPair.
3529         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3530       Ops.push_back(MemAddr);
3531       Ops.push_back(getAL(CurDAG, dl));
3532       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3533       Ops.push_back(Chain);
3534
3535       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3536       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3537                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3538
3539       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3540       // Transfer memoperands.
3541       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3542       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3543       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3544
3545       ReplaceNode(N, St);
3546       return;
3547     }
3548
3549     case Intrinsic::arm_neon_vld1: {
3550       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3551                                            ARM::VLD1d32, ARM::VLD1d64 };
3552       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3553                                            ARM::VLD1q32, ARM::VLD1q64};
3554       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3555       return;
3556     }
3557
3558     case Intrinsic::arm_neon_vld2: {
3559       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3560                                            ARM::VLD2d32, ARM::VLD1q64 };
3561       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3562                                            ARM::VLD2q32Pseudo };
3563       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3564       return;
3565     }
3566
3567     case Intrinsic::arm_neon_vld3: {
3568       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3569                                            ARM::VLD3d16Pseudo,
3570                                            ARM::VLD3d32Pseudo,
3571                                            ARM::VLD1d64TPseudo };
3572       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3573                                             ARM::VLD3q16Pseudo_UPD,
3574                                             ARM::VLD3q32Pseudo_UPD };
3575       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3576                                             ARM::VLD3q16oddPseudo,
3577                                             ARM::VLD3q32oddPseudo };
3578       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3579       return;
3580     }
3581
3582     case Intrinsic::arm_neon_vld4: {
3583       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3584                                            ARM::VLD4d16Pseudo,
3585                                            ARM::VLD4d32Pseudo,
3586                                            ARM::VLD1d64QPseudo };
3587       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3588                                             ARM::VLD4q16Pseudo_UPD,
3589                                             ARM::VLD4q32Pseudo_UPD };
3590       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3591                                             ARM::VLD4q16oddPseudo,
3592                                             ARM::VLD4q32oddPseudo };
3593       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3594       return;
3595     }
3596
3597     case Intrinsic::arm_neon_vld2lane: {
3598       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3599                                            ARM::VLD2LNd16Pseudo,
3600                                            ARM::VLD2LNd32Pseudo };
3601       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3602                                            ARM::VLD2LNq32Pseudo };
3603       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3604       return;
3605     }
3606
3607     case Intrinsic::arm_neon_vld3lane: {
3608       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3609                                            ARM::VLD3LNd16Pseudo,
3610                                            ARM::VLD3LNd32Pseudo };
3611       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3612                                            ARM::VLD3LNq32Pseudo };
3613       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3614       return;
3615     }
3616
3617     case Intrinsic::arm_neon_vld4lane: {
3618       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3619                                            ARM::VLD4LNd16Pseudo,
3620                                            ARM::VLD4LNd32Pseudo };
3621       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3622                                            ARM::VLD4LNq32Pseudo };
3623       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3624       return;
3625     }
3626
3627     case Intrinsic::arm_neon_vst1: {
3628       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3629                                            ARM::VST1d32, ARM::VST1d64 };
3630       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3631                                            ARM::VST1q32, ARM::VST1q64 };
3632       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3633       return;
3634     }
3635
3636     case Intrinsic::arm_neon_vst2: {
3637       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3638                                            ARM::VST2d32, ARM::VST1q64 };
3639       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3640                                            ARM::VST2q32Pseudo };
3641       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3642       return;
3643     }
3644
3645     case Intrinsic::arm_neon_vst3: {
3646       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3647                                            ARM::VST3d16Pseudo,
3648                                            ARM::VST3d32Pseudo,
3649                                            ARM::VST1d64TPseudo };
3650       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3651                                             ARM::VST3q16Pseudo_UPD,
3652                                             ARM::VST3q32Pseudo_UPD };
3653       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3654                                             ARM::VST3q16oddPseudo,
3655                                             ARM::VST3q32oddPseudo };
3656       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3657       return;
3658     }
3659
3660     case Intrinsic::arm_neon_vst4: {
3661       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3662                                            ARM::VST4d16Pseudo,
3663                                            ARM::VST4d32Pseudo,
3664                                            ARM::VST1d64QPseudo };
3665       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3666                                             ARM::VST4q16Pseudo_UPD,
3667                                             ARM::VST4q32Pseudo_UPD };
3668       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3669                                             ARM::VST4q16oddPseudo,
3670                                             ARM::VST4q32oddPseudo };
3671       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3672       return;
3673     }
3674
3675     case Intrinsic::arm_neon_vst2lane: {
3676       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3677                                            ARM::VST2LNd16Pseudo,
3678                                            ARM::VST2LNd32Pseudo };
3679       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3680                                            ARM::VST2LNq32Pseudo };
3681       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3682       return;
3683     }
3684
3685     case Intrinsic::arm_neon_vst3lane: {
3686       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3687                                            ARM::VST3LNd16Pseudo,
3688                                            ARM::VST3LNd32Pseudo };
3689       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3690                                            ARM::VST3LNq32Pseudo };
3691       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3692       return;
3693     }
3694
3695     case Intrinsic::arm_neon_vst4lane: {
3696       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3697                                            ARM::VST4LNd16Pseudo,
3698                                            ARM::VST4LNd32Pseudo };
3699       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3700                                            ARM::VST4LNq32Pseudo };
3701       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3702       return;
3703     }
3704     }
3705     break;
3706   }
3707
3708   case ISD::ATOMIC_CMP_SWAP:
3709     SelectCMP_SWAP(N);
3710     return;
3711   }
3712
3713   SelectCode(N);
3714 }
3715
3716 // Inspect a register string of the form
3717 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3718 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3719 // and obtain the integer operands from them, adding these operands to the
3720 // provided vector.
3721 static void getIntOperandsFromRegisterString(StringRef RegString,
3722                                              SelectionDAG *CurDAG,
3723                                              const SDLoc &DL,
3724                                              std::vector<SDValue> &Ops) {
3725   SmallVector<StringRef, 5> Fields;
3726   RegString.split(Fields, ':');
3727
3728   if (Fields.size() > 1) {
3729     bool AllIntFields = true;
3730
3731     for (StringRef Field : Fields) {
3732       // Need to trim out leading 'cp' characters and get the integer field.
3733       unsigned IntField;
3734       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3735       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3736     }
3737
3738     assert(AllIntFields &&
3739             "Unexpected non-integer value in special register string.");
3740   }
3741 }
3742
3743 // Maps a Banked Register string to its mask value. The mask value returned is
3744 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3745 // mask operand, which expresses which register is to be used, e.g. r8, and in
3746 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3747 // was invalid.
3748 static inline int getBankedRegisterMask(StringRef RegString) {
3749   return StringSwitch<int>(RegString.lower())
3750           .Case("r8_usr", 0x00)
3751           .Case("r9_usr", 0x01)
3752           .Case("r10_usr", 0x02)
3753           .Case("r11_usr", 0x03)
3754           .Case("r12_usr", 0x04)
3755           .Case("sp_usr", 0x05)
3756           .Case("lr_usr", 0x06)
3757           .Case("r8_fiq", 0x08)
3758           .Case("r9_fiq", 0x09)
3759           .Case("r10_fiq", 0x0a)
3760           .Case("r11_fiq", 0x0b)
3761           .Case("r12_fiq", 0x0c)
3762           .Case("sp_fiq", 0x0d)
3763           .Case("lr_fiq", 0x0e)
3764           .Case("lr_irq", 0x10)
3765           .Case("sp_irq", 0x11)
3766           .Case("lr_svc", 0x12)
3767           .Case("sp_svc", 0x13)
3768           .Case("lr_abt", 0x14)
3769           .Case("sp_abt", 0x15)
3770           .Case("lr_und", 0x16)
3771           .Case("sp_und", 0x17)
3772           .Case("lr_mon", 0x1c)
3773           .Case("sp_mon", 0x1d)
3774           .Case("elr_hyp", 0x1e)
3775           .Case("sp_hyp", 0x1f)
3776           .Case("spsr_fiq", 0x2e)
3777           .Case("spsr_irq", 0x30)
3778           .Case("spsr_svc", 0x32)
3779           .Case("spsr_abt", 0x34)
3780           .Case("spsr_und", 0x36)
3781           .Case("spsr_mon", 0x3c)
3782           .Case("spsr_hyp", 0x3e)
3783           .Default(-1);
3784 }
3785
3786 // Maps a MClass special register string to its value for use in the
3787 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3788 // Returns -1 to signify that the string was invalid.
3789 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3790   return StringSwitch<int>(RegString.lower())
3791           .Case("apsr", 0x0)
3792           .Case("iapsr", 0x1)
3793           .Case("eapsr", 0x2)
3794           .Case("xpsr", 0x3)
3795           .Case("ipsr", 0x5)
3796           .Case("epsr", 0x6)
3797           .Case("iepsr", 0x7)
3798           .Case("msp", 0x8)
3799           .Case("psp", 0x9)
3800           .Case("primask", 0x10)
3801           .Case("basepri", 0x11)
3802           .Case("basepri_max", 0x12)
3803           .Case("faultmask", 0x13)
3804           .Case("control", 0x14)
3805           .Case("msplim", 0x0a)
3806           .Case("psplim", 0x0b)
3807           .Case("sp", 0x18)
3808           .Default(-1);
3809 }
3810
3811 // The flags here are common to those allowed for apsr in the A class cores and
3812 // those allowed for the special registers in the M class cores. Returns a
3813 // value representing which flags were present, -1 if invalid.
3814 static inline int getMClassFlagsMask(StringRef Flags) {
3815   return StringSwitch<int>(Flags)
3816           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3817                          // correct when flags are not permitted
3818           .Case("g", 0x1)
3819           .Case("nzcvq", 0x2)
3820           .Case("nzcvqg", 0x3)
3821           .Default(-1);
3822 }
3823
3824 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3825                                  const ARMSubtarget *Subtarget) {
3826   // Ensure that the register (without flags) was a valid M Class special
3827   // register.
3828   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3829   if (SYSmvalue == -1)
3830     return -1;
3831
3832   // basepri, basepri_max and faultmask are only valid for V7m.
3833   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3834     return -1;
3835
3836   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3837     Flags = "";
3838     SYSmvalue |= 0x80;
3839   }
3840
3841   if (!Subtarget->has8MSecExt() &&
3842       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3843     return -1;
3844
3845   if (!Subtarget->hasV8MMainlineOps() &&
3846       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3847        SYSmvalue == 0x93))
3848     return -1;
3849
3850   // If it was a read then we won't be expecting flags and so at this point
3851   // we can return the mask.
3852   if (IsRead) {
3853     if (Flags.empty())
3854       return SYSmvalue;
3855     else
3856       return -1;
3857   }
3858
3859   // We know we are now handling a write so need to get the mask for the flags.
3860   int Mask = getMClassFlagsMask(Flags);
3861
3862   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3863   // shouldn't have flags present.
3864   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3865     return -1;
3866
3867   // The _g and _nzcvqg versions are only valid if the DSP extension is
3868   // available.
3869   if (!Subtarget->hasDSP() && (Mask & 0x1))
3870     return -1;
3871
3872   // The register was valid so need to put the mask in the correct place
3873   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3874   // construct the operand for the instruction node.
3875   return SYSmvalue | Mask << 10;
3876 }
3877
3878 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3879   // The mask operand contains the special register (R Bit) in bit 4, whether
3880   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3881   // bits 3-0 contains the fields to be accessed in the special register, set by
3882   // the flags provided with the register.
3883   int Mask = 0;
3884   if (Reg == "apsr") {
3885     // The flags permitted for apsr are the same flags that are allowed in
3886     // M class registers. We get the flag value and then shift the flags into
3887     // the correct place to combine with the mask.
3888     Mask = getMClassFlagsMask(Flags);
3889     if (Mask == -1)
3890       return -1;
3891     return Mask << 2;
3892   }
3893
3894   if (Reg != "cpsr" && Reg != "spsr") {
3895     return -1;
3896   }
3897
3898   // This is the same as if the flags were "fc"
3899   if (Flags.empty() || Flags == "all")
3900     return Mask | 0x9;
3901
3902   // Inspect the supplied flags string and set the bits in the mask for
3903   // the relevant and valid flags allowed for cpsr and spsr.
3904   for (char Flag : Flags) {
3905     int FlagVal;
3906     switch (Flag) {
3907       case 'c':
3908         FlagVal = 0x1;
3909         break;
3910       case 'x':
3911         FlagVal = 0x2;
3912         break;
3913       case 's':
3914         FlagVal = 0x4;
3915         break;
3916       case 'f':
3917         FlagVal = 0x8;
3918         break;
3919       default:
3920         FlagVal = 0;
3921     }
3922
3923     // This avoids allowing strings where the same flag bit appears twice.
3924     if (!FlagVal || (Mask & FlagVal))
3925       return -1;
3926     Mask |= FlagVal;
3927   }
3928
3929   // If the register is spsr then we need to set the R bit.
3930   if (Reg == "spsr")
3931     Mask |= 0x10;
3932
3933   return Mask;
3934 }
3935
3936 // Lower the read_register intrinsic to ARM specific DAG nodes
3937 // using the supplied metadata string to select the instruction node to use
3938 // and the registers/masks to construct as operands for the node.
3939 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3940   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3941   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3942   bool IsThumb2 = Subtarget->isThumb2();
3943   SDLoc DL(N);
3944
3945   std::vector<SDValue> Ops;
3946   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3947
3948   if (!Ops.empty()) {
3949     // If the special register string was constructed of fields (as defined
3950     // in the ACLE) then need to lower to MRC node (32 bit) or
3951     // MRRC node(64 bit), we can make the distinction based on the number of
3952     // operands we have.
3953     unsigned Opcode;
3954     SmallVector<EVT, 3> ResTypes;
3955     if (Ops.size() == 5){
3956       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3957       ResTypes.append({ MVT::i32, MVT::Other });
3958     } else {
3959       assert(Ops.size() == 3 &&
3960               "Invalid number of fields in special register string.");
3961       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3962       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3963     }
3964
3965     Ops.push_back(getAL(CurDAG, DL));
3966     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3967     Ops.push_back(N->getOperand(0));
3968     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3969     return true;
3970   }
3971
3972   std::string SpecialReg = RegString->getString().lower();
3973
3974   int BankedReg = getBankedRegisterMask(SpecialReg);
3975   if (BankedReg != -1) {
3976     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3977             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3978             N->getOperand(0) };
3979     ReplaceNode(
3980         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3981                                   DL, MVT::i32, MVT::Other, Ops));
3982     return true;
3983   }
3984
3985   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3986   // corresponding to the register that is being read from. So we switch on the
3987   // string to find which opcode we need to use.
3988   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3989                     .Case("fpscr", ARM::VMRS)
3990                     .Case("fpexc", ARM::VMRS_FPEXC)
3991                     .Case("fpsid", ARM::VMRS_FPSID)
3992                     .Case("mvfr0", ARM::VMRS_MVFR0)
3993                     .Case("mvfr1", ARM::VMRS_MVFR1)
3994                     .Case("mvfr2", ARM::VMRS_MVFR2)
3995                     .Case("fpinst", ARM::VMRS_FPINST)
3996                     .Case("fpinst2", ARM::VMRS_FPINST2)
3997                     .Default(0);
3998
3999   // If an opcode was found then we can lower the read to a VFP instruction.
4000   if (Opcode) {
4001     if (!Subtarget->hasVFP2())
4002       return false;
4003     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4004       return false;
4005
4006     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4007             N->getOperand(0) };
4008     ReplaceNode(N,
4009                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4010     return true;
4011   }
4012
4013   // If the target is M Class then need to validate that the register string
4014   // is an acceptable value, so check that a mask can be constructed from the
4015   // string.
4016   if (Subtarget->isMClass()) {
4017     StringRef Flags = "", Reg = SpecialReg;
4018     if (Reg.endswith("_ns")) {
4019       Flags = "ns";
4020       Reg = Reg.drop_back(3);
4021     }
4022
4023     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4024     if (SYSmValue == -1)
4025       return false;
4026
4027     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4028                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4029                       N->getOperand(0) };
4030     ReplaceNode(
4031         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4032     return true;
4033   }
4034
4035   // Here we know the target is not M Class so we need to check if it is one
4036   // of the remaining possible values which are apsr, cpsr or spsr.
4037   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4038     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4039             N->getOperand(0) };
4040     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4041                                           DL, MVT::i32, MVT::Other, Ops));
4042     return true;
4043   }
4044
4045   if (SpecialReg == "spsr") {
4046     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4047             N->getOperand(0) };
4048     ReplaceNode(
4049         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4050                                   MVT::i32, MVT::Other, Ops));
4051     return true;
4052   }
4053
4054   return false;
4055 }
4056
4057 // Lower the write_register intrinsic to ARM specific DAG nodes
4058 // using the supplied metadata string to select the instruction node to use
4059 // and the registers/masks to use in the nodes
4060 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4061   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4062   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4063   bool IsThumb2 = Subtarget->isThumb2();
4064   SDLoc DL(N);
4065
4066   std::vector<SDValue> Ops;
4067   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4068
4069   if (!Ops.empty()) {
4070     // If the special register string was constructed of fields (as defined
4071     // in the ACLE) then need to lower to MCR node (32 bit) or
4072     // MCRR node(64 bit), we can make the distinction based on the number of
4073     // operands we have.
4074     unsigned Opcode;
4075     if (Ops.size() == 5) {
4076       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4077       Ops.insert(Ops.begin()+2, N->getOperand(2));
4078     } else {
4079       assert(Ops.size() == 3 &&
4080               "Invalid number of fields in special register string.");
4081       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4082       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4083       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4084     }
4085
4086     Ops.push_back(getAL(CurDAG, DL));
4087     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4088     Ops.push_back(N->getOperand(0));
4089
4090     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4091     return true;
4092   }
4093
4094   std::string SpecialReg = RegString->getString().lower();
4095   int BankedReg = getBankedRegisterMask(SpecialReg);
4096   if (BankedReg != -1) {
4097     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4098             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4099             N->getOperand(0) };
4100     ReplaceNode(
4101         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4102                                   DL, MVT::Other, Ops));
4103     return true;
4104   }
4105
4106   // The VFP registers are written to by creating SelectionDAG nodes with
4107   // opcodes corresponding to the register that is being written. So we switch
4108   // on the string to find which opcode we need to use.
4109   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4110                     .Case("fpscr", ARM::VMSR)
4111                     .Case("fpexc", ARM::VMSR_FPEXC)
4112                     .Case("fpsid", ARM::VMSR_FPSID)
4113                     .Case("fpinst", ARM::VMSR_FPINST)
4114                     .Case("fpinst2", ARM::VMSR_FPINST2)
4115                     .Default(0);
4116
4117   if (Opcode) {
4118     if (!Subtarget->hasVFP2())
4119       return false;
4120     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4121             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4122     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4123     return true;
4124   }
4125
4126   std::pair<StringRef, StringRef> Fields;
4127   Fields = StringRef(SpecialReg).rsplit('_');
4128   std::string Reg = Fields.first.str();
4129   StringRef Flags = Fields.second;
4130
4131   // If the target was M Class then need to validate the special register value
4132   // and retrieve the mask for use in the instruction node.
4133   if (Subtarget->isMClass()) {
4134     // basepri_max gets split so need to correct Reg and Flags.
4135     if (SpecialReg == "basepri_max") {
4136       Reg = SpecialReg;
4137       Flags = "";
4138     }
4139     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4140     if (SYSmValue == -1)
4141       return false;
4142
4143     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4144                       N->getOperand(2), getAL(CurDAG, DL),
4145                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4146     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4147     return true;
4148   }
4149
4150   // We then check to see if a valid mask can be constructed for one of the
4151   // register string values permitted for the A and R class cores. These values
4152   // are apsr, spsr and cpsr; these are also valid on older cores.
4153   int Mask = getARClassRegisterMask(Reg, Flags);
4154   if (Mask != -1) {
4155     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4156             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4157             N->getOperand(0) };
4158     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4159                                           DL, MVT::Other, Ops));
4160     return true;
4161   }
4162
4163   return false;
4164 }
4165
4166 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4167   std::vector<SDValue> AsmNodeOperands;
4168   unsigned Flag, Kind;
4169   bool Changed = false;
4170   unsigned NumOps = N->getNumOperands();
4171
4172   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4173   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4174   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4175   // respectively. Since there is no constraint to explicitly specify a
4176   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4177   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4178   // them into a GPRPair.
4179
4180   SDLoc dl(N);
4181   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4182                                    : SDValue(nullptr,0);
4183
4184   SmallVector<bool, 8> OpChanged;
4185   // Glue node will be appended late.
4186   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4187     SDValue op = N->getOperand(i);
4188     AsmNodeOperands.push_back(op);
4189
4190     if (i < InlineAsm::Op_FirstOperand)
4191       continue;
4192
4193     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4194       Flag = C->getZExtValue();
4195       Kind = InlineAsm::getKind(Flag);
4196     }
4197     else
4198       continue;
4199
4200     // Immediate operands to inline asm in the SelectionDAG are modeled with
4201     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4202     // the second is a constant with the value of the immediate. If we get here
4203     // and we have a Kind_Imm, skip the next operand, and continue.
4204     if (Kind == InlineAsm::Kind_Imm) {
4205       SDValue op = N->getOperand(++i);
4206       AsmNodeOperands.push_back(op);
4207       continue;
4208     }
4209
4210     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4211     if (NumRegs)
4212       OpChanged.push_back(false);
4213
4214     unsigned DefIdx = 0;
4215     bool IsTiedToChangedOp = false;
4216     // If it's a use that is tied with a previous def, it has no
4217     // reg class constraint.
4218     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4219       IsTiedToChangedOp = OpChanged[DefIdx];
4220
4221     // Memory operands to inline asm in the SelectionDAG are modeled with two
4222     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4223     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4224     // it doesn't get misinterpreted), and continue. We do this here because
4225     // it's important to update the OpChanged array correctly before moving on.
4226     if (Kind == InlineAsm::Kind_Mem) {
4227       SDValue op = N->getOperand(++i);
4228       AsmNodeOperands.push_back(op);
4229       continue;
4230     }
4231
4232     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4233         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4234       continue;
4235
4236     unsigned RC;
4237     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4238     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4239         || NumRegs != 2)
4240       continue;
4241
4242     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4243     SDValue V0 = N->getOperand(i+1);
4244     SDValue V1 = N->getOperand(i+2);
4245     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4246     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4247     SDValue PairedReg;
4248     MachineRegisterInfo &MRI = MF->getRegInfo();
4249
4250     if (Kind == InlineAsm::Kind_RegDef ||
4251         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4252       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4253       // the original GPRs.
4254
4255       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4256       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4257       SDValue Chain = SDValue(N,0);
4258
4259       SDNode *GU = N->getGluedUser();
4260       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4261                                                Chain.getValue(1));
4262
4263       // Extract values from a GPRPair reg and copy to the original GPR reg.
4264       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4265                                                     RegCopy);
4266       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4267                                                     RegCopy);
4268       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4269                                         RegCopy.getValue(1));
4270       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4271
4272       // Update the original glue user.
4273       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4274       Ops.push_back(T1.getValue(1));
4275       CurDAG->UpdateNodeOperands(GU, Ops);
4276     }
4277     else {
4278       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4279       // GPRPair and then pass the GPRPair to the inline asm.
4280       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4281
4282       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4283       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4284                                           Chain.getValue(1));
4285       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4286                                           T0.getValue(1));
4287       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4288
4289       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4290       // i32 VRs of inline asm with it.
4291       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4292       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4293       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4294
4295       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4296       Glue = Chain.getValue(1);
4297     }
4298
4299     Changed = true;
4300
4301     if(PairedReg.getNode()) {
4302       OpChanged[OpChanged.size() -1 ] = true;
4303       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4304       if (IsTiedToChangedOp)
4305         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4306       else
4307         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4308       // Replace the current flag.
4309       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4310           Flag, dl, MVT::i32);
4311       // Add the new register node and skip the original two GPRs.
4312       AsmNodeOperands.push_back(PairedReg);
4313       // Skip the next two GPRs.
4314       i += 2;
4315     }
4316   }
4317
4318   if (Glue.getNode())
4319     AsmNodeOperands.push_back(Glue);
4320   if (!Changed)
4321     return false;
4322
4323   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4324       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4325   New->setNodeId(-1);
4326   ReplaceNode(N, New.getNode());
4327   return true;
4328 }
4329
4330
4331 bool ARMDAGToDAGISel::
4332 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4333                              std::vector<SDValue> &OutOps) {
4334   switch(ConstraintID) {
4335   default:
4336     llvm_unreachable("Unexpected asm memory constraint");
4337   case InlineAsm::Constraint_i:
4338     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4339     //        be an immediate and not a memory constraint.
4340     LLVM_FALLTHROUGH;
4341   case InlineAsm::Constraint_m:
4342   case InlineAsm::Constraint_o:
4343   case InlineAsm::Constraint_Q:
4344   case InlineAsm::Constraint_Um:
4345   case InlineAsm::Constraint_Un:
4346   case InlineAsm::Constraint_Uq:
4347   case InlineAsm::Constraint_Us:
4348   case InlineAsm::Constraint_Ut:
4349   case InlineAsm::Constraint_Uv:
4350   case InlineAsm::Constraint_Uy:
4351     // Require the address to be in a register.  That is safe for all ARM
4352     // variants and it is hard to do anything much smarter without knowing
4353     // how the operand is used.
4354     OutOps.push_back(Op);
4355     return false;
4356   }
4357   return true;
4358 }
4359
4360 /// createARMISelDag - This pass converts a legalized DAG into a
4361 /// ARM-specific DAG, ready for instruction scheduling.
4362 ///
4363 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4364                                      CodeGenOpt::Level OptLevel) {
4365   return new ARMDAGToDAGISel(TM, OptLevel);
4366 }