contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Target/TargetLowering.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 enum AddrMode2Type {
  53   AM2_BASE, // Simple AM2 (+-imm12)
  54   AM2_SHOP  // Shifter-op AM2
  55 };
  56
  57 class ARMDAGToDAGISel : public SelectionDAGISel {
  58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  59   /// make the right decision when generating code for different targets.
  60   const ARMSubtarget *Subtarget;
  61
  62 public:
  63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  64       : SelectionDAGISel(tm, OptLevel) {}
  65
  66   bool runOnMachineFunction(MachineFunction &MF) override {
  67     // Reset the subtarget each time through.
  68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  69     SelectionDAGISel::runOnMachineFunction(MF);
  70     return true;
  71   }
  72
  73   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  74
  75   void PreprocessISelDAG() override;
  76
  77   /// getI32Imm - Return a target constant of type i32 with the specified
  78   /// value.
  79   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  80     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  81   }
  82
  83   void Select(SDNode *N) override;
  84
  85   bool hasNoVMLxHazardUse(SDNode *N) const;
  86   bool isShifterOpProfitable(const SDValue &Shift,
  87                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  88   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  89                                SDValue &B, SDValue &C,
  90                                bool CheckProfitability = true);
  91   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  92                                SDValue &B, bool CheckProfitability = true);
  93   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B, SDValue &C) {
  95     // Don't apply the profitability check
  96     return SelectRegShifterOperand(N, A, B, C, false);
  97   }
  98   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  99                                     SDValue &B) {
 100     // Don't apply the profitability check
 101     return SelectImmShifterOperand(N, A, B, false);
 102   }
 103
 104   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 105   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 106
 107   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 108                                       SDValue &Offset, SDValue &Opc);
 109   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 110                            SDValue &Opc) {
 111     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 112   }
 113
 114   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 115                            SDValue &Opc) {
 116     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 117   }
 118
 119   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 120                        SDValue &Opc) {
 121     SelectAddrMode2Worker(N, Base, Offset, Opc);
 122 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 123     // This always matches one way or another.
 124     return true;
 125   }
 126
 127   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 128     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 129     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 130     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 131     return true;
 132   }
 133
 134   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 135                              SDValue &Offset, SDValue &Opc);
 136   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 137                              SDValue &Offset, SDValue &Opc);
 138   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 139                              SDValue &Offset, SDValue &Opc);
 140   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 141   bool SelectAddrMode3(SDValue N, SDValue &Base,
 142                        SDValue &Offset, SDValue &Opc);
 143   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode5(SDValue N, SDValue &Base,
 146                        SDValue &Offset);
 147   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 148   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 149
 150   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 151
 152   // Thumb Addressing Modes:
 153   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 154   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 155                                 SDValue &OffImm);
 156   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 157                                  SDValue &OffImm);
 158   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 159                                  SDValue &OffImm);
 160   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 161                                  SDValue &OffImm);
 162   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 163
 164   // Thumb 2 Addressing Modes:
 165   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 166   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 167                             SDValue &OffImm);
 168   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 169                                  SDValue &OffImm);
 170   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 171                              SDValue &OffReg, SDValue &ShImm);
 172   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 173
 174   inline bool is_so_imm(unsigned Imm) const {
 175     return ARM_AM::getSOImmVal(Imm) != -1;
 176   }
 177
 178   inline bool is_so_imm_not(unsigned Imm) const {
 179     return ARM_AM::getSOImmVal(~Imm) != -1;
 180   }
 181
 182   inline bool is_t2_so_imm(unsigned Imm) const {
 183     return ARM_AM::getT2SOImmVal(Imm) != -1;
 184   }
 185
 186   inline bool is_t2_so_imm_not(unsigned Imm) const {
 187     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 188   }
 189
 190   // Include the pieces autogenerated from the target description.
 191 #include "ARMGenDAGISel.inc"
 192
 193 private:
 194   void transferMemOperands(SDNode *Src, SDNode *Dst);
 195
 196   /// Indexed (pre/post inc/dec) load matching code for ARM.
 197   bool tryARMIndexedLoad(SDNode *N);
 198   bool tryT1IndexedLoad(SDNode *N);
 199   bool tryT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 207                  const uint16_t *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 215                  const uint16_t *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 221                        unsigned NumVecs, const uint16_t *DOpcodes,
 222                        const uint16_t *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *DOpcodes,
 229                     const uint16_t *QOpcodes = nullptr);
 230
 231   /// Try to select SBFX/UBFX instructions for ARM.
 232   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 233
 234   // Select special operations if node forms integer ABS pattern
 235   bool tryABSOp(SDNode *N);
 236
 237   bool tryReadRegister(SDNode *N);
 238   bool tryWriteRegister(SDNode *N);
 239
 240   bool tryInlineAsm(SDNode *N);
 241
 242   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 243
 244   void SelectCMP_SWAP(SDNode *N);
 245
 246   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 247   /// inline asm expressions.
 248   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 249                                     std::vector<SDValue> &OutOps) override;
 250
 251   // Form pairs of consecutive R, S, D, or Q registers.
 252   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 253   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 254   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 255   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 256
 257   // Form sequences of 4 consecutive S, D, or Q registers.
 258   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 259   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 260   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 261
 262   // Get the alignment operand for a NEON VLD or VST instruction.
 263   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 264                         bool is64BitVector);
 265
 266   /// Returns the number of instructions required to materialize the given
 267   /// constant in a register, or 3 if a literal pool load is needed.
 268   unsigned ConstantMaterializationCost(unsigned Val) const;
 269
 270   /// Checks if N is a multiplication by a constant where we can extract out a
 271   /// power of two from the constant so that it can be used in a shift, but only
 272   /// if it simplifies the materialization of the constant. Returns true if it
 273   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 274   /// out and to NewMulConst the new constant to be multiplied by.
 275   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 276                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 277
 278   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 279   /// selected when N would have been selected.
 280   void replaceDAGValue(const SDValue &N, SDValue M);
 281 };
 282 }
 283
 284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 285 /// operand. If so Imm will receive the 32-bit value.
 286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 287   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 288     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 289     return true;
 290   }
 291   return false;
 292 }
 293
 294 // isInt32Immediate - This method tests to see if a constant operand.
 295 // If so Imm will receive the 32 bit value.
 296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 297   return isInt32Immediate(N.getNode(), Imm);
 298 }
 299
 300 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 301 // opcode and that it has a immediate integer right operand.
 302 // If so Imm will receive the 32 bit value.
 303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 304   return N->getOpcode() == Opc &&
 305          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 306 }
 307
 308 /// \brief Check whether a particular node is a constant value representable as
 309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 310 ///
 311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 312 static bool isScaledConstantInRange(SDValue Node, int Scale,
 313                                     int RangeMin, int RangeMax,
 314                                     int &ScaledConstant) {
 315   assert(Scale > 0 && "Invalid scale!");
 316
 317   // Check that this is a constant.
 318   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 319   if (!C)
 320     return false;
 321
 322   ScaledConstant = (int) C->getZExtValue();
 323   if ((ScaledConstant % Scale) != 0)
 324     return false;
 325
 326   ScaledConstant /= Scale;
 327   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 328 }
 329
 330 void ARMDAGToDAGISel::PreprocessISelDAG() {
 331   if (!Subtarget->hasV6T2Ops())
 332     return;
 333
 334   bool isThumb2 = Subtarget->isThumb();
 335   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 336        E = CurDAG->allnodes_end(); I != E; ) {
 337     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 338
 339     if (N->getOpcode() != ISD::ADD)
 340       continue;
 341
 342     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 343     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 344     // trailing zeros, e.g. 1020.
 345     // Transform the expression to
 346     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 347     // of trailing zeros of c2. The left shift would be folded as an shifter
 348     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 349     // node (UBFX).
 350
 351     SDValue N0 = N->getOperand(0);
 352     SDValue N1 = N->getOperand(1);
 353     unsigned And_imm = 0;
 354     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 355       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 356         std::swap(N0, N1);
 357     }
 358     if (!And_imm)
 359       continue;
 360
 361     // Check if the AND mask is an immediate of the form: 000.....1111111100
 362     unsigned TZ = countTrailingZeros(And_imm);
 363     if (TZ != 1 && TZ != 2)
 364       // Be conservative here. Shifter operands aren't always free. e.g. On
 365       // Swift, left shifter operand of 1 / 2 for free but others are not.
 366       // e.g.
 367       //  ubfx   r3, r1, #16, #8
 368       //  ldr.w  r3, [r0, r3, lsl #2]
 369       // vs.
 370       //  mov.w  r9, #1020
 371       //  and.w  r2, r9, r1, lsr #14
 372       //  ldr    r2, [r0, r2]
 373       continue;
 374     And_imm >>= TZ;
 375     if (And_imm & (And_imm + 1))
 376       continue;
 377
 378     // Look for (and (srl X, c1), c2).
 379     SDValue Srl = N1.getOperand(0);
 380     unsigned Srl_imm = 0;
 381     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 382         (Srl_imm <= 2))
 383       continue;
 384
 385     // Make sure first operand is not a shifter operand which would prevent
 386     // folding of the left shift.
 387     SDValue CPTmp0;
 388     SDValue CPTmp1;
 389     SDValue CPTmp2;
 390     if (isThumb2) {
 391       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 392         continue;
 393     } else {
 394       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 395           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 396         continue;
 397     }
 398
 399     // Now make the transformation.
 400     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 401                           Srl.getOperand(0),
 402                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 403                                               MVT::i32));
 404     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 405                          Srl,
 406                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 407     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 408                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 409     CurDAG->UpdateNodeOperands(N, N0, N1);
 410   }
 411 }
 412
 413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 415 /// least on current ARM implementations) which should be avoidded.
 416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 417   if (OptLevel == CodeGenOpt::None)
 418     return true;
 419
 420   if (!Subtarget->hasVMLxHazards())
 421     return true;
 422
 423   if (!N->hasOneUse())
 424     return false;
 425
 426   SDNode *Use = *N->use_begin();
 427   if (Use->getOpcode() == ISD::CopyToReg)
 428     return true;
 429   if (Use->isMachineOpcode()) {
 430     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 431         CurDAG->getSubtarget().getInstrInfo());
 432
 433     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 434     if (MCID.mayStore())
 435       return true;
 436     unsigned Opcode = MCID.getOpcode();
 437     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 438       return true;
 439     // vmlx feeding into another vmlx. We actually want to unfold
 440     // the use later in the MLxExpansion pass. e.g.
 441     // vmla
 442     // vmla (stall 8 cycles)
 443     //
 444     // vmul (5 cycles)
 445     // vadd (5 cycles)
 446     // vmla
 447     // This adds up to about 18 - 19 cycles.
 448     //
 449     // vmla
 450     // vmul (stall 4 cycles)
 451     // vadd adds up to about 14 cycles.
 452     return TII->isFpMLxInstruction(Opcode);
 453   }
 454
 455   return false;
 456 }
 457
 458 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 459                                             ARM_AM::ShiftOpc ShOpcVal,
 460                                             unsigned ShAmt) {
 461   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 462     return true;
 463   if (Shift.hasOneUse())
 464     return true;
 465   // R << 2 is free.
 466   return ShOpcVal == ARM_AM::lsl &&
 467          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 468 }
 469
 470 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 471   if (Subtarget->isThumb()) {
 472     if (Val <= 255) return 1;                               // MOV
 473     if (Subtarget->hasV6T2Ops() &&
 474         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 475       return 1; // MOVW
 476     if (Val <= 510) return 2;                               // MOV + ADDi8
 477     if (~Val <= 255) return 2;                              // MOV + MVN
 478     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 479   } else {
 480     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 481     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 482     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 483     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 484   }
 485   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 486   return 3; // Literal pool load
 487 }
 488
 489 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 490                                              unsigned MaxShift,
 491                                              unsigned &PowerOfTwo,
 492                                              SDValue &NewMulConst) const {
 493   assert(N.getOpcode() == ISD::MUL);
 494   assert(MaxShift > 0);
 495
 496   // If the multiply is used in more than one place then changing the constant
 497   // will make other uses incorrect, so don't.
 498   if (!N.hasOneUse()) return false;
 499   // Check if the multiply is by a constant
 500   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 501   if (!MulConst) return false;
 502   // If the constant is used in more than one place then modifying it will mean
 503   // we need to materialize two constants instead of one, which is a bad idea.
 504   if (!MulConst->hasOneUse()) return false;
 505   unsigned MulConstVal = MulConst->getZExtValue();
 506   if (MulConstVal == 0) return false;
 507
 508   // Find the largest power of 2 that MulConstVal is a multiple of
 509   PowerOfTwo = MaxShift;
 510   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 511     --PowerOfTwo;
 512     if (PowerOfTwo == 0) return false;
 513   }
 514
 515   // Only optimise if the new cost is better
 516   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 517   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 518   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 519   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 520   return NewCost < OldCost;
 521 }
 522
 523 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 524   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 525   CurDAG->ReplaceAllUsesWith(N, M);
 526 }
 527
 528 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 529                                               SDValue &BaseReg,
 530                                               SDValue &Opc,
 531                                               bool CheckProfitability) {
 532   if (DisableShifterOp)
 533     return false;
 534
 535   // If N is a multiply-by-constant and it's profitable to extract a shift and
 536   // use it in a shifted operand do so.
 537   if (N.getOpcode() == ISD::MUL) {
 538     unsigned PowerOfTwo = 0;
 539     SDValue NewMulConst;
 540     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 541       HandleSDNode Handle(N);
 542       SDLoc Loc(N);
 543       replaceDAGValue(N.getOperand(1), NewMulConst);
 544       BaseReg = Handle.getValue();
 545       Opc = CurDAG->getTargetConstant(
 546           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 547       return true;
 548     }
 549   }
 550
 551   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 552
 553   // Don't match base register only case. That is matched to a separate
 554   // lower complexity pattern with explicit register operand.
 555   if (ShOpcVal == ARM_AM::no_shift) return false;
 556
 557   BaseReg = N.getOperand(0);
 558   unsigned ShImmVal = 0;
 559   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 560   if (!RHS) return false;
 561   ShImmVal = RHS->getZExtValue() & 31;
 562   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 563                                   SDLoc(N), MVT::i32);
 564   return true;
 565 }
 566
 567 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 568                                               SDValue &BaseReg,
 569                                               SDValue &ShReg,
 570                                               SDValue &Opc,
 571                                               bool CheckProfitability) {
 572   if (DisableShifterOp)
 573     return false;
 574
 575   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 576
 577   // Don't match base register only case. That is matched to a separate
 578   // lower complexity pattern with explicit register operand.
 579   if (ShOpcVal == ARM_AM::no_shift) return false;
 580
 581   BaseReg = N.getOperand(0);
 582   unsigned ShImmVal = 0;
 583   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 584   if (RHS) return false;
 585
 586   ShReg = N.getOperand(1);
 587   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 588     return false;
 589   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 590                                   SDLoc(N), MVT::i32);
 591   return true;
 592 }
 593
 594
 595 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 596                                           SDValue &Base,
 597                                           SDValue &OffImm) {
 598   // Match simple R + imm12 operands.
 599
 600   // Base only.
 601   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 602       !CurDAG->isBaseWithConstantOffset(N)) {
 603     if (N.getOpcode() == ISD::FrameIndex) {
 604       // Match frame index.
 605       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 606       Base = CurDAG->getTargetFrameIndex(
 607           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 608       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 609       return true;
 610     }
 611
 612     if (N.getOpcode() == ARMISD::Wrapper &&
 613         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 614         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 615         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 616       Base = N.getOperand(0);
 617     } else
 618       Base = N;
 619     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 620     return true;
 621   }
 622
 623   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 624     int RHSC = (int)RHS->getSExtValue();
 625     if (N.getOpcode() == ISD::SUB)
 626       RHSC = -RHSC;
 627
 628     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 629       Base   = N.getOperand(0);
 630       if (Base.getOpcode() == ISD::FrameIndex) {
 631         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 632         Base = CurDAG->getTargetFrameIndex(
 633             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 634       }
 635       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 636       return true;
 637     }
 638   }
 639
 640   // Base only.
 641   Base = N;
 642   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 643   return true;
 644 }
 645
 646
 647
 648 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 649                                       SDValue &Opc) {
 650   if (N.getOpcode() == ISD::MUL &&
 651       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 652     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 653       // X * [3,5,9] -> X + X * [2,4,8] etc.
 654       int RHSC = (int)RHS->getZExtValue();
 655       if (RHSC & 1) {
 656         RHSC = RHSC & ~1;
 657         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 658         if (RHSC < 0) {
 659           AddSub = ARM_AM::sub;
 660           RHSC = - RHSC;
 661         }
 662         if (isPowerOf2_32(RHSC)) {
 663           unsigned ShAmt = Log2_32(RHSC);
 664           Base = Offset = N.getOperand(0);
 665           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 666                                                             ARM_AM::lsl),
 667                                           SDLoc(N), MVT::i32);
 668           return true;
 669         }
 670       }
 671     }
 672   }
 673
 674   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 675       // ISD::OR that is equivalent to an ISD::ADD.
 676       !CurDAG->isBaseWithConstantOffset(N))
 677     return false;
 678
 679   // Leave simple R +/- imm12 operands for LDRi12
 680   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 681     int RHSC;
 682     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 683                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 684       return false;
 685   }
 686
 687   // Otherwise this is R +/- [possibly shifted] R.
 688   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 689   ARM_AM::ShiftOpc ShOpcVal =
 690     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 691   unsigned ShAmt = 0;
 692
 693   Base   = N.getOperand(0);
 694   Offset = N.getOperand(1);
 695
 696   if (ShOpcVal != ARM_AM::no_shift) {
 697     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 698     // it.
 699     if (ConstantSDNode *Sh =
 700            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 701       ShAmt = Sh->getZExtValue();
 702       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 703         Offset = N.getOperand(1).getOperand(0);
 704       else {
 705         ShAmt = 0;
 706         ShOpcVal = ARM_AM::no_shift;
 707       }
 708     } else {
 709       ShOpcVal = ARM_AM::no_shift;
 710     }
 711   }
 712
 713   // Try matching (R shl C) + (R).
 714   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 715       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 716         N.getOperand(0).hasOneUse())) {
 717     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 718     if (ShOpcVal != ARM_AM::no_shift) {
 719       // Check to see if the RHS of the shift is a constant, if not, we can't
 720       // fold it.
 721       if (ConstantSDNode *Sh =
 722           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 723         ShAmt = Sh->getZExtValue();
 724         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 725           Offset = N.getOperand(0).getOperand(0);
 726           Base = N.getOperand(1);
 727         } else {
 728           ShAmt = 0;
 729           ShOpcVal = ARM_AM::no_shift;
 730         }
 731       } else {
 732         ShOpcVal = ARM_AM::no_shift;
 733       }
 734     }
 735   }
 736
 737   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 738   // and use it in a shifted operand do so.
 739   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 740     unsigned PowerOfTwo = 0;
 741     SDValue NewMulConst;
 742     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 743       HandleSDNode Handle(Offset);
 744       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 745       Offset = Handle.getValue();
 746       ShAmt = PowerOfTwo;
 747       ShOpcVal = ARM_AM::lsl;
 748     }
 749   }
 750
 751   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 752                                   SDLoc(N), MVT::i32);
 753   return true;
 754 }
 755
 756
 757 //-----
 758
 759 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 760                                                      SDValue &Base,
 761                                                      SDValue &Offset,
 762                                                      SDValue &Opc) {
 763   if (N.getOpcode() == ISD::MUL &&
 764       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 765     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 766       // X * [3,5,9] -> X + X * [2,4,8] etc.
 767       int RHSC = (int)RHS->getZExtValue();
 768       if (RHSC & 1) {
 769         RHSC = RHSC & ~1;
 770         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 771         if (RHSC < 0) {
 772           AddSub = ARM_AM::sub;
 773           RHSC = - RHSC;
 774         }
 775         if (isPowerOf2_32(RHSC)) {
 776           unsigned ShAmt = Log2_32(RHSC);
 777           Base = Offset = N.getOperand(0);
 778           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 779                                                             ARM_AM::lsl),
 780                                           SDLoc(N), MVT::i32);
 781           return AM2_SHOP;
 782         }
 783       }
 784     }
 785   }
 786
 787   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 788       // ISD::OR that is equivalent to an ADD.
 789       !CurDAG->isBaseWithConstantOffset(N)) {
 790     Base = N;
 791     if (N.getOpcode() == ISD::FrameIndex) {
 792       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 793       Base = CurDAG->getTargetFrameIndex(
 794           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 795     } else if (N.getOpcode() == ARMISD::Wrapper &&
 796                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 797                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 798                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 799       Base = N.getOperand(0);
 800     }
 801     Offset = CurDAG->getRegister(0, MVT::i32);
 802     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 803                                                       ARM_AM::no_shift),
 804                                     SDLoc(N), MVT::i32);
 805     return AM2_BASE;
 806   }
 807
 808   // Match simple R +/- imm12 operands.
 809   if (N.getOpcode() != ISD::SUB) {
 810     int RHSC;
 811     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 812                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 813       Base = N.getOperand(0);
 814       if (Base.getOpcode() == ISD::FrameIndex) {
 815         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 816         Base = CurDAG->getTargetFrameIndex(
 817             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 818       }
 819       Offset = CurDAG->getRegister(0, MVT::i32);
 820
 821       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 822       if (RHSC < 0) {
 823         AddSub = ARM_AM::sub;
 824         RHSC = - RHSC;
 825       }
 826       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 827                                                         ARM_AM::no_shift),
 828                                       SDLoc(N), MVT::i32);
 829       return AM2_BASE;
 830     }
 831   }
 832
 833   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 834     // Compute R +/- (R << N) and reuse it.
 835     Base = N;
 836     Offset = CurDAG->getRegister(0, MVT::i32);
 837     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 838                                                       ARM_AM::no_shift),
 839                                     SDLoc(N), MVT::i32);
 840     return AM2_BASE;
 841   }
 842
 843   // Otherwise this is R +/- [possibly shifted] R.
 844   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 845   ARM_AM::ShiftOpc ShOpcVal =
 846     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 847   unsigned ShAmt = 0;
 848
 849   Base   = N.getOperand(0);
 850   Offset = N.getOperand(1);
 851
 852   if (ShOpcVal != ARM_AM::no_shift) {
 853     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 854     // it.
 855     if (ConstantSDNode *Sh =
 856            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 857       ShAmt = Sh->getZExtValue();
 858       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 859         Offset = N.getOperand(1).getOperand(0);
 860       else {
 861         ShAmt = 0;
 862         ShOpcVal = ARM_AM::no_shift;
 863       }
 864     } else {
 865       ShOpcVal = ARM_AM::no_shift;
 866     }
 867   }
 868
 869   // Try matching (R shl C) + (R).
 870   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 871       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 872         N.getOperand(0).hasOneUse())) {
 873     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 874     if (ShOpcVal != ARM_AM::no_shift) {
 875       // Check to see if the RHS of the shift is a constant, if not, we can't
 876       // fold it.
 877       if (ConstantSDNode *Sh =
 878           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 879         ShAmt = Sh->getZExtValue();
 880         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 881           Offset = N.getOperand(0).getOperand(0);
 882           Base = N.getOperand(1);
 883         } else {
 884           ShAmt = 0;
 885           ShOpcVal = ARM_AM::no_shift;
 886         }
 887       } else {
 888         ShOpcVal = ARM_AM::no_shift;
 889       }
 890     }
 891   }
 892
 893   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 894                                   SDLoc(N), MVT::i32);
 895   return AM2_SHOP;
 896 }
 897
 898 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 899                                             SDValue &Offset, SDValue &Opc) {
 900   unsigned Opcode = Op->getOpcode();
 901   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 902     ? cast<LoadSDNode>(Op)->getAddressingMode()
 903     : cast<StoreSDNode>(Op)->getAddressingMode();
 904   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 905     ? ARM_AM::add : ARM_AM::sub;
 906   int Val;
 907   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 908     return false;
 909
 910   Offset = N;
 911   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 912   unsigned ShAmt = 0;
 913   if (ShOpcVal != ARM_AM::no_shift) {
 914     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 915     // it.
 916     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 917       ShAmt = Sh->getZExtValue();
 918       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 919         Offset = N.getOperand(0);
 920       else {
 921         ShAmt = 0;
 922         ShOpcVal = ARM_AM::no_shift;
 923       }
 924     } else {
 925       ShOpcVal = ARM_AM::no_shift;
 926     }
 927   }
 928
 929   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 930                                   SDLoc(N), MVT::i32);
 931   return true;
 932 }
 933
 934 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 935                                             SDValue &Offset, SDValue &Opc) {
 936   unsigned Opcode = Op->getOpcode();
 937   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 938     ? cast<LoadSDNode>(Op)->getAddressingMode()
 939     : cast<StoreSDNode>(Op)->getAddressingMode();
 940   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 941     ? ARM_AM::add : ARM_AM::sub;
 942   int Val;
 943   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 944     if (AddSub == ARM_AM::sub) Val *= -1;
 945     Offset = CurDAG->getRegister(0, MVT::i32);
 946     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 947     return true;
 948   }
 949
 950   return false;
 951 }
 952
 953
 954 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 955                                             SDValue &Offset, SDValue &Opc) {
 956   unsigned Opcode = Op->getOpcode();
 957   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 958     ? cast<LoadSDNode>(Op)->getAddressingMode()
 959     : cast<StoreSDNode>(Op)->getAddressingMode();
 960   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 961     ? ARM_AM::add : ARM_AM::sub;
 962   int Val;
 963   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 964     Offset = CurDAG->getRegister(0, MVT::i32);
 965     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 966                                                       ARM_AM::no_shift),
 967                                     SDLoc(Op), MVT::i32);
 968     return true;
 969   }
 970
 971   return false;
 972 }
 973
 974 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 975   Base = N;
 976   return true;
 977 }
 978
 979 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 980                                       SDValue &Base, SDValue &Offset,
 981                                       SDValue &Opc) {
 982   if (N.getOpcode() == ISD::SUB) {
 983     // X - C  is canonicalize to X + -C, no need to handle it here.
 984     Base = N.getOperand(0);
 985     Offset = N.getOperand(1);
 986     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 987                                     MVT::i32);
 988     return true;
 989   }
 990
 991   if (!CurDAG->isBaseWithConstantOffset(N)) {
 992     Base = N;
 993     if (N.getOpcode() == ISD::FrameIndex) {
 994       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 995       Base = CurDAG->getTargetFrameIndex(
 996           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 997     }
 998     Offset = CurDAG->getRegister(0, MVT::i32);
 999     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1000                                     MVT::i32);
1001     return true;
1002   }
1003
1004   // If the RHS is +/- imm8, fold into addr mode.
1005   int RHSC;
1006   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1007                               -256 + 1, 256, RHSC)) { // 8 bits.
1008     Base = N.getOperand(0);
1009     if (Base.getOpcode() == ISD::FrameIndex) {
1010       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1011       Base = CurDAG->getTargetFrameIndex(
1012           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1013     }
1014     Offset = CurDAG->getRegister(0, MVT::i32);
1015
1016     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1017     if (RHSC < 0) {
1018       AddSub = ARM_AM::sub;
1019       RHSC = -RHSC;
1020     }
1021     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1022                                     MVT::i32);
1023     return true;
1024   }
1025
1026   Base = N.getOperand(0);
1027   Offset = N.getOperand(1);
1028   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1029                                   MVT::i32);
1030   return true;
1031 }
1032
1033 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1034                                             SDValue &Offset, SDValue &Opc) {
1035   unsigned Opcode = Op->getOpcode();
1036   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1037     ? cast<LoadSDNode>(Op)->getAddressingMode()
1038     : cast<StoreSDNode>(Op)->getAddressingMode();
1039   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1040     ? ARM_AM::add : ARM_AM::sub;
1041   int Val;
1042   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1043     Offset = CurDAG->getRegister(0, MVT::i32);
1044     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1045                                     MVT::i32);
1046     return true;
1047   }
1048
1049   Offset = N;
1050   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1051                                   MVT::i32);
1052   return true;
1053 }
1054
1055 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1056                                       SDValue &Base, SDValue &Offset) {
1057   if (!CurDAG->isBaseWithConstantOffset(N)) {
1058     Base = N;
1059     if (N.getOpcode() == ISD::FrameIndex) {
1060       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1061       Base = CurDAG->getTargetFrameIndex(
1062           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1063     } else if (N.getOpcode() == ARMISD::Wrapper &&
1064                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1065                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1066                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1067       Base = N.getOperand(0);
1068     }
1069     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1070                                        SDLoc(N), MVT::i32);
1071     return true;
1072   }
1073
1074   // If the RHS is +/- imm8, fold into addr mode.
1075   int RHSC;
1076   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1077                               -256 + 1, 256, RHSC)) {
1078     Base = N.getOperand(0);
1079     if (Base.getOpcode() == ISD::FrameIndex) {
1080       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1081       Base = CurDAG->getTargetFrameIndex(
1082           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1083     }
1084
1085     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1086     if (RHSC < 0) {
1087       AddSub = ARM_AM::sub;
1088       RHSC = -RHSC;
1089     }
1090     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1091                                        SDLoc(N), MVT::i32);
1092     return true;
1093   }
1094
1095   Base = N;
1096   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1097                                      SDLoc(N), MVT::i32);
1098   return true;
1099 }
1100
1101 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1102                                       SDValue &Align) {
1103   Addr = N;
1104
1105   unsigned Alignment = 0;
1106
1107   MemSDNode *MemN = cast<MemSDNode>(Parent);
1108
1109   if (isa<LSBaseSDNode>(MemN) ||
1110       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1111         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1112        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1113     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1114     // The maximum alignment is equal to the memory size being referenced.
1115     unsigned MMOAlign = MemN->getAlignment();
1116     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1117     if (MMOAlign >= MemSize && MemSize > 1)
1118       Alignment = MemSize;
1119   } else {
1120     // All other uses of addrmode6 are for intrinsics.  For now just record
1121     // the raw alignment value; it will be refined later based on the legal
1122     // alignment operands for the intrinsic.
1123     Alignment = MemN->getAlignment();
1124   }
1125
1126   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1127   return true;
1128 }
1129
1130 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1131                                             SDValue &Offset) {
1132   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1133   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1134   if (AM != ISD::POST_INC)
1135     return false;
1136   Offset = N;
1137   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1138     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1139       Offset = CurDAG->getRegister(0, MVT::i32);
1140   }
1141   return true;
1142 }
1143
1144 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1145                                        SDValue &Offset, SDValue &Label) {
1146   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1147     Offset = N.getOperand(0);
1148     SDValue N1 = N.getOperand(1);
1149     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1150                                       SDLoc(N), MVT::i32);
1151     return true;
1152   }
1153
1154   return false;
1155 }
1156
1157
1158 //===----------------------------------------------------------------------===//
1159 //                         Thumb Addressing Modes
1160 //===----------------------------------------------------------------------===//
1161
1162 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1163                                             SDValue &Base, SDValue &Offset){
1164   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1165     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1166     if (!NC || !NC->isNullValue())
1167       return false;
1168
1169     Base = Offset = N;
1170     return true;
1171   }
1172
1173   Base = N.getOperand(0);
1174   Offset = N.getOperand(1);
1175   return true;
1176 }
1177
1178 bool
1179 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1180                                           SDValue &Base, SDValue &OffImm) {
1181   if (!CurDAG->isBaseWithConstantOffset(N)) {
1182     if (N.getOpcode() == ISD::ADD) {
1183       return false; // We want to select register offset instead
1184     } else if (N.getOpcode() == ARMISD::Wrapper &&
1185         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1186         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1187         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1188         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1189       Base = N.getOperand(0);
1190     } else {
1191       Base = N;
1192     }
1193
1194     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1195     return true;
1196   }
1197
1198   // If the RHS is + imm5 * scale, fold into addr mode.
1199   int RHSC;
1200   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1201     Base = N.getOperand(0);
1202     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1203     return true;
1204   }
1205
1206   // Offset is too large, so use register offset instead.
1207   return false;
1208 }
1209
1210 bool
1211 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1212                                            SDValue &OffImm) {
1213   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1214 }
1215
1216 bool
1217 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1218                                            SDValue &OffImm) {
1219   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1220 }
1221
1222 bool
1223 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1224                                            SDValue &OffImm) {
1225   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1226 }
1227
1228 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1229                                             SDValue &Base, SDValue &OffImm) {
1230   if (N.getOpcode() == ISD::FrameIndex) {
1231     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1232     // Only multiples of 4 are allowed for the offset, so the frame object
1233     // alignment must be at least 4.
1234     MachineFrameInfo &MFI = MF->getFrameInfo();
1235     if (MFI.getObjectAlignment(FI) < 4)
1236       MFI.setObjectAlignment(FI, 4);
1237     Base = CurDAG->getTargetFrameIndex(
1238         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1239     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1240     return true;
1241   }
1242
1243   if (!CurDAG->isBaseWithConstantOffset(N))
1244     return false;
1245
1246   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1247   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1248       (LHSR && LHSR->getReg() == ARM::SP)) {
1249     // If the RHS is + imm8 * scale, fold into addr mode.
1250     int RHSC;
1251     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1252       Base = N.getOperand(0);
1253       if (Base.getOpcode() == ISD::FrameIndex) {
1254         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1255         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1256         // indexed by the LHS must be 4-byte aligned.
1257         MachineFrameInfo &MFI = MF->getFrameInfo();
1258         if (MFI.getObjectAlignment(FI) < 4)
1259           MFI.setObjectAlignment(FI, 4);
1260         Base = CurDAG->getTargetFrameIndex(
1261             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1262       }
1263       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1264       return true;
1265     }
1266   }
1267
1268   return false;
1269 }
1270
1271
1272 //===----------------------------------------------------------------------===//
1273 //                        Thumb 2 Addressing Modes
1274 //===----------------------------------------------------------------------===//
1275
1276
1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1278                                             SDValue &Base, SDValue &OffImm) {
1279   // Match simple R + imm12 operands.
1280
1281   // Base only.
1282   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1283       !CurDAG->isBaseWithConstantOffset(N)) {
1284     if (N.getOpcode() == ISD::FrameIndex) {
1285       // Match frame index.
1286       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1287       Base = CurDAG->getTargetFrameIndex(
1288           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1289       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290       return true;
1291     }
1292
1293     if (N.getOpcode() == ARMISD::Wrapper &&
1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1295         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1296         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1297       Base = N.getOperand(0);
1298       if (Base.getOpcode() == ISD::TargetConstantPool)
1299         return false;  // We want to select t2LDRpci instead.
1300     } else
1301       Base = N;
1302     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1303     return true;
1304   }
1305
1306   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1307     if (SelectT2AddrModeImm8(N, Base, OffImm))
1308       // Let t2LDRi8 handle (R - imm8).
1309       return false;
1310
1311     int RHSC = (int)RHS->getZExtValue();
1312     if (N.getOpcode() == ISD::SUB)
1313       RHSC = -RHSC;
1314
1315     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1316       Base   = N.getOperand(0);
1317       if (Base.getOpcode() == ISD::FrameIndex) {
1318         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1319         Base = CurDAG->getTargetFrameIndex(
1320             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1321       }
1322       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1323       return true;
1324     }
1325   }
1326
1327   // Base only.
1328   Base = N;
1329   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1330   return true;
1331 }
1332
1333 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1334                                            SDValue &Base, SDValue &OffImm) {
1335   // Match simple R - imm8 operands.
1336   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1337       !CurDAG->isBaseWithConstantOffset(N))
1338     return false;
1339
1340   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1341     int RHSC = (int)RHS->getSExtValue();
1342     if (N.getOpcode() == ISD::SUB)
1343       RHSC = -RHSC;
1344
1345     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1346       Base = N.getOperand(0);
1347       if (Base.getOpcode() == ISD::FrameIndex) {
1348         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1349         Base = CurDAG->getTargetFrameIndex(
1350             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1351       }
1352       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1353       return true;
1354     }
1355   }
1356
1357   return false;
1358 }
1359
1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1361                                                  SDValue &OffImm){
1362   unsigned Opcode = Op->getOpcode();
1363   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1364     ? cast<LoadSDNode>(Op)->getAddressingMode()
1365     : cast<StoreSDNode>(Op)->getAddressingMode();
1366   int RHSC;
1367   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1368     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1369       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1370       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1371     return true;
1372   }
1373
1374   return false;
1375 }
1376
1377 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1378                                             SDValue &Base,
1379                                             SDValue &OffReg, SDValue &ShImm) {
1380   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1381   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1382     return false;
1383
1384   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1385   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1386     int RHSC = (int)RHS->getZExtValue();
1387     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1388       return false;
1389     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1390       return false;
1391   }
1392
1393   // Look for (R + R) or (R + (R << [1,2,3])).
1394   unsigned ShAmt = 0;
1395   Base   = N.getOperand(0);
1396   OffReg = N.getOperand(1);
1397
1398   // Swap if it is ((R << c) + R).
1399   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1400   if (ShOpcVal != ARM_AM::lsl) {
1401     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1402     if (ShOpcVal == ARM_AM::lsl)
1403       std::swap(Base, OffReg);
1404   }
1405
1406   if (ShOpcVal == ARM_AM::lsl) {
1407     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1408     // it.
1409     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1410       ShAmt = Sh->getZExtValue();
1411       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1412         OffReg = OffReg.getOperand(0);
1413       else {
1414         ShAmt = 0;
1415       }
1416     }
1417   }
1418
1419   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1420   // and use it in a shifted operand do so.
1421   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1422     unsigned PowerOfTwo = 0;
1423     SDValue NewMulConst;
1424     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1425       HandleSDNode Handle(OffReg);
1426       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1427       OffReg = Handle.getValue();
1428       ShAmt = PowerOfTwo;
1429     }
1430   }
1431
1432   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1433
1434   return true;
1435 }
1436
1437 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1438                                                 SDValue &OffImm) {
1439   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1440   // instructions.
1441   Base = N;
1442   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1443
1444   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1445     return true;
1446
1447   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1448   if (!RHS)
1449     return true;
1450
1451   uint32_t RHSC = (int)RHS->getZExtValue();
1452   if (RHSC > 1020 || RHSC % 4 != 0)
1453     return true;
1454
1455   Base = N.getOperand(0);
1456   if (Base.getOpcode() == ISD::FrameIndex) {
1457     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1458     Base = CurDAG->getTargetFrameIndex(
1459         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1460   }
1461
1462   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1463   return true;
1464 }
1465
1466 //===--------------------------------------------------------------------===//
1467
1468 /// getAL - Returns a ARMCC::AL immediate node.
1469 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1470   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1471 }
1472
1473 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1474   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1475   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1476   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1477 }
1478
1479 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1480   LoadSDNode *LD = cast<LoadSDNode>(N);
1481   ISD::MemIndexedMode AM = LD->getAddressingMode();
1482   if (AM == ISD::UNINDEXED)
1483     return false;
1484
1485   EVT LoadedVT = LD->getMemoryVT();
1486   SDValue Offset, AMOpc;
1487   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1488   unsigned Opcode = 0;
1489   bool Match = false;
1490   if (LoadedVT == MVT::i32 && isPre &&
1491       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1492     Opcode = ARM::LDR_PRE_IMM;
1493     Match = true;
1494   } else if (LoadedVT == MVT::i32 && !isPre &&
1495       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1496     Opcode = ARM::LDR_POST_IMM;
1497     Match = true;
1498   } else if (LoadedVT == MVT::i32 &&
1499       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1500     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1501     Match = true;
1502
1503   } else if (LoadedVT == MVT::i16 &&
1504              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1505     Match = true;
1506     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1507       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1508       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1509   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1510     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1511       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1512         Match = true;
1513         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1514       }
1515     } else {
1516       if (isPre &&
1517           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1518         Match = true;
1519         Opcode = ARM::LDRB_PRE_IMM;
1520       } else if (!isPre &&
1521                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1522         Match = true;
1523         Opcode = ARM::LDRB_POST_IMM;
1524       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1525         Match = true;
1526         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1527       }
1528     }
1529   }
1530
1531   if (Match) {
1532     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1533       SDValue Chain = LD->getChain();
1534       SDValue Base = LD->getBasePtr();
1535       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1536                        CurDAG->getRegister(0, MVT::i32), Chain };
1537       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1538                                            MVT::Other, Ops);
1539       transferMemOperands(N, New);
1540       ReplaceNode(N, New);
1541       return true;
1542     } else {
1543       SDValue Chain = LD->getChain();
1544       SDValue Base = LD->getBasePtr();
1545       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1546                        CurDAG->getRegister(0, MVT::i32), Chain };
1547       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1548                                            MVT::Other, Ops);
1549       transferMemOperands(N, New);
1550       ReplaceNode(N, New);
1551       return true;
1552     }
1553   }
1554
1555   return false;
1556 }
1557
1558 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1559   LoadSDNode *LD = cast<LoadSDNode>(N);
1560   EVT LoadedVT = LD->getMemoryVT();
1561   ISD::MemIndexedMode AM = LD->getAddressingMode();
1562   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1563       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1564     return false;
1565
1566   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1567   if (!COffs || COffs->getZExtValue() != 4)
1568     return false;
1569
1570   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1571   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1572   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1573   // ISel.
1574   SDValue Chain = LD->getChain();
1575   SDValue Base = LD->getBasePtr();
1576   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1577                    CurDAG->getRegister(0, MVT::i32), Chain };
1578   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1579                                        MVT::i32, MVT::Other, Ops);
1580   transferMemOperands(N, New);
1581   ReplaceNode(N, New);
1582   return true;
1583 }
1584
1585 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1586   LoadSDNode *LD = cast<LoadSDNode>(N);
1587   ISD::MemIndexedMode AM = LD->getAddressingMode();
1588   if (AM == ISD::UNINDEXED)
1589     return false;
1590
1591   EVT LoadedVT = LD->getMemoryVT();
1592   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1593   SDValue Offset;
1594   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1595   unsigned Opcode = 0;
1596   bool Match = false;
1597   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1598     switch (LoadedVT.getSimpleVT().SimpleTy) {
1599     case MVT::i32:
1600       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1601       break;
1602     case MVT::i16:
1603       if (isSExtLd)
1604         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1605       else
1606         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1607       break;
1608     case MVT::i8:
1609     case MVT::i1:
1610       if (isSExtLd)
1611         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1612       else
1613         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1614       break;
1615     default:
1616       return false;
1617     }
1618     Match = true;
1619   }
1620
1621   if (Match) {
1622     SDValue Chain = LD->getChain();
1623     SDValue Base = LD->getBasePtr();
1624     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1625                      CurDAG->getRegister(0, MVT::i32), Chain };
1626     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1627                                          MVT::Other, Ops);
1628     transferMemOperands(N, New);
1629     ReplaceNode(N, New);
1630     return true;
1631   }
1632
1633   return false;
1634 }
1635
1636 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1637 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1638   SDLoc dl(V0.getNode());
1639   SDValue RegClass =
1640     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1641   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1642   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1643   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1644   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1645 }
1646
1647 /// \brief Form a D register from a pair of S registers.
1648 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1649   SDLoc dl(V0.getNode());
1650   SDValue RegClass =
1651     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1652   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1653   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1654   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1655   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1656 }
1657
1658 /// \brief Form a quad register from a pair of D registers.
1659 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1660   SDLoc dl(V0.getNode());
1661   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1662                                                MVT::i32);
1663   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1664   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1665   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1666   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667 }
1668
1669 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1670 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1671   SDLoc dl(V0.getNode());
1672   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1673                                                MVT::i32);
1674   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1675   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1676   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1677   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1678 }
1679
1680 /// \brief Form 4 consecutive S registers.
1681 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1682                                    SDValue V2, SDValue V3) {
1683   SDLoc dl(V0.getNode());
1684   SDValue RegClass =
1685     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1686   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1687   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1688   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1689   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1690   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1691                                     V2, SubReg2, V3, SubReg3 };
1692   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1693 }
1694
1695 /// \brief Form 4 consecutive D registers.
1696 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1697                                    SDValue V2, SDValue V3) {
1698   SDLoc dl(V0.getNode());
1699   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1700                                                MVT::i32);
1701   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1702   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1703   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1704   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1705   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1706                                     V2, SubReg2, V3, SubReg3 };
1707   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1708 }
1709
1710 /// \brief Form 4 consecutive Q registers.
1711 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1712                                    SDValue V2, SDValue V3) {
1713   SDLoc dl(V0.getNode());
1714   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1715                                                MVT::i32);
1716   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1717   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1718   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1719   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1720   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1721                                     V2, SubReg2, V3, SubReg3 };
1722   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1723 }
1724
1725 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1726 /// of a NEON VLD or VST instruction.  The supported values depend on the
1727 /// number of registers being loaded.
1728 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1729                                        unsigned NumVecs, bool is64BitVector) {
1730   unsigned NumRegs = NumVecs;
1731   if (!is64BitVector && NumVecs < 3)
1732     NumRegs *= 2;
1733
1734   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1735   if (Alignment >= 32 && NumRegs == 4)
1736     Alignment = 32;
1737   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1738     Alignment = 16;
1739   else if (Alignment >= 8)
1740     Alignment = 8;
1741   else
1742     Alignment = 0;
1743
1744   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1745 }
1746
1747 static bool isVLDfixed(unsigned Opc)
1748 {
1749   switch (Opc) {
1750   default: return false;
1751   case ARM::VLD1d8wb_fixed : return true;
1752   case ARM::VLD1d16wb_fixed : return true;
1753   case ARM::VLD1d64Qwb_fixed : return true;
1754   case ARM::VLD1d32wb_fixed : return true;
1755   case ARM::VLD1d64wb_fixed : return true;
1756   case ARM::VLD1d64TPseudoWB_fixed : return true;
1757   case ARM::VLD1d64QPseudoWB_fixed : return true;
1758   case ARM::VLD1q8wb_fixed : return true;
1759   case ARM::VLD1q16wb_fixed : return true;
1760   case ARM::VLD1q32wb_fixed : return true;
1761   case ARM::VLD1q64wb_fixed : return true;
1762   case ARM::VLD1DUPd8wb_fixed : return true;
1763   case ARM::VLD1DUPd16wb_fixed : return true;
1764   case ARM::VLD1DUPd32wb_fixed : return true;
1765   case ARM::VLD1DUPq8wb_fixed : return true;
1766   case ARM::VLD1DUPq16wb_fixed : return true;
1767   case ARM::VLD1DUPq32wb_fixed : return true;
1768   case ARM::VLD2d8wb_fixed : return true;
1769   case ARM::VLD2d16wb_fixed : return true;
1770   case ARM::VLD2d32wb_fixed : return true;
1771   case ARM::VLD2q8PseudoWB_fixed : return true;
1772   case ARM::VLD2q16PseudoWB_fixed : return true;
1773   case ARM::VLD2q32PseudoWB_fixed : return true;
1774   case ARM::VLD2DUPd8wb_fixed : return true;
1775   case ARM::VLD2DUPd16wb_fixed : return true;
1776   case ARM::VLD2DUPd32wb_fixed : return true;
1777   }
1778 }
1779
1780 static bool isVSTfixed(unsigned Opc)
1781 {
1782   switch (Opc) {
1783   default: return false;
1784   case ARM::VST1d8wb_fixed : return true;
1785   case ARM::VST1d16wb_fixed : return true;
1786   case ARM::VST1d32wb_fixed : return true;
1787   case ARM::VST1d64wb_fixed : return true;
1788   case ARM::VST1q8wb_fixed : return true;
1789   case ARM::VST1q16wb_fixed : return true;
1790   case ARM::VST1q32wb_fixed : return true;
1791   case ARM::VST1q64wb_fixed : return true;
1792   case ARM::VST1d64TPseudoWB_fixed : return true;
1793   case ARM::VST1d64QPseudoWB_fixed : return true;
1794   case ARM::VST2d8wb_fixed : return true;
1795   case ARM::VST2d16wb_fixed : return true;
1796   case ARM::VST2d32wb_fixed : return true;
1797   case ARM::VST2q8PseudoWB_fixed : return true;
1798   case ARM::VST2q16PseudoWB_fixed : return true;
1799   case ARM::VST2q32PseudoWB_fixed : return true;
1800   }
1801 }
1802
1803 // Get the register stride update opcode of a VLD/VST instruction that
1804 // is otherwise equivalent to the given fixed stride updating instruction.
1805 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1806   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1807     && "Incorrect fixed stride updating instruction.");
1808   switch (Opc) {
1809   default: break;
1810   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1811   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1812   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1813   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1814   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1815   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1816   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1817   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1818   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1819   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1820   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1821   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1822   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1823   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1824   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1825   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1826   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1827   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1828
1829   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1830   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1831   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1832   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1833   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1834   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1835   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1836   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1837   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1838   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1839
1840   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1841   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1842   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1843   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1844   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1845   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1846
1847   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1848   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1849   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1850   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1851   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1852   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1853
1854   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1855   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1856   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1857   }
1858   return Opc; // If not one we handle, return it unchanged.
1859 }
1860
1861 /// Returns true if the given increment is a Constant known to be equal to the
1862 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1863 /// be used.
1864 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1865   auto C = dyn_cast<ConstantSDNode>(Inc);
1866   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1867 }
1868
1869 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1870                                 const uint16_t *DOpcodes,
1871                                 const uint16_t *QOpcodes0,
1872                                 const uint16_t *QOpcodes1) {
1873   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1874   SDLoc dl(N);
1875
1876   SDValue MemAddr, Align;
1877   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1878   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1879     return;
1880
1881   SDValue Chain = N->getOperand(0);
1882   EVT VT = N->getValueType(0);
1883   bool is64BitVector = VT.is64BitVector();
1884   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1885
1886   unsigned OpcodeIndex;
1887   switch (VT.getSimpleVT().SimpleTy) {
1888   default: llvm_unreachable("unhandled vld type");
1889     // Double-register operations:
1890   case MVT::v8i8:  OpcodeIndex = 0; break;
1891   case MVT::v4i16: OpcodeIndex = 1; break;
1892   case MVT::v2f32:
1893   case MVT::v2i32: OpcodeIndex = 2; break;
1894   case MVT::v1i64: OpcodeIndex = 3; break;
1895     // Quad-register operations:
1896   case MVT::v16i8: OpcodeIndex = 0; break;
1897   case MVT::v8i16: OpcodeIndex = 1; break;
1898   case MVT::v4f32:
1899   case MVT::v4i32: OpcodeIndex = 2; break;
1900   case MVT::v2f64:
1901   case MVT::v2i64: OpcodeIndex = 3;
1902     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1903     break;
1904   }
1905
1906   EVT ResTy;
1907   if (NumVecs == 1)
1908     ResTy = VT;
1909   else {
1910     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1911     if (!is64BitVector)
1912       ResTyElts *= 2;
1913     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1914   }
1915   std::vector<EVT> ResTys;
1916   ResTys.push_back(ResTy);
1917   if (isUpdating)
1918     ResTys.push_back(MVT::i32);
1919   ResTys.push_back(MVT::Other);
1920
1921   SDValue Pred = getAL(CurDAG, dl);
1922   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1923   SDNode *VLd;
1924   SmallVector<SDValue, 7> Ops;
1925
1926   // Double registers and VLD1/VLD2 quad registers are directly supported.
1927   if (is64BitVector || NumVecs <= 2) {
1928     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1929                     QOpcodes0[OpcodeIndex]);
1930     Ops.push_back(MemAddr);
1931     Ops.push_back(Align);
1932     if (isUpdating) {
1933       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1934       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1935       // case entirely when the rest are updated to that form, too.
1936       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1937       if ((NumVecs <= 2) && !IsImmUpdate)
1938         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1939       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1940       // check for that explicitly too. Horribly hacky, but temporary.
1941       if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
1942         Ops.push_back(IsImmUpdate ? Reg0 : Inc);
1943     }
1944     Ops.push_back(Pred);
1945     Ops.push_back(Reg0);
1946     Ops.push_back(Chain);
1947     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1948
1949   } else {
1950     // Otherwise, quad registers are loaded with two separate instructions,
1951     // where one loads the even registers and the other loads the odd registers.
1952     EVT AddrTy = MemAddr.getValueType();
1953
1954     // Load the even subregs.  This is always an updating load, so that it
1955     // provides the address to the second load for the odd subregs.
1956     SDValue ImplDef =
1957       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1958     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1959     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1960                                           ResTy, AddrTy, MVT::Other, OpsA);
1961     Chain = SDValue(VLdA, 2);
1962
1963     // Load the odd subregs.
1964     Ops.push_back(SDValue(VLdA, 1));
1965     Ops.push_back(Align);
1966     if (isUpdating) {
1967       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1968       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1969              "only constant post-increment update allowed for VLD3/4");
1970       (void)Inc;
1971       Ops.push_back(Reg0);
1972     }
1973     Ops.push_back(SDValue(VLdA, 0));
1974     Ops.push_back(Pred);
1975     Ops.push_back(Reg0);
1976     Ops.push_back(Chain);
1977     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1978   }
1979
1980   // Transfer memoperands.
1981   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1982   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1983   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1984
1985   if (NumVecs == 1) {
1986     ReplaceNode(N, VLd);
1987     return;
1988   }
1989
1990   // Extract out the subregisters.
1991   SDValue SuperReg = SDValue(VLd, 0);
1992   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1993                     ARM::qsub_3 == ARM::qsub_0 + 3,
1994                 "Unexpected subreg numbering");
1995   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1996   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1997     ReplaceUses(SDValue(N, Vec),
1998                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1999   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2000   if (isUpdating)
2001     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2002   CurDAG->RemoveDeadNode(N);
2003 }
2004
2005 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2006                                 const uint16_t *DOpcodes,
2007                                 const uint16_t *QOpcodes0,
2008                                 const uint16_t *QOpcodes1) {
2009   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2010   SDLoc dl(N);
2011
2012   SDValue MemAddr, Align;
2013   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2014   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2015   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2016     return;
2017
2018   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2019   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2020
2021   SDValue Chain = N->getOperand(0);
2022   EVT VT = N->getOperand(Vec0Idx).getValueType();
2023   bool is64BitVector = VT.is64BitVector();
2024   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2025
2026   unsigned OpcodeIndex;
2027   switch (VT.getSimpleVT().SimpleTy) {
2028   default: llvm_unreachable("unhandled vst type");
2029     // Double-register operations:
2030   case MVT::v8i8:  OpcodeIndex = 0; break;
2031   case MVT::v4i16: OpcodeIndex = 1; break;
2032   case MVT::v2f32:
2033   case MVT::v2i32: OpcodeIndex = 2; break;
2034   case MVT::v1i64: OpcodeIndex = 3; break;
2035     // Quad-register operations:
2036   case MVT::v16i8: OpcodeIndex = 0; break;
2037   case MVT::v8i16: OpcodeIndex = 1; break;
2038   case MVT::v4f32:
2039   case MVT::v4i32: OpcodeIndex = 2; break;
2040   case MVT::v2f64:
2041   case MVT::v2i64: OpcodeIndex = 3;
2042     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
2043     break;
2044   }
2045
2046   std::vector<EVT> ResTys;
2047   if (isUpdating)
2048     ResTys.push_back(MVT::i32);
2049   ResTys.push_back(MVT::Other);
2050
2051   SDValue Pred = getAL(CurDAG, dl);
2052   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2053   SmallVector<SDValue, 7> Ops;
2054
2055   // Double registers and VST1/VST2 quad registers are directly supported.
2056   if (is64BitVector || NumVecs <= 2) {
2057     SDValue SrcReg;
2058     if (NumVecs == 1) {
2059       SrcReg = N->getOperand(Vec0Idx);
2060     } else if (is64BitVector) {
2061       // Form a REG_SEQUENCE to force register allocation.
2062       SDValue V0 = N->getOperand(Vec0Idx + 0);
2063       SDValue V1 = N->getOperand(Vec0Idx + 1);
2064       if (NumVecs == 2)
2065         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2066       else {
2067         SDValue V2 = N->getOperand(Vec0Idx + 2);
2068         // If it's a vst3, form a quad D-register and leave the last part as
2069         // an undef.
2070         SDValue V3 = (NumVecs == 3)
2071           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2072           : N->getOperand(Vec0Idx + 3);
2073         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2074       }
2075     } else {
2076       // Form a QQ register.
2077       SDValue Q0 = N->getOperand(Vec0Idx);
2078       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2079       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2080     }
2081
2082     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2083                     QOpcodes0[OpcodeIndex]);
2084     Ops.push_back(MemAddr);
2085     Ops.push_back(Align);
2086     if (isUpdating) {
2087       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2088       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2089       // case entirely when the rest are updated to that form, too.
2090       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2091       if (NumVecs <= 2 && !IsImmUpdate)
2092         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2093       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2094       // check for that explicitly too. Horribly hacky, but temporary.
2095       if  (!IsImmUpdate)
2096         Ops.push_back(Inc);
2097       else if (NumVecs > 2 && !isVSTfixed(Opc))
2098         Ops.push_back(Reg0);
2099     }
2100     Ops.push_back(SrcReg);
2101     Ops.push_back(Pred);
2102     Ops.push_back(Reg0);
2103     Ops.push_back(Chain);
2104     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2105
2106     // Transfer memoperands.
2107     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2108
2109     ReplaceNode(N, VSt);
2110     return;
2111   }
2112
2113   // Otherwise, quad registers are stored with two separate instructions,
2114   // where one stores the even registers and the other stores the odd registers.
2115
2116   // Form the QQQQ REG_SEQUENCE.
2117   SDValue V0 = N->getOperand(Vec0Idx + 0);
2118   SDValue V1 = N->getOperand(Vec0Idx + 1);
2119   SDValue V2 = N->getOperand(Vec0Idx + 2);
2120   SDValue V3 = (NumVecs == 3)
2121     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2122     : N->getOperand(Vec0Idx + 3);
2123   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2124
2125   // Store the even D registers.  This is always an updating store, so that it
2126   // provides the address to the second store for the odd subregs.
2127   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2128   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2129                                         MemAddr.getValueType(),
2130                                         MVT::Other, OpsA);
2131   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2132   Chain = SDValue(VStA, 1);
2133
2134   // Store the odd D registers.
2135   Ops.push_back(SDValue(VStA, 0));
2136   Ops.push_back(Align);
2137   if (isUpdating) {
2138     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2139     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2140            "only constant post-increment update allowed for VST3/4");
2141     (void)Inc;
2142     Ops.push_back(Reg0);
2143   }
2144   Ops.push_back(RegSeq);
2145   Ops.push_back(Pred);
2146   Ops.push_back(Reg0);
2147   Ops.push_back(Chain);
2148   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2149                                         Ops);
2150   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2151   ReplaceNode(N, VStB);
2152 }
2153
2154 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2155                                       unsigned NumVecs,
2156                                       const uint16_t *DOpcodes,
2157                                       const uint16_t *QOpcodes) {
2158   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2159   SDLoc dl(N);
2160
2161   SDValue MemAddr, Align;
2162   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2163   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2164   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2165     return;
2166
2167   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2168   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2169
2170   SDValue Chain = N->getOperand(0);
2171   unsigned Lane =
2172     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2173   EVT VT = N->getOperand(Vec0Idx).getValueType();
2174   bool is64BitVector = VT.is64BitVector();
2175
2176   unsigned Alignment = 0;
2177   if (NumVecs != 3) {
2178     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2179     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2180     if (Alignment > NumBytes)
2181       Alignment = NumBytes;
2182     if (Alignment < 8 && Alignment < NumBytes)
2183       Alignment = 0;
2184     // Alignment must be a power of two; make sure of that.
2185     Alignment = (Alignment & -Alignment);
2186     if (Alignment == 1)
2187       Alignment = 0;
2188   }
2189   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2190
2191   unsigned OpcodeIndex;
2192   switch (VT.getSimpleVT().SimpleTy) {
2193   default: llvm_unreachable("unhandled vld/vst lane type");
2194     // Double-register operations:
2195   case MVT::v8i8:  OpcodeIndex = 0; break;
2196   case MVT::v4i16: OpcodeIndex = 1; break;
2197   case MVT::v2f32:
2198   case MVT::v2i32: OpcodeIndex = 2; break;
2199     // Quad-register operations:
2200   case MVT::v8i16: OpcodeIndex = 0; break;
2201   case MVT::v4f32:
2202   case MVT::v4i32: OpcodeIndex = 1; break;
2203   }
2204
2205   std::vector<EVT> ResTys;
2206   if (IsLoad) {
2207     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2208     if (!is64BitVector)
2209       ResTyElts *= 2;
2210     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2211                                       MVT::i64, ResTyElts));
2212   }
2213   if (isUpdating)
2214     ResTys.push_back(MVT::i32);
2215   ResTys.push_back(MVT::Other);
2216
2217   SDValue Pred = getAL(CurDAG, dl);
2218   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2219
2220   SmallVector<SDValue, 8> Ops;
2221   Ops.push_back(MemAddr);
2222   Ops.push_back(Align);
2223   if (isUpdating) {
2224     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2225     bool IsImmUpdate =
2226         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2227     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2228   }
2229
2230   SDValue SuperReg;
2231   SDValue V0 = N->getOperand(Vec0Idx + 0);
2232   SDValue V1 = N->getOperand(Vec0Idx + 1);
2233   if (NumVecs == 2) {
2234     if (is64BitVector)
2235       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2236     else
2237       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2238   } else {
2239     SDValue V2 = N->getOperand(Vec0Idx + 2);
2240     SDValue V3 = (NumVecs == 3)
2241       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2242       : N->getOperand(Vec0Idx + 3);
2243     if (is64BitVector)
2244       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2245     else
2246       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2247   }
2248   Ops.push_back(SuperReg);
2249   Ops.push_back(getI32Imm(Lane, dl));
2250   Ops.push_back(Pred);
2251   Ops.push_back(Reg0);
2252   Ops.push_back(Chain);
2253
2254   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2255                                   QOpcodes[OpcodeIndex]);
2256   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2257   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2258   if (!IsLoad) {
2259     ReplaceNode(N, VLdLn);
2260     return;
2261   }
2262
2263   // Extract the subregisters.
2264   SuperReg = SDValue(VLdLn, 0);
2265   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2266                     ARM::qsub_3 == ARM::qsub_0 + 3,
2267                 "Unexpected subreg numbering");
2268   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2269   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2270     ReplaceUses(SDValue(N, Vec),
2271                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2272   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2273   if (isUpdating)
2274     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2275   CurDAG->RemoveDeadNode(N);
2276 }
2277
2278 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2279                                    const uint16_t *DOpcodes,
2280                                    const uint16_t *QOpcodes) {
2281   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2282   SDLoc dl(N);
2283
2284   SDValue MemAddr, Align;
2285   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2286     return;
2287
2288   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2289   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2290
2291   SDValue Chain = N->getOperand(0);
2292   EVT VT = N->getValueType(0);
2293
2294   unsigned Alignment = 0;
2295   if (NumVecs != 3) {
2296     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2297     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2298     if (Alignment > NumBytes)
2299       Alignment = NumBytes;
2300     if (Alignment < 8 && Alignment < NumBytes)
2301       Alignment = 0;
2302     // Alignment must be a power of two; make sure of that.
2303     Alignment = (Alignment & -Alignment);
2304     if (Alignment == 1)
2305       Alignment = 0;
2306   }
2307   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2308
2309   unsigned Opc;
2310   switch (VT.getSimpleVT().SimpleTy) {
2311   default: llvm_unreachable("unhandled vld-dup type");
2312   case MVT::v8i8:  Opc = DOpcodes[0]; break;
2313   case MVT::v16i8: Opc = QOpcodes[0]; break;
2314   case MVT::v4i16: Opc = DOpcodes[1]; break;
2315   case MVT::v8i16: Opc = QOpcodes[1]; break;
2316   case MVT::v2f32:
2317   case MVT::v2i32: Opc = DOpcodes[2]; break;
2318   case MVT::v4f32:
2319   case MVT::v4i32: Opc = QOpcodes[2]; break;
2320   }
2321
2322   SDValue Pred = getAL(CurDAG, dl);
2323   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2324   SmallVector<SDValue, 6> Ops;
2325   Ops.push_back(MemAddr);
2326   Ops.push_back(Align);
2327   if (isUpdating) {
2328     // fixed-stride update instructions don't have an explicit writeback
2329     // operand. It's implicit in the opcode itself.
2330     SDValue Inc = N->getOperand(2);
2331     bool IsImmUpdate =
2332         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2333     if (NumVecs <= 2 && !IsImmUpdate)
2334       Opc = getVLDSTRegisterUpdateOpcode(Opc);
2335     if (!IsImmUpdate)
2336       Ops.push_back(Inc);
2337     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2338     else if (NumVecs > 2)
2339       Ops.push_back(Reg0);
2340   }
2341   Ops.push_back(Pred);
2342   Ops.push_back(Reg0);
2343   Ops.push_back(Chain);
2344
2345   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2346   std::vector<EVT> ResTys;
2347   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2348   if (isUpdating)
2349     ResTys.push_back(MVT::i32);
2350   ResTys.push_back(MVT::Other);
2351   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2352   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2353
2354   // Extract the subregisters.
2355   if (NumVecs == 1) {
2356     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2357   } else {
2358     SDValue SuperReg = SDValue(VLdDup, 0);
2359     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2360     unsigned SubIdx = ARM::dsub_0;
2361     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2362       ReplaceUses(SDValue(N, Vec),
2363                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2364   }
2365   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2366   if (isUpdating)
2367     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2368   CurDAG->RemoveDeadNode(N);
2369 }
2370
2371 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2372   if (!Subtarget->hasV6T2Ops())
2373     return false;
2374
2375   unsigned Opc = isSigned
2376     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2377     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2378   SDLoc dl(N);
2379
2380   // For unsigned extracts, check for a shift right and mask
2381   unsigned And_imm = 0;
2382   if (N->getOpcode() == ISD::AND) {
2383     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2384
2385       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2386       if (And_imm & (And_imm + 1))
2387         return false;
2388
2389       unsigned Srl_imm = 0;
2390       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2391                                 Srl_imm)) {
2392         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2393
2394         // Note: The width operand is encoded as width-1.
2395         unsigned Width = countTrailingOnes(And_imm) - 1;
2396         unsigned LSB = Srl_imm;
2397
2398         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2399
2400         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2401           // It's cheaper to use a right shift to extract the top bits.
2402           if (Subtarget->isThumb()) {
2403             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2404             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2405                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2406                               getAL(CurDAG, dl), Reg0, Reg0 };
2407             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2408             return true;
2409           }
2410
2411           // ARM models shift instructions as MOVsi with shifter operand.
2412           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2413           SDValue ShOpc =
2414             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2415                                       MVT::i32);
2416           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2417                             getAL(CurDAG, dl), Reg0, Reg0 };
2418           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2419           return true;
2420         }
2421
2422         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2423                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2424                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2425                           getAL(CurDAG, dl), Reg0 };
2426         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2427         return true;
2428       }
2429     }
2430     return false;
2431   }
2432
2433   // Otherwise, we're looking for a shift of a shift
2434   unsigned Shl_imm = 0;
2435   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2436     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2437     unsigned Srl_imm = 0;
2438     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2439       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2440       // Note: The width operand is encoded as width-1.
2441       unsigned Width = 32 - Srl_imm - 1;
2442       int LSB = Srl_imm - Shl_imm;
2443       if (LSB < 0)
2444         return false;
2445       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2446       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2447                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2448                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2449                         getAL(CurDAG, dl), Reg0 };
2450       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2451       return true;
2452     }
2453   }
2454
2455   // Or we are looking for a shift of an and, with a mask operand
2456   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2457       isShiftedMask_32(And_imm)) {
2458     unsigned Srl_imm = 0;
2459     unsigned LSB = countTrailingZeros(And_imm);
2460     // Shift must be the same as the ands lsb
2461     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2462       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2463       unsigned MSB = 31 - countLeadingZeros(And_imm);
2464       // Note: The width operand is encoded as width-1.
2465       unsigned Width = MSB - LSB;
2466       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2467       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2468                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2469                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2470                         getAL(CurDAG, dl), Reg0 };
2471       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2472       return true;
2473     }
2474   }
2475
2476   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2477     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2478     unsigned LSB = 0;
2479     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2480         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2481       return false;
2482
2483     if (LSB + Width > 32)
2484       return false;
2485
2486     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2487     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2488                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2489                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2490                       getAL(CurDAG, dl), Reg0 };
2491     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2492     return true;
2493   }
2494
2495   return false;
2496 }
2497
2498 /// Target-specific DAG combining for ISD::XOR.
2499 /// Target-independent combining lowers SELECT_CC nodes of the form
2500 /// select_cc setg[ge] X,  0,  X, -X
2501 /// select_cc setgt    X, -1,  X, -X
2502 /// select_cc setl[te] X,  0, -X,  X
2503 /// select_cc setlt    X,  1, -X,  X
2504 /// which represent Integer ABS into:
2505 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2506 /// ARM instruction selection detects the latter and matches it to
2507 /// ARM::ABS or ARM::t2ABS machine node.
2508 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2509   SDValue XORSrc0 = N->getOperand(0);
2510   SDValue XORSrc1 = N->getOperand(1);
2511   EVT VT = N->getValueType(0);
2512
2513   if (Subtarget->isThumb1Only())
2514     return false;
2515
2516   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2517     return false;
2518
2519   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2520   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2521   SDValue SRASrc0 = XORSrc1.getOperand(0);
2522   SDValue SRASrc1 = XORSrc1.getOperand(1);
2523   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2524   EVT XType = SRASrc0.getValueType();
2525   unsigned Size = XType.getSizeInBits() - 1;
2526
2527   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2528       XType.isInteger() && SRAConstant != nullptr &&
2529       Size == SRAConstant->getZExtValue()) {
2530     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2531     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2532     return true;
2533   }
2534
2535   return false;
2536 }
2537
2538 /// We've got special pseudo-instructions for these
2539 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2540   unsigned Opcode;
2541   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2542   if (MemTy == MVT::i8)
2543     Opcode = ARM::CMP_SWAP_8;
2544   else if (MemTy == MVT::i16)
2545     Opcode = ARM::CMP_SWAP_16;
2546   else if (MemTy == MVT::i32)
2547     Opcode = ARM::CMP_SWAP_32;
2548   else
2549     llvm_unreachable("Unknown AtomicCmpSwap type");
2550
2551   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2552                    N->getOperand(0)};
2553   SDNode *CmpSwap = CurDAG->getMachineNode(
2554       Opcode, SDLoc(N),
2555       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2556
2557   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2558   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2559   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2560
2561   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2562   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2563   CurDAG->RemoveDeadNode(N);
2564 }
2565
2566 static Optional<std::pair<unsigned, unsigned>>
2567 getContiguousRangeOfSetBits(const APInt &A) {
2568   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2569   unsigned LastOne = A.countTrailingZeros();
2570   if (A.countPopulation() != (FirstOne - LastOne + 1))
2571     return Optional<std::pair<unsigned,unsigned>>();
2572   return std::make_pair(FirstOne, LastOne);
2573 }
2574
2575 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2576   assert(N->getOpcode() == ARMISD::CMPZ);
2577   SwitchEQNEToPLMI = false;
2578
2579   if (!Subtarget->isThumb())
2580     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2581     // LSR don't exist as standalone instructions - they need the barrel shifter.
2582     return;
2583
2584   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2585   SDValue And = N->getOperand(0);
2586   if (!And->hasOneUse())
2587     return;
2588
2589   SDValue Zero = N->getOperand(1);
2590   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2591       And->getOpcode() != ISD::AND)
2592     return;
2593   SDValue X = And.getOperand(0);
2594   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2595
2596   if (!C || !X->hasOneUse())
2597     return;
2598   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2599   if (!Range)
2600     return;
2601
2602   // There are several ways to lower this:
2603   SDNode *NewN;
2604   SDLoc dl(N);
2605
2606   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2607     if (Subtarget->isThumb2()) {
2608       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2609       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2610                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2611                         CurDAG->getRegister(0, MVT::i32) };
2612       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2613     } else {
2614       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2615                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2616                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2617       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2618     }
2619   };
2620
2621   if (Range->second == 0) {
2622     //  1. Mask includes the LSB -> Simply shift the top N bits off
2623     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2624     ReplaceNode(And.getNode(), NewN);
2625   } else if (Range->first == 31) {
2626     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2627     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2628     ReplaceNode(And.getNode(), NewN);
2629   } else if (Range->first == Range->second) {
2630     //  3. Only one bit is set. We can shift this into the sign bit and use a
2631     //     PL/MI comparison.
2632     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2633     ReplaceNode(And.getNode(), NewN);
2634
2635     SwitchEQNEToPLMI = true;
2636   } else if (!Subtarget->hasV6T2Ops()) {
2637     //  4. Do a double shift to clear bottom and top bits, but only in
2638     //     thumb-1 mode as in thumb-2 we can use UBFX.
2639     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2640     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2641                      Range->second + (31 - Range->first));
2642     ReplaceNode(And.getNode(), NewN);
2643   }
2644
2645 }
2646
2647 void ARMDAGToDAGISel::Select(SDNode *N) {
2648   SDLoc dl(N);
2649
2650   if (N->isMachineOpcode()) {
2651     N->setNodeId(-1);
2652     return;   // Already selected.
2653   }
2654
2655   switch (N->getOpcode()) {
2656   default: break;
2657   case ISD::WRITE_REGISTER:
2658     if (tryWriteRegister(N))
2659       return;
2660     break;
2661   case ISD::READ_REGISTER:
2662     if (tryReadRegister(N))
2663       return;
2664     break;
2665   case ISD::INLINEASM:
2666     if (tryInlineAsm(N))
2667       return;
2668     break;
2669   case ISD::XOR:
2670     // Select special operations if XOR node forms integer ABS pattern
2671     if (tryABSOp(N))
2672       return;
2673     // Other cases are autogenerated.
2674     break;
2675   case ISD::Constant: {
2676     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2677     // If we can't materialize the constant we need to use a literal pool
2678     if (ConstantMaterializationCost(Val) > 2) {
2679       SDValue CPIdx = CurDAG->getTargetConstantPool(
2680           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2681           TLI->getPointerTy(CurDAG->getDataLayout()));
2682
2683       SDNode *ResNode;
2684       if (Subtarget->isThumb()) {
2685         SDValue Pred = getAL(CurDAG, dl);
2686         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2687         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2688         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2689                                          Ops);
2690       } else {
2691         SDValue Ops[] = {
2692           CPIdx,
2693           CurDAG->getTargetConstant(0, dl, MVT::i32),
2694           getAL(CurDAG, dl),
2695           CurDAG->getRegister(0, MVT::i32),
2696           CurDAG->getEntryNode()
2697         };
2698         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2699                                          Ops);
2700       }
2701       ReplaceNode(N, ResNode);
2702       return;
2703     }
2704
2705     // Other cases are autogenerated.
2706     break;
2707   }
2708   case ISD::FrameIndex: {
2709     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2710     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2711     SDValue TFI = CurDAG->getTargetFrameIndex(
2712         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2713     if (Subtarget->isThumb1Only()) {
2714       // Set the alignment of the frame object to 4, to avoid having to generate
2715       // more than one ADD
2716       MachineFrameInfo &MFI = MF->getFrameInfo();
2717       if (MFI.getObjectAlignment(FI) < 4)
2718         MFI.setObjectAlignment(FI, 4);
2719       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2720                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2721       return;
2722     } else {
2723       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2724                       ARM::t2ADDri : ARM::ADDri);
2725       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2726                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2727                         CurDAG->getRegister(0, MVT::i32) };
2728       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2729       return;
2730     }
2731   }
2732   case ISD::SRL:
2733     if (tryV6T2BitfieldExtractOp(N, false))
2734       return;
2735     break;
2736   case ISD::SIGN_EXTEND_INREG:
2737   case ISD::SRA:
2738     if (tryV6T2BitfieldExtractOp(N, true))
2739       return;
2740     break;
2741   case ISD::MUL:
2742     if (Subtarget->isThumb1Only())
2743       break;
2744     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2745       unsigned RHSV = C->getZExtValue();
2746       if (!RHSV) break;
2747       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2748         unsigned ShImm = Log2_32(RHSV-1);
2749         if (ShImm >= 32)
2750           break;
2751         SDValue V = N->getOperand(0);
2752         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2753         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2754         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2755         if (Subtarget->isThumb()) {
2756           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2757           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2758           return;
2759         } else {
2760           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2761                             Reg0 };
2762           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2763           return;
2764         }
2765       }
2766       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2767         unsigned ShImm = Log2_32(RHSV+1);
2768         if (ShImm >= 32)
2769           break;
2770         SDValue V = N->getOperand(0);
2771         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2772         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2773         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2774         if (Subtarget->isThumb()) {
2775           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2776           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2777           return;
2778         } else {
2779           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2780                             Reg0 };
2781           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2782           return;
2783         }
2784       }
2785     }
2786     break;
2787   case ISD::AND: {
2788     // Check for unsigned bitfield extract
2789     if (tryV6T2BitfieldExtractOp(N, false))
2790       return;
2791
2792     // If an immediate is used in an AND node, it is possible that the immediate
2793     // can be more optimally materialized when negated. If this is the case we
2794     // can negate the immediate and use a BIC instead.
2795     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2796     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2797       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2798
2799       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2800       // immediate can be negated and fit in the immediate operand of
2801       // a t2BIC, don't do any manual transform here as this can be
2802       // handled by the generic ISel machinery.
2803       bool PreferImmediateEncoding =
2804         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2805       if (!PreferImmediateEncoding &&
2806           ConstantMaterializationCost(Imm) >
2807               ConstantMaterializationCost(~Imm)) {
2808         // The current immediate costs more to materialize than a negated
2809         // immediate, so negate the immediate and use a BIC.
2810         SDValue NewImm =
2811           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2812         // If the new constant didn't exist before, reposition it in the topological
2813         // ordering so it is just before N. Otherwise, don't touch its location.
2814         if (NewImm->getNodeId() == -1)
2815           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2816
2817         if (!Subtarget->hasThumb2()) {
2818           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2819                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2820                            CurDAG->getRegister(0, MVT::i32)};
2821           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2822           return;
2823         } else {
2824           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2825                            CurDAG->getRegister(0, MVT::i32),
2826                            CurDAG->getRegister(0, MVT::i32)};
2827           ReplaceNode(N,
2828                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2829           return;
2830         }
2831       }
2832     }
2833
2834     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2835     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2836     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2837     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2838     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2839     EVT VT = N->getValueType(0);
2840     if (VT != MVT::i32)
2841       break;
2842     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2843       ? ARM::t2MOVTi16
2844       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2845     if (!Opc)
2846       break;
2847     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2848     N1C = dyn_cast<ConstantSDNode>(N1);
2849     if (!N1C)
2850       break;
2851     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2852       SDValue N2 = N0.getOperand(1);
2853       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2854       if (!N2C)
2855         break;
2856       unsigned N1CVal = N1C->getZExtValue();
2857       unsigned N2CVal = N2C->getZExtValue();
2858       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2859           (N1CVal & 0xffffU) == 0xffffU &&
2860           (N2CVal & 0xffffU) == 0x0U) {
2861         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2862                                                   dl, MVT::i32);
2863         SDValue Ops[] = { N0.getOperand(0), Imm16,
2864                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2865         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2866         return;
2867       }
2868     }
2869
2870     break;
2871   }
2872   case ARMISD::UMAAL: {
2873     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2874     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2875                       N->getOperand(2), N->getOperand(3),
2876                       getAL(CurDAG, dl),
2877                       CurDAG->getRegister(0, MVT::i32) };
2878     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2879     return;
2880   }
2881   case ARMISD::UMLAL:{
2882     if (Subtarget->isThumb()) {
2883       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2884                         N->getOperand(3), getAL(CurDAG, dl),
2885                         CurDAG->getRegister(0, MVT::i32)};
2886       ReplaceNode(
2887           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2888       return;
2889     }else{
2890       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2891                         N->getOperand(3), getAL(CurDAG, dl),
2892                         CurDAG->getRegister(0, MVT::i32),
2893                         CurDAG->getRegister(0, MVT::i32) };
2894       ReplaceNode(N, CurDAG->getMachineNode(
2895                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2896                          MVT::i32, MVT::i32, Ops));
2897       return;
2898     }
2899   }
2900   case ARMISD::SMLAL:{
2901     if (Subtarget->isThumb()) {
2902       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2903                         N->getOperand(3), getAL(CurDAG, dl),
2904                         CurDAG->getRegister(0, MVT::i32)};
2905       ReplaceNode(
2906           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2907       return;
2908     }else{
2909       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2910                         N->getOperand(3), getAL(CurDAG, dl),
2911                         CurDAG->getRegister(0, MVT::i32),
2912                         CurDAG->getRegister(0, MVT::i32) };
2913       ReplaceNode(N, CurDAG->getMachineNode(
2914                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2915                          MVT::i32, MVT::i32, Ops));
2916       return;
2917     }
2918   }
2919   case ARMISD::SUBE: {
2920     if (!Subtarget->hasV6Ops())
2921       break;
2922     // Look for a pattern to match SMMLS
2923     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2924     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2925         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2926         !SDValue(N, 1).use_empty())
2927       break;
2928
2929     if (Subtarget->isThumb())
2930       assert(Subtarget->hasThumb2() &&
2931              "This pattern should not be generated for Thumb");
2932
2933     SDValue SmulLoHi = N->getOperand(1);
2934     SDValue Subc = N->getOperand(2);
2935     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2936
2937     if (!Zero || Zero->getZExtValue() != 0 ||
2938         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2939         N->getOperand(1) != SmulLoHi.getValue(1) ||
2940         N->getOperand(2) != Subc.getValue(1))
2941       break;
2942
2943     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2944     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2945                       N->getOperand(0), getAL(CurDAG, dl),
2946                       CurDAG->getRegister(0, MVT::i32) };
2947     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2948     return;
2949   }
2950   case ISD::LOAD: {
2951     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2952       if (tryT2IndexedLoad(N))
2953         return;
2954     } else if (Subtarget->isThumb()) {
2955       if (tryT1IndexedLoad(N))
2956         return;
2957     } else if (tryARMIndexedLoad(N))
2958       return;
2959     // Other cases are autogenerated.
2960     break;
2961   }
2962   case ARMISD::BRCOND: {
2963     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2964     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2965     // Pattern complexity = 6  cost = 1  size = 0
2966
2967     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2968     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2969     // Pattern complexity = 6  cost = 1  size = 0
2970
2971     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2972     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2973     // Pattern complexity = 6  cost = 1  size = 0
2974
2975     unsigned Opc = Subtarget->isThumb() ?
2976       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2977     SDValue Chain = N->getOperand(0);
2978     SDValue N1 = N->getOperand(1);
2979     SDValue N2 = N->getOperand(2);
2980     SDValue N3 = N->getOperand(3);
2981     SDValue InFlag = N->getOperand(4);
2982     assert(N1.getOpcode() == ISD::BasicBlock);
2983     assert(N2.getOpcode() == ISD::Constant);
2984     assert(N3.getOpcode() == ISD::Register);
2985
2986     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2987
2988     if (InFlag.getOpcode() == ARMISD::CMPZ) {
2989       bool SwitchEQNEToPLMI;
2990       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2991       InFlag = N->getOperand(4);
2992
2993       if (SwitchEQNEToPLMI) {
2994         switch ((ARMCC::CondCodes)CC) {
2995         default: llvm_unreachable("CMPZ must be either NE or EQ!");
2996         case ARMCC::NE:
2997           CC = (unsigned)ARMCC::MI;
2998           break;
2999         case ARMCC::EQ:
3000           CC = (unsigned)ARMCC::PL;
3001           break;
3002         }
3003       }
3004     }
3005
3006     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3007     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3008     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3009                                              MVT::Glue, Ops);
3010     Chain = SDValue(ResNode, 0);
3011     if (N->getNumValues() == 2) {
3012       InFlag = SDValue(ResNode, 1);
3013       ReplaceUses(SDValue(N, 1), InFlag);
3014     }
3015     ReplaceUses(SDValue(N, 0),
3016                 SDValue(Chain.getNode(), Chain.getResNo()));
3017     CurDAG->RemoveDeadNode(N);
3018     return;
3019   }
3020
3021   case ARMISD::CMPZ: {
3022     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3023     //   This allows us to avoid materializing the expensive negative constant.
3024     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3025     //   for its glue output.
3026     SDValue X = N->getOperand(0);
3027     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3028     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3029       int64_t Addend = -C->getSExtValue();
3030
3031       SDNode *Add = nullptr;
3032       // ADDS can be better than CMN if the immediate fits in a
3033       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3034       // Outside that range we can just use a CMN which is 32-bit but has a
3035       // 12-bit immediate range.
3036       if (Addend < 1<<8) {
3037         if (Subtarget->isThumb2()) {
3038           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3039                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3040                             CurDAG->getRegister(0, MVT::i32) };
3041           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3042         } else {
3043           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3044           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3045                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3046                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3047           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3048         }
3049       }
3050       if (Add) {
3051         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3052         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3053       }
3054     }
3055     // Other cases are autogenerated.
3056     break;
3057   }
3058
3059   case ARMISD::CMOV: {
3060     SDValue InFlag = N->getOperand(4);
3061
3062     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3063       bool SwitchEQNEToPLMI;
3064       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3065
3066       if (SwitchEQNEToPLMI) {
3067         SDValue ARMcc = N->getOperand(2);
3068         ARMCC::CondCodes CC =
3069           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3070
3071         switch (CC) {
3072         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3073         case ARMCC::NE:
3074           CC = ARMCC::MI;
3075           break;
3076         case ARMCC::EQ:
3077           CC = ARMCC::PL;
3078           break;
3079         }
3080         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3081         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3082                          N->getOperand(3), N->getOperand(4)};
3083         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3084       }
3085
3086     }
3087     // Other cases are autogenerated.
3088     break;
3089   }
3090
3091   case ARMISD::VZIP: {
3092     unsigned Opc = 0;
3093     EVT VT = N->getValueType(0);
3094     switch (VT.getSimpleVT().SimpleTy) {
3095     default: return;
3096     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3097     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3098     case MVT::v2f32:
3099     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3100     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3101     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3102     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3103     case MVT::v4f32:
3104     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3105     }
3106     SDValue Pred = getAL(CurDAG, dl);
3107     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3108     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3109     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3110     return;
3111   }
3112   case ARMISD::VUZP: {
3113     unsigned Opc = 0;
3114     EVT VT = N->getValueType(0);
3115     switch (VT.getSimpleVT().SimpleTy) {
3116     default: return;
3117     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3118     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3119     case MVT::v2f32:
3120     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3121     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3122     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3123     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3124     case MVT::v4f32:
3125     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3126     }
3127     SDValue Pred = getAL(CurDAG, dl);
3128     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3129     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3130     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3131     return;
3132   }
3133   case ARMISD::VTRN: {
3134     unsigned Opc = 0;
3135     EVT VT = N->getValueType(0);
3136     switch (VT.getSimpleVT().SimpleTy) {
3137     default: return;
3138     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3139     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3140     case MVT::v2f32:
3141     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3142     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3143     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3144     case MVT::v4f32:
3145     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3146     }
3147     SDValue Pred = getAL(CurDAG, dl);
3148     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3149     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3150     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3151     return;
3152   }
3153   case ARMISD::BUILD_VECTOR: {
3154     EVT VecVT = N->getValueType(0);
3155     EVT EltVT = VecVT.getVectorElementType();
3156     unsigned NumElts = VecVT.getVectorNumElements();
3157     if (EltVT == MVT::f64) {
3158       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3159       ReplaceNode(
3160           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3161       return;
3162     }
3163     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3164     if (NumElts == 2) {
3165       ReplaceNode(
3166           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3167       return;
3168     }
3169     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3170     ReplaceNode(N,
3171                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3172                                     N->getOperand(2), N->getOperand(3)));
3173     return;
3174   }
3175
3176   case ARMISD::VLD1DUP: {
3177     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3178                                          ARM::VLD1DUPd32 };
3179     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3180                                          ARM::VLD1DUPq32 };
3181     SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3182     return;
3183   }
3184
3185   case ARMISD::VLD2DUP: {
3186     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3187                                         ARM::VLD2DUPd32 };
3188     SelectVLDDup(N, false, 2, Opcodes);
3189     return;
3190   }
3191
3192   case ARMISD::VLD3DUP: {
3193     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3194                                         ARM::VLD3DUPd16Pseudo,
3195                                         ARM::VLD3DUPd32Pseudo };
3196     SelectVLDDup(N, false, 3, Opcodes);
3197     return;
3198   }
3199
3200   case ARMISD::VLD4DUP: {
3201     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3202                                         ARM::VLD4DUPd16Pseudo,
3203                                         ARM::VLD4DUPd32Pseudo };
3204     SelectVLDDup(N, false, 4, Opcodes);
3205     return;
3206   }
3207
3208   case ARMISD::VLD1DUP_UPD: {
3209     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3210                                          ARM::VLD1DUPd16wb_fixed,
3211                                          ARM::VLD1DUPd32wb_fixed };
3212     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3213                                          ARM::VLD1DUPq16wb_fixed,
3214                                          ARM::VLD1DUPq32wb_fixed };
3215     SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3216     return;
3217   }
3218
3219   case ARMISD::VLD2DUP_UPD: {
3220     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3221                                         ARM::VLD2DUPd16wb_fixed,
3222                                         ARM::VLD2DUPd32wb_fixed };
3223     SelectVLDDup(N, true, 2, Opcodes);
3224     return;
3225   }
3226
3227   case ARMISD::VLD3DUP_UPD: {
3228     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3229                                         ARM::VLD3DUPd16Pseudo_UPD,
3230                                         ARM::VLD3DUPd32Pseudo_UPD };
3231     SelectVLDDup(N, true, 3, Opcodes);
3232     return;
3233   }
3234
3235   case ARMISD::VLD4DUP_UPD: {
3236     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3237                                         ARM::VLD4DUPd16Pseudo_UPD,
3238                                         ARM::VLD4DUPd32Pseudo_UPD };
3239     SelectVLDDup(N, true, 4, Opcodes);
3240     return;
3241   }
3242
3243   case ARMISD::VLD1_UPD: {
3244     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3245                                          ARM::VLD1d16wb_fixed,
3246                                          ARM::VLD1d32wb_fixed,
3247                                          ARM::VLD1d64wb_fixed };
3248     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3249                                          ARM::VLD1q16wb_fixed,
3250                                          ARM::VLD1q32wb_fixed,
3251                                          ARM::VLD1q64wb_fixed };
3252     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3253     return;
3254   }
3255
3256   case ARMISD::VLD2_UPD: {
3257     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3258                                          ARM::VLD2d16wb_fixed,
3259                                          ARM::VLD2d32wb_fixed,
3260                                          ARM::VLD1q64wb_fixed};
3261     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3262                                          ARM::VLD2q16PseudoWB_fixed,
3263                                          ARM::VLD2q32PseudoWB_fixed };
3264     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3265     return;
3266   }
3267
3268   case ARMISD::VLD3_UPD: {
3269     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3270                                          ARM::VLD3d16Pseudo_UPD,
3271                                          ARM::VLD3d32Pseudo_UPD,
3272                                          ARM::VLD1d64TPseudoWB_fixed};
3273     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3274                                           ARM::VLD3q16Pseudo_UPD,
3275                                           ARM::VLD3q32Pseudo_UPD };
3276     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3277                                           ARM::VLD3q16oddPseudo_UPD,
3278                                           ARM::VLD3q32oddPseudo_UPD };
3279     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3280     return;
3281   }
3282
3283   case ARMISD::VLD4_UPD: {
3284     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3285                                          ARM::VLD4d16Pseudo_UPD,
3286                                          ARM::VLD4d32Pseudo_UPD,
3287                                          ARM::VLD1d64QPseudoWB_fixed};
3288     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3289                                           ARM::VLD4q16Pseudo_UPD,
3290                                           ARM::VLD4q32Pseudo_UPD };
3291     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3292                                           ARM::VLD4q16oddPseudo_UPD,
3293                                           ARM::VLD4q32oddPseudo_UPD };
3294     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3295     return;
3296   }
3297
3298   case ARMISD::VLD2LN_UPD: {
3299     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3300                                          ARM::VLD2LNd16Pseudo_UPD,
3301                                          ARM::VLD2LNd32Pseudo_UPD };
3302     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3303                                          ARM::VLD2LNq32Pseudo_UPD };
3304     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3305     return;
3306   }
3307
3308   case ARMISD::VLD3LN_UPD: {
3309     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3310                                          ARM::VLD3LNd16Pseudo_UPD,
3311                                          ARM::VLD3LNd32Pseudo_UPD };
3312     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3313                                          ARM::VLD3LNq32Pseudo_UPD };
3314     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3315     return;
3316   }
3317
3318   case ARMISD::VLD4LN_UPD: {
3319     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3320                                          ARM::VLD4LNd16Pseudo_UPD,
3321                                          ARM::VLD4LNd32Pseudo_UPD };
3322     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3323                                          ARM::VLD4LNq32Pseudo_UPD };
3324     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3325     return;
3326   }
3327
3328   case ARMISD::VST1_UPD: {
3329     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3330                                          ARM::VST1d16wb_fixed,
3331                                          ARM::VST1d32wb_fixed,
3332                                          ARM::VST1d64wb_fixed };
3333     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3334                                          ARM::VST1q16wb_fixed,
3335                                          ARM::VST1q32wb_fixed,
3336                                          ARM::VST1q64wb_fixed };
3337     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3338     return;
3339   }
3340
3341   case ARMISD::VST2_UPD: {
3342     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3343                                          ARM::VST2d16wb_fixed,
3344                                          ARM::VST2d32wb_fixed,
3345                                          ARM::VST1q64wb_fixed};
3346     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3347                                          ARM::VST2q16PseudoWB_fixed,
3348                                          ARM::VST2q32PseudoWB_fixed };
3349     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3350     return;
3351   }
3352
3353   case ARMISD::VST3_UPD: {
3354     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3355                                          ARM::VST3d16Pseudo_UPD,
3356                                          ARM::VST3d32Pseudo_UPD,
3357                                          ARM::VST1d64TPseudoWB_fixed};
3358     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3359                                           ARM::VST3q16Pseudo_UPD,
3360                                           ARM::VST3q32Pseudo_UPD };
3361     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3362                                           ARM::VST3q16oddPseudo_UPD,
3363                                           ARM::VST3q32oddPseudo_UPD };
3364     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3365     return;
3366   }
3367
3368   case ARMISD::VST4_UPD: {
3369     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3370                                          ARM::VST4d16Pseudo_UPD,
3371                                          ARM::VST4d32Pseudo_UPD,
3372                                          ARM::VST1d64QPseudoWB_fixed};
3373     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3374                                           ARM::VST4q16Pseudo_UPD,
3375                                           ARM::VST4q32Pseudo_UPD };
3376     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3377                                           ARM::VST4q16oddPseudo_UPD,
3378                                           ARM::VST4q32oddPseudo_UPD };
3379     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3380     return;
3381   }
3382
3383   case ARMISD::VST2LN_UPD: {
3384     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3385                                          ARM::VST2LNd16Pseudo_UPD,
3386                                          ARM::VST2LNd32Pseudo_UPD };
3387     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3388                                          ARM::VST2LNq32Pseudo_UPD };
3389     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3390     return;
3391   }
3392
3393   case ARMISD::VST3LN_UPD: {
3394     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3395                                          ARM::VST3LNd16Pseudo_UPD,
3396                                          ARM::VST3LNd32Pseudo_UPD };
3397     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3398                                          ARM::VST3LNq32Pseudo_UPD };
3399     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3400     return;
3401   }
3402
3403   case ARMISD::VST4LN_UPD: {
3404     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3405                                          ARM::VST4LNd16Pseudo_UPD,
3406                                          ARM::VST4LNd32Pseudo_UPD };
3407     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3408                                          ARM::VST4LNq32Pseudo_UPD };
3409     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3410     return;
3411   }
3412
3413   case ISD::INTRINSIC_VOID:
3414   case ISD::INTRINSIC_W_CHAIN: {
3415     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3416     switch (IntNo) {
3417     default:
3418       break;
3419
3420     case Intrinsic::arm_mrrc:
3421     case Intrinsic::arm_mrrc2: {
3422       SDLoc dl(N);
3423       SDValue Chain = N->getOperand(0);
3424       unsigned Opc;
3425
3426       if (Subtarget->isThumb())
3427         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3428       else
3429         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3430
3431       SmallVector<SDValue, 5> Ops;
3432       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3433       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3434       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3435
3436       // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the encoded
3437       // instruction will always be '1111'. It is possible in assembly language to specify
3438       // AL as a predicate to mrrc2, but it doesn't make any difference to the encoded instruction.
3439       if (Opc != ARM::MRRC2) {
3440         Ops.push_back(getAL(CurDAG, dl));
3441         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3442       }
3443
3444       Ops.push_back(Chain);
3445
3446       // Writes to two registers.
3447       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3448
3449       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3450       return;
3451     }
3452     case Intrinsic::arm_ldaexd:
3453     case Intrinsic::arm_ldrexd: {
3454       SDLoc dl(N);
3455       SDValue Chain = N->getOperand(0);
3456       SDValue MemAddr = N->getOperand(2);
3457       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3458
3459       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3460       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3461                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3462
3463       // arm_ldrexd returns a i64 value in {i32, i32}
3464       std::vector<EVT> ResTys;
3465       if (isThumb) {
3466         ResTys.push_back(MVT::i32);
3467         ResTys.push_back(MVT::i32);
3468       } else
3469         ResTys.push_back(MVT::Untyped);
3470       ResTys.push_back(MVT::Other);
3471
3472       // Place arguments in the right order.
3473       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3474                        CurDAG->getRegister(0, MVT::i32), Chain};
3475       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3476       // Transfer memoperands.
3477       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3478       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3479       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3480
3481       // Remap uses.
3482       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3483       if (!SDValue(N, 0).use_empty()) {
3484         SDValue Result;
3485         if (isThumb)
3486           Result = SDValue(Ld, 0);
3487         else {
3488           SDValue SubRegIdx =
3489             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3490           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3491               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3492           Result = SDValue(ResNode,0);
3493         }
3494         ReplaceUses(SDValue(N, 0), Result);
3495       }
3496       if (!SDValue(N, 1).use_empty()) {
3497         SDValue Result;
3498         if (isThumb)
3499           Result = SDValue(Ld, 1);
3500         else {
3501           SDValue SubRegIdx =
3502             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3503           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3504               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3505           Result = SDValue(ResNode,0);
3506         }
3507         ReplaceUses(SDValue(N, 1), Result);
3508       }
3509       ReplaceUses(SDValue(N, 2), OutChain);
3510       CurDAG->RemoveDeadNode(N);
3511       return;
3512     }
3513     case Intrinsic::arm_stlexd:
3514     case Intrinsic::arm_strexd: {
3515       SDLoc dl(N);
3516       SDValue Chain = N->getOperand(0);
3517       SDValue Val0 = N->getOperand(2);
3518       SDValue Val1 = N->getOperand(3);
3519       SDValue MemAddr = N->getOperand(4);
3520
3521       // Store exclusive double return a i32 value which is the return status
3522       // of the issued store.
3523       const EVT ResTys[] = {MVT::i32, MVT::Other};
3524
3525       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3526       // Place arguments in the right order.
3527       SmallVector<SDValue, 7> Ops;
3528       if (isThumb) {
3529         Ops.push_back(Val0);
3530         Ops.push_back(Val1);
3531       } else
3532         // arm_strexd uses GPRPair.
3533         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3534       Ops.push_back(MemAddr);
3535       Ops.push_back(getAL(CurDAG, dl));
3536       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3537       Ops.push_back(Chain);
3538
3539       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3540       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3541                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3542
3543       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3544       // Transfer memoperands.
3545       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3546       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3547       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3548
3549       ReplaceNode(N, St);
3550       return;
3551     }
3552
3553     case Intrinsic::arm_neon_vld1: {
3554       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3555                                            ARM::VLD1d32, ARM::VLD1d64 };
3556       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3557                                            ARM::VLD1q32, ARM::VLD1q64};
3558       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3559       return;
3560     }
3561
3562     case Intrinsic::arm_neon_vld2: {
3563       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3564                                            ARM::VLD2d32, ARM::VLD1q64 };
3565       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3566                                            ARM::VLD2q32Pseudo };
3567       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3568       return;
3569     }
3570
3571     case Intrinsic::arm_neon_vld3: {
3572       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3573                                            ARM::VLD3d16Pseudo,
3574                                            ARM::VLD3d32Pseudo,
3575                                            ARM::VLD1d64TPseudo };
3576       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3577                                             ARM::VLD3q16Pseudo_UPD,
3578                                             ARM::VLD3q32Pseudo_UPD };
3579       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3580                                             ARM::VLD3q16oddPseudo,
3581                                             ARM::VLD3q32oddPseudo };
3582       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3583       return;
3584     }
3585
3586     case Intrinsic::arm_neon_vld4: {
3587       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3588                                            ARM::VLD4d16Pseudo,
3589                                            ARM::VLD4d32Pseudo,
3590                                            ARM::VLD1d64QPseudo };
3591       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3592                                             ARM::VLD4q16Pseudo_UPD,
3593                                             ARM::VLD4q32Pseudo_UPD };
3594       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3595                                             ARM::VLD4q16oddPseudo,
3596                                             ARM::VLD4q32oddPseudo };
3597       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3598       return;
3599     }
3600
3601     case Intrinsic::arm_neon_vld2lane: {
3602       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3603                                            ARM::VLD2LNd16Pseudo,
3604                                            ARM::VLD2LNd32Pseudo };
3605       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3606                                            ARM::VLD2LNq32Pseudo };
3607       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3608       return;
3609     }
3610
3611     case Intrinsic::arm_neon_vld3lane: {
3612       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3613                                            ARM::VLD3LNd16Pseudo,
3614                                            ARM::VLD3LNd32Pseudo };
3615       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3616                                            ARM::VLD3LNq32Pseudo };
3617       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3618       return;
3619     }
3620
3621     case Intrinsic::arm_neon_vld4lane: {
3622       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3623                                            ARM::VLD4LNd16Pseudo,
3624                                            ARM::VLD4LNd32Pseudo };
3625       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3626                                            ARM::VLD4LNq32Pseudo };
3627       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3628       return;
3629     }
3630
3631     case Intrinsic::arm_neon_vst1: {
3632       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3633                                            ARM::VST1d32, ARM::VST1d64 };
3634       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3635                                            ARM::VST1q32, ARM::VST1q64 };
3636       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3637       return;
3638     }
3639
3640     case Intrinsic::arm_neon_vst2: {
3641       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3642                                            ARM::VST2d32, ARM::VST1q64 };
3643       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3644                                            ARM::VST2q32Pseudo };
3645       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3646       return;
3647     }
3648
3649     case Intrinsic::arm_neon_vst3: {
3650       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3651                                            ARM::VST3d16Pseudo,
3652                                            ARM::VST3d32Pseudo,
3653                                            ARM::VST1d64TPseudo };
3654       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3655                                             ARM::VST3q16Pseudo_UPD,
3656                                             ARM::VST3q32Pseudo_UPD };
3657       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3658                                             ARM::VST3q16oddPseudo,
3659                                             ARM::VST3q32oddPseudo };
3660       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3661       return;
3662     }
3663
3664     case Intrinsic::arm_neon_vst4: {
3665       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3666                                            ARM::VST4d16Pseudo,
3667                                            ARM::VST4d32Pseudo,
3668                                            ARM::VST1d64QPseudo };
3669       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3670                                             ARM::VST4q16Pseudo_UPD,
3671                                             ARM::VST4q32Pseudo_UPD };
3672       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3673                                             ARM::VST4q16oddPseudo,
3674                                             ARM::VST4q32oddPseudo };
3675       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3676       return;
3677     }
3678
3679     case Intrinsic::arm_neon_vst2lane: {
3680       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3681                                            ARM::VST2LNd16Pseudo,
3682                                            ARM::VST2LNd32Pseudo };
3683       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3684                                            ARM::VST2LNq32Pseudo };
3685       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3686       return;
3687     }
3688
3689     case Intrinsic::arm_neon_vst3lane: {
3690       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3691                                            ARM::VST3LNd16Pseudo,
3692                                            ARM::VST3LNd32Pseudo };
3693       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3694                                            ARM::VST3LNq32Pseudo };
3695       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3696       return;
3697     }
3698
3699     case Intrinsic::arm_neon_vst4lane: {
3700       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3701                                            ARM::VST4LNd16Pseudo,
3702                                            ARM::VST4LNd32Pseudo };
3703       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3704                                            ARM::VST4LNq32Pseudo };
3705       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3706       return;
3707     }
3708     }
3709     break;
3710   }
3711
3712   case ISD::ATOMIC_CMP_SWAP:
3713     SelectCMP_SWAP(N);
3714     return;
3715   }
3716
3717   SelectCode(N);
3718 }
3719
3720 // Inspect a register string of the form
3721 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3722 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3723 // and obtain the integer operands from them, adding these operands to the
3724 // provided vector.
3725 static void getIntOperandsFromRegisterString(StringRef RegString,
3726                                              SelectionDAG *CurDAG,
3727                                              const SDLoc &DL,
3728                                              std::vector<SDValue> &Ops) {
3729   SmallVector<StringRef, 5> Fields;
3730   RegString.split(Fields, ':');
3731
3732   if (Fields.size() > 1) {
3733     bool AllIntFields = true;
3734
3735     for (StringRef Field : Fields) {
3736       // Need to trim out leading 'cp' characters and get the integer field.
3737       unsigned IntField;
3738       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3739       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3740     }
3741
3742     assert(AllIntFields &&
3743             "Unexpected non-integer value in special register string.");
3744   }
3745 }
3746
3747 // Maps a Banked Register string to its mask value. The mask value returned is
3748 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3749 // mask operand, which expresses which register is to be used, e.g. r8, and in
3750 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3751 // was invalid.
3752 static inline int getBankedRegisterMask(StringRef RegString) {
3753   return StringSwitch<int>(RegString.lower())
3754           .Case("r8_usr", 0x00)
3755           .Case("r9_usr", 0x01)
3756           .Case("r10_usr", 0x02)
3757           .Case("r11_usr", 0x03)
3758           .Case("r12_usr", 0x04)
3759           .Case("sp_usr", 0x05)
3760           .Case("lr_usr", 0x06)
3761           .Case("r8_fiq", 0x08)
3762           .Case("r9_fiq", 0x09)
3763           .Case("r10_fiq", 0x0a)
3764           .Case("r11_fiq", 0x0b)
3765           .Case("r12_fiq", 0x0c)
3766           .Case("sp_fiq", 0x0d)
3767           .Case("lr_fiq", 0x0e)
3768           .Case("lr_irq", 0x10)
3769           .Case("sp_irq", 0x11)
3770           .Case("lr_svc", 0x12)
3771           .Case("sp_svc", 0x13)
3772           .Case("lr_abt", 0x14)
3773           .Case("sp_abt", 0x15)
3774           .Case("lr_und", 0x16)
3775           .Case("sp_und", 0x17)
3776           .Case("lr_mon", 0x1c)
3777           .Case("sp_mon", 0x1d)
3778           .Case("elr_hyp", 0x1e)
3779           .Case("sp_hyp", 0x1f)
3780           .Case("spsr_fiq", 0x2e)
3781           .Case("spsr_irq", 0x30)
3782           .Case("spsr_svc", 0x32)
3783           .Case("spsr_abt", 0x34)
3784           .Case("spsr_und", 0x36)
3785           .Case("spsr_mon", 0x3c)
3786           .Case("spsr_hyp", 0x3e)
3787           .Default(-1);
3788 }
3789
3790 // Maps a MClass special register string to its value for use in the
3791 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3792 // Returns -1 to signify that the string was invalid.
3793 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3794   return StringSwitch<int>(RegString.lower())
3795           .Case("apsr", 0x0)
3796           .Case("iapsr", 0x1)
3797           .Case("eapsr", 0x2)
3798           .Case("xpsr", 0x3)
3799           .Case("ipsr", 0x5)
3800           .Case("epsr", 0x6)
3801           .Case("iepsr", 0x7)
3802           .Case("msp", 0x8)
3803           .Case("psp", 0x9)
3804           .Case("primask", 0x10)
3805           .Case("basepri", 0x11)
3806           .Case("basepri_max", 0x12)
3807           .Case("faultmask", 0x13)
3808           .Case("control", 0x14)
3809           .Case("msplim", 0x0a)
3810           .Case("psplim", 0x0b)
3811           .Case("sp", 0x18)
3812           .Default(-1);
3813 }
3814
3815 // The flags here are common to those allowed for apsr in the A class cores and
3816 // those allowed for the special registers in the M class cores. Returns a
3817 // value representing which flags were present, -1 if invalid.
3818 static inline int getMClassFlagsMask(StringRef Flags) {
3819   return StringSwitch<int>(Flags)
3820           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3821                          // correct when flags are not permitted
3822           .Case("g", 0x1)
3823           .Case("nzcvq", 0x2)
3824           .Case("nzcvqg", 0x3)
3825           .Default(-1);
3826 }
3827
3828 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3829                                  const ARMSubtarget *Subtarget) {
3830   // Ensure that the register (without flags) was a valid M Class special
3831   // register.
3832   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3833   if (SYSmvalue == -1)
3834     return -1;
3835
3836   // basepri, basepri_max and faultmask are only valid for V7m.
3837   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3838     return -1;
3839
3840   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3841     Flags = "";
3842     SYSmvalue |= 0x80;
3843   }
3844
3845   if (!Subtarget->has8MSecExt() &&
3846       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3847     return -1;
3848
3849   if (!Subtarget->hasV8MMainlineOps() &&
3850       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3851        SYSmvalue == 0x93))
3852     return -1;
3853
3854   // If it was a read then we won't be expecting flags and so at this point
3855   // we can return the mask.
3856   if (IsRead) {
3857     if (Flags.empty())
3858       return SYSmvalue;
3859     else
3860       return -1;
3861   }
3862
3863   // We know we are now handling a write so need to get the mask for the flags.
3864   int Mask = getMClassFlagsMask(Flags);
3865
3866   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3867   // shouldn't have flags present.
3868   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3869     return -1;
3870
3871   // The _g and _nzcvqg versions are only valid if the DSP extension is
3872   // available.
3873   if (!Subtarget->hasDSP() && (Mask & 0x1))
3874     return -1;
3875
3876   // The register was valid so need to put the mask in the correct place
3877   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3878   // construct the operand for the instruction node.
3879   return SYSmvalue | Mask << 10;
3880 }
3881
3882 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3883   // The mask operand contains the special register (R Bit) in bit 4, whether
3884   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3885   // bits 3-0 contains the fields to be accessed in the special register, set by
3886   // the flags provided with the register.
3887   int Mask = 0;
3888   if (Reg == "apsr") {
3889     // The flags permitted for apsr are the same flags that are allowed in
3890     // M class registers. We get the flag value and then shift the flags into
3891     // the correct place to combine with the mask.
3892     Mask = getMClassFlagsMask(Flags);
3893     if (Mask == -1)
3894       return -1;
3895     return Mask << 2;
3896   }
3897
3898   if (Reg != "cpsr" && Reg != "spsr") {
3899     return -1;
3900   }
3901
3902   // This is the same as if the flags were "fc"
3903   if (Flags.empty() || Flags == "all")
3904     return Mask | 0x9;
3905
3906   // Inspect the supplied flags string and set the bits in the mask for
3907   // the relevant and valid flags allowed for cpsr and spsr.
3908   for (char Flag : Flags) {
3909     int FlagVal;
3910     switch (Flag) {
3911       case 'c':
3912         FlagVal = 0x1;
3913         break;
3914       case 'x':
3915         FlagVal = 0x2;
3916         break;
3917       case 's':
3918         FlagVal = 0x4;
3919         break;
3920       case 'f':
3921         FlagVal = 0x8;
3922         break;
3923       default:
3924         FlagVal = 0;
3925     }
3926
3927     // This avoids allowing strings where the same flag bit appears twice.
3928     if (!FlagVal || (Mask & FlagVal))
3929       return -1;
3930     Mask |= FlagVal;
3931   }
3932
3933   // If the register is spsr then we need to set the R bit.
3934   if (Reg == "spsr")
3935     Mask |= 0x10;
3936
3937   return Mask;
3938 }
3939
3940 // Lower the read_register intrinsic to ARM specific DAG nodes
3941 // using the supplied metadata string to select the instruction node to use
3942 // and the registers/masks to construct as operands for the node.
3943 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3944   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3945   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3946   bool IsThumb2 = Subtarget->isThumb2();
3947   SDLoc DL(N);
3948
3949   std::vector<SDValue> Ops;
3950   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3951
3952   if (!Ops.empty()) {
3953     // If the special register string was constructed of fields (as defined
3954     // in the ACLE) then need to lower to MRC node (32 bit) or
3955     // MRRC node(64 bit), we can make the distinction based on the number of
3956     // operands we have.
3957     unsigned Opcode;
3958     SmallVector<EVT, 3> ResTypes;
3959     if (Ops.size() == 5){
3960       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3961       ResTypes.append({ MVT::i32, MVT::Other });
3962     } else {
3963       assert(Ops.size() == 3 &&
3964               "Invalid number of fields in special register string.");
3965       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3966       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3967     }
3968
3969     Ops.push_back(getAL(CurDAG, DL));
3970     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3971     Ops.push_back(N->getOperand(0));
3972     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3973     return true;
3974   }
3975
3976   std::string SpecialReg = RegString->getString().lower();
3977
3978   int BankedReg = getBankedRegisterMask(SpecialReg);
3979   if (BankedReg != -1) {
3980     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3981             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3982             N->getOperand(0) };
3983     ReplaceNode(
3984         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3985                                   DL, MVT::i32, MVT::Other, Ops));
3986     return true;
3987   }
3988
3989   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3990   // corresponding to the register that is being read from. So we switch on the
3991   // string to find which opcode we need to use.
3992   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3993                     .Case("fpscr", ARM::VMRS)
3994                     .Case("fpexc", ARM::VMRS_FPEXC)
3995                     .Case("fpsid", ARM::VMRS_FPSID)
3996                     .Case("mvfr0", ARM::VMRS_MVFR0)
3997                     .Case("mvfr1", ARM::VMRS_MVFR1)
3998                     .Case("mvfr2", ARM::VMRS_MVFR2)
3999                     .Case("fpinst", ARM::VMRS_FPINST)
4000                     .Case("fpinst2", ARM::VMRS_FPINST2)
4001                     .Default(0);
4002
4003   // If an opcode was found then we can lower the read to a VFP instruction.
4004   if (Opcode) {
4005     if (!Subtarget->hasVFP2())
4006       return false;
4007     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4008       return false;
4009
4010     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4011             N->getOperand(0) };
4012     ReplaceNode(N,
4013                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4014     return true;
4015   }
4016
4017   // If the target is M Class then need to validate that the register string
4018   // is an acceptable value, so check that a mask can be constructed from the
4019   // string.
4020   if (Subtarget->isMClass()) {
4021     StringRef Flags = "", Reg = SpecialReg;
4022     if (Reg.endswith("_ns")) {
4023       Flags = "ns";
4024       Reg = Reg.drop_back(3);
4025     }
4026
4027     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4028     if (SYSmValue == -1)
4029       return false;
4030
4031     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4032                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4033                       N->getOperand(0) };
4034     ReplaceNode(
4035         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4036     return true;
4037   }
4038
4039   // Here we know the target is not M Class so we need to check if it is one
4040   // of the remaining possible values which are apsr, cpsr or spsr.
4041   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4042     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4043             N->getOperand(0) };
4044     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4045                                           DL, MVT::i32, MVT::Other, Ops));
4046     return true;
4047   }
4048
4049   if (SpecialReg == "spsr") {
4050     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4051             N->getOperand(0) };
4052     ReplaceNode(
4053         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4054                                   MVT::i32, MVT::Other, Ops));
4055     return true;
4056   }
4057
4058   return false;
4059 }
4060
4061 // Lower the write_register intrinsic to ARM specific DAG nodes
4062 // using the supplied metadata string to select the instruction node to use
4063 // and the registers/masks to use in the nodes
4064 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4065   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4066   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4067   bool IsThumb2 = Subtarget->isThumb2();
4068   SDLoc DL(N);
4069
4070   std::vector<SDValue> Ops;
4071   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4072
4073   if (!Ops.empty()) {
4074     // If the special register string was constructed of fields (as defined
4075     // in the ACLE) then need to lower to MCR node (32 bit) or
4076     // MCRR node(64 bit), we can make the distinction based on the number of
4077     // operands we have.
4078     unsigned Opcode;
4079     if (Ops.size() == 5) {
4080       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4081       Ops.insert(Ops.begin()+2, N->getOperand(2));
4082     } else {
4083       assert(Ops.size() == 3 &&
4084               "Invalid number of fields in special register string.");
4085       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4086       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4087       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4088     }
4089
4090     Ops.push_back(getAL(CurDAG, DL));
4091     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4092     Ops.push_back(N->getOperand(0));
4093
4094     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4095     return true;
4096   }
4097
4098   std::string SpecialReg = RegString->getString().lower();
4099   int BankedReg = getBankedRegisterMask(SpecialReg);
4100   if (BankedReg != -1) {
4101     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4102             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4103             N->getOperand(0) };
4104     ReplaceNode(
4105         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4106                                   DL, MVT::Other, Ops));
4107     return true;
4108   }
4109
4110   // The VFP registers are written to by creating SelectionDAG nodes with
4111   // opcodes corresponding to the register that is being written. So we switch
4112   // on the string to find which opcode we need to use.
4113   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4114                     .Case("fpscr", ARM::VMSR)
4115                     .Case("fpexc", ARM::VMSR_FPEXC)
4116                     .Case("fpsid", ARM::VMSR_FPSID)
4117                     .Case("fpinst", ARM::VMSR_FPINST)
4118                     .Case("fpinst2", ARM::VMSR_FPINST2)
4119                     .Default(0);
4120
4121   if (Opcode) {
4122     if (!Subtarget->hasVFP2())
4123       return false;
4124     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4125             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4126     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4127     return true;
4128   }
4129
4130   std::pair<StringRef, StringRef> Fields;
4131   Fields = StringRef(SpecialReg).rsplit('_');
4132   std::string Reg = Fields.first.str();
4133   StringRef Flags = Fields.second;
4134
4135   // If the target was M Class then need to validate the special register value
4136   // and retrieve the mask for use in the instruction node.
4137   if (Subtarget->isMClass()) {
4138     // basepri_max gets split so need to correct Reg and Flags.
4139     if (SpecialReg == "basepri_max") {
4140       Reg = SpecialReg;
4141       Flags = "";
4142     }
4143     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4144     if (SYSmValue == -1)
4145       return false;
4146
4147     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4148                       N->getOperand(2), getAL(CurDAG, DL),
4149                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4150     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4151     return true;
4152   }
4153
4154   // We then check to see if a valid mask can be constructed for one of the
4155   // register string values permitted for the A and R class cores. These values
4156   // are apsr, spsr and cpsr; these are also valid on older cores.
4157   int Mask = getARClassRegisterMask(Reg, Flags);
4158   if (Mask != -1) {
4159     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4160             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4161             N->getOperand(0) };
4162     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4163                                           DL, MVT::Other, Ops));
4164     return true;
4165   }
4166
4167   return false;
4168 }
4169
4170 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4171   std::vector<SDValue> AsmNodeOperands;
4172   unsigned Flag, Kind;
4173   bool Changed = false;
4174   unsigned NumOps = N->getNumOperands();
4175
4176   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4177   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4178   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4179   // respectively. Since there is no constraint to explicitly specify a
4180   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4181   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4182   // them into a GPRPair.
4183
4184   SDLoc dl(N);
4185   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4186                                    : SDValue(nullptr,0);
4187
4188   SmallVector<bool, 8> OpChanged;
4189   // Glue node will be appended late.
4190   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4191     SDValue op = N->getOperand(i);
4192     AsmNodeOperands.push_back(op);
4193
4194     if (i < InlineAsm::Op_FirstOperand)
4195       continue;
4196
4197     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4198       Flag = C->getZExtValue();
4199       Kind = InlineAsm::getKind(Flag);
4200     }
4201     else
4202       continue;
4203
4204     // Immediate operands to inline asm in the SelectionDAG are modeled with
4205     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4206     // the second is a constant with the value of the immediate. If we get here
4207     // and we have a Kind_Imm, skip the next operand, and continue.
4208     if (Kind == InlineAsm::Kind_Imm) {
4209       SDValue op = N->getOperand(++i);
4210       AsmNodeOperands.push_back(op);
4211       continue;
4212     }
4213
4214     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4215     if (NumRegs)
4216       OpChanged.push_back(false);
4217
4218     unsigned DefIdx = 0;
4219     bool IsTiedToChangedOp = false;
4220     // If it's a use that is tied with a previous def, it has no
4221     // reg class constraint.
4222     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4223       IsTiedToChangedOp = OpChanged[DefIdx];
4224
4225     // Memory operands to inline asm in the SelectionDAG are modeled with two
4226     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4227     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4228     // it doesn't get misinterpreted), and continue. We do this here because
4229     // it's important to update the OpChanged array correctly before moving on.
4230     if (Kind == InlineAsm::Kind_Mem) {
4231       SDValue op = N->getOperand(++i);
4232       AsmNodeOperands.push_back(op);
4233       continue;
4234     }
4235
4236     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4237         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4238       continue;
4239
4240     unsigned RC;
4241     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4242     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4243         || NumRegs != 2)
4244       continue;
4245
4246     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4247     SDValue V0 = N->getOperand(i+1);
4248     SDValue V1 = N->getOperand(i+2);
4249     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4250     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4251     SDValue PairedReg;
4252     MachineRegisterInfo &MRI = MF->getRegInfo();
4253
4254     if (Kind == InlineAsm::Kind_RegDef ||
4255         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4256       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4257       // the original GPRs.
4258
4259       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4260       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4261       SDValue Chain = SDValue(N,0);
4262
4263       SDNode *GU = N->getGluedUser();
4264       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4265                                                Chain.getValue(1));
4266
4267       // Extract values from a GPRPair reg and copy to the original GPR reg.
4268       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4269                                                     RegCopy);
4270       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4271                                                     RegCopy);
4272       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4273                                         RegCopy.getValue(1));
4274       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4275
4276       // Update the original glue user.
4277       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4278       Ops.push_back(T1.getValue(1));
4279       CurDAG->UpdateNodeOperands(GU, Ops);
4280     }
4281     else {
4282       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4283       // GPRPair and then pass the GPRPair to the inline asm.
4284       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4285
4286       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4287       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4288                                           Chain.getValue(1));
4289       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4290                                           T0.getValue(1));
4291       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4292
4293       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4294       // i32 VRs of inline asm with it.
4295       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4296       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4297       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4298
4299       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4300       Glue = Chain.getValue(1);
4301     }
4302
4303     Changed = true;
4304
4305     if(PairedReg.getNode()) {
4306       OpChanged[OpChanged.size() -1 ] = true;
4307       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4308       if (IsTiedToChangedOp)
4309         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4310       else
4311         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4312       // Replace the current flag.
4313       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4314           Flag, dl, MVT::i32);
4315       // Add the new register node and skip the original two GPRs.
4316       AsmNodeOperands.push_back(PairedReg);
4317       // Skip the next two GPRs.
4318       i += 2;
4319     }
4320   }
4321
4322   if (Glue.getNode())
4323     AsmNodeOperands.push_back(Glue);
4324   if (!Changed)
4325     return false;
4326
4327   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4328       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4329   New->setNodeId(-1);
4330   ReplaceNode(N, New.getNode());
4331   return true;
4332 }
4333
4334
4335 bool ARMDAGToDAGISel::
4336 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4337                              std::vector<SDValue> &OutOps) {
4338   switch(ConstraintID) {
4339   default:
4340     llvm_unreachable("Unexpected asm memory constraint");
4341   case InlineAsm::Constraint_i:
4342     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4343     //        be an immediate and not a memory constraint.
4344     LLVM_FALLTHROUGH;
4345   case InlineAsm::Constraint_m:
4346   case InlineAsm::Constraint_o:
4347   case InlineAsm::Constraint_Q:
4348   case InlineAsm::Constraint_Um:
4349   case InlineAsm::Constraint_Un:
4350   case InlineAsm::Constraint_Uq:
4351   case InlineAsm::Constraint_Us:
4352   case InlineAsm::Constraint_Ut:
4353   case InlineAsm::Constraint_Uv:
4354   case InlineAsm::Constraint_Uy:
4355     // Require the address to be in a register.  That is safe for all ARM
4356     // variants and it is hard to do anything much smarter without knowing
4357     // how the operand is used.
4358     OutOps.push_back(Op);
4359     return false;
4360   }
4361   return true;
4362 }
4363
4364 /// createARMISelDag - This pass converts a legalized DAG into a
4365 /// ARM-specific DAG, ready for instruction scheduling.
4366 ///
4367 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4368                                      CodeGenOpt::Level OptLevel) {
4369   return new ARMDAGToDAGISel(TM, OptLevel);
4370 }