1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM target.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Target/TargetLowering.h"
35 #include "llvm/Target/TargetOptions.h"
39 #define DEBUG_TYPE "arm-isel"
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
53 AM2_BASE, // Simple AM2 (+-imm12)
54 AM2_SHOP // Shifter-op AM2
57 class ARMDAGToDAGISel : public SelectionDAGISel {
// NOTE(review): this is a line-numbered listing; the jumps in the embedded
// numbers show that some original lines (closing braces of the inline
// helpers, some parameter lists, and the class's closing "};") are elided.
58 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
59 /// make the right decision when generating code for different targets.
60 const ARMSubtarget *Subtarget;
// Constructor: forwards the target machine and optimization level to the
// generic SelectionDAGISel base; Subtarget is (re)bound per function below.
63 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
64 : SelectionDAGISel(tm, OptLevel) {}
66 bool runOnMachineFunction(MachineFunction &MF) override {
67 // Reset the subtarget each time through.
68 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 SelectionDAGISel::runOnMachineFunction(MF);
// Human-readable pass name shown in -debug-pass / time-passes output.
73 StringRef getPassName() const override { return "ARM Instruction Selection"; }
75 void PreprocessISelDAG() override;
77 /// getI32Imm - Return a target constant of type i32 with the specified
79 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
80 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
// Main instruction-selection entry point for a single DAG node.
83 void Select(SDNode *N) override;
85 bool hasNoVMLxHazardUse(SDNode *N) const;
86 bool isShifterOpProfitable(const SDValue &Shift,
87 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
88 bool SelectRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C,
90 bool CheckProfitability = true);
91 bool SelectImmShifterOperand(SDValue N, SDValue &A,
92 SDValue &B, bool CheckProfitability = true);
// Wrappers used by shift-instruction patterns: same matching as the two
// selectors above but with the profitability heuristic disabled.
93 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, SDValue &C) {
95 // Don't apply the profitability check
96 return SelectRegShifterOperand(N, A, B, C, false);
98 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, false);
104 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
105 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
// AM2 selection is funneled through one worker; the two thin wrappers below
// accept only the base-immediate or only the shifter-operand form of AM2.
107 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
108 SDValue &Offset, SDValue &Opc);
109 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
111 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
114 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
116 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
119 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
121 SelectAddrMode2Worker(N, Base, Offset, Opc);
122 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
123 // This always matches one way or another.
// Lower a condition-code constant into a predicate operand plus the CPSR
// flags register for CMOV-style patterns.
127 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
128 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
129 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
130 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
134 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
137 SDValue &Offset, SDValue &Opc);
138 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
139 SDValue &Offset, SDValue &Opc);
140 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
141 bool SelectAddrMode3(SDValue N, SDValue &Base,
142 SDValue &Offset, SDValue &Opc);
143 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode5(SDValue N, SDValue &Base,
147 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
148 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
150 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
152 // Thumb Addressing Modes:
153 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
154 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
156 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
158 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
160 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
162 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
164 // Thumb 2 Addressing Modes:
165 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
166 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
168 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
170 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
171 SDValue &OffReg, SDValue &ShImm);
172 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
// Immediate predicates used by the auto-generated matcher: test whether a
// value (or its bitwise complement) is encodable as an ARM / Thumb-2
// shifter-operand immediate.
174 inline bool is_so_imm(unsigned Imm) const {
175 return ARM_AM::getSOImmVal(Imm) != -1;
178 inline bool is_so_imm_not(unsigned Imm) const {
179 return ARM_AM::getSOImmVal(~Imm) != -1;
182 inline bool is_t2_so_imm(unsigned Imm) const {
183 return ARM_AM::getT2SOImmVal(Imm) != -1;
186 inline bool is_t2_so_imm_not(unsigned Imm) const {
187 return ARM_AM::getT2SOImmVal(~Imm) != -1;
190 // Include the pieces autogenerated from the target description.
191 #include "ARMGenDAGISel.inc"
// Copy memory operands from one machine node to another.
194 void transferMemOperands(SDNode *Src, SDNode *Dst);
196 /// Indexed (pre/post inc/dec) load matching code for ARM.
197 bool tryARMIndexedLoad(SDNode *N);
198 bool tryT1IndexedLoad(SDNode *N);
199 bool tryT2IndexedLoad(SDNode *N);
201 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
202 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
203 /// loads of D registers and even subregs and odd subregs of Q registers.
204 /// For NumVecs <= 2, QOpcodes1 is not used.
205 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
206 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
207 const uint16_t *QOpcodes1);
209 /// SelectVST - Select NEON store intrinsics. NumVecs should
210 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
211 /// stores of D registers and even subregs and odd subregs of Q registers.
212 /// For NumVecs <= 2, QOpcodes1 is not used.
213 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
214 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
215 const uint16_t *QOpcodes1);
217 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
218 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
219 /// load/store of D registers and Q registers.
220 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
221 unsigned NumVecs, const uint16_t *DOpcodes,
222 const uint16_t *QOpcodes);
224 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
225 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
226 /// for loading D registers. (Q registers are not supported.)
227 void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
228 const uint16_t *DOpcodes,
229 const uint16_t *QOpcodes = nullptr);
231 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
232 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
233 /// generated to force the table registers to be consecutive.
234 void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
236 /// Try to select SBFX/UBFX instructions for ARM.
237 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
239 // Select special operations if node forms integer ABS pattern
240 bool tryABSOp(SDNode *N);
242 bool tryReadRegister(SDNode *N);
243 bool tryWriteRegister(SDNode *N);
245 bool tryInlineAsm(SDNode *N);
247 void SelectConcatVector(SDNode *N);
248 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
250 bool trySMLAWSMULW(SDNode *N);
252 void SelectCMP_SWAP(SDNode *N);
254 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
255 /// inline asm expressions.
256 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
257 std::vector<SDValue> &OutOps) override;
259 // Form pairs of consecutive R, S, D, or Q registers.
260 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
261 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
262 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
263 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
265 // Form sequences of 4 consecutive S, D, or Q registers.
266 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
267 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
268 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
270 // Get the alignment operand for a NEON VLD or VST instruction.
271 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
274 /// Returns the number of instructions required to materialize the given
275 /// constant in a register, or 3 if a literal pool load is needed.
276 unsigned ConstantMaterializationCost(unsigned Val) const;
278 /// Checks if N is a multiplication by a constant where we can extract out a
279 /// power of two from the constant so that it can be used in a shift, but only
280 /// if it simplifies the materialization of the constant. Returns true if it
281 /// is, and assigns to PowerOfTwo the power of two that should be extracted
282 /// out and to NewMulConst the new constant to be multiplied by.
283 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
284 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
286 /// Replace N with M in CurDAG, in a way that also ensures that M gets
287 /// selected when N would have been selected.
288 void replaceDAGValue(const SDValue &N, SDValue M);
292 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
293 /// operand. If so Imm will receive the 32-bit value.
294 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
295 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
296 Imm = cast<ConstantSDNode>(N)->getZExtValue();
// NOTE(review): original lines 297-301 (presumably the return true/false
// paths and closing braces) are elided from this listing.
302 // isInt32Immediate - This method tests to see if the operand is a 32-bit
303 // constant. If so Imm will receive the 32 bit value.
304 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
305 return isInt32Immediate(N.getNode(), Imm);
308 // isOpcWithIntImmediate - This method tests to see if the node is a specific
309 // opcode and that it has an immediate integer right operand.
310 // If so Imm will receive the 32 bit value.
311 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
312 return N->getOpcode() == Opc &&
313 isInt32Immediate(N->getOperand(1).getNode(), Imm);
316 /// \brief Check whether a particular node is a constant value representable as
317 /// (N * Scale) where N is in the half-open range [\p RangeMin, \p RangeMax).
319 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
320 static bool isScaledConstantInRange(SDValue Node, int Scale,
321 int RangeMin, int RangeMax,
322 int &ScaledConstant) {
323 assert(Scale > 0 && "Invalid scale!");
325 // Check that this is a constant.
326 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
// NOTE(review): original lines 327-329 (presumably the !C early-return and a
// comment) are elided from this listing.
330 ScaledConstant = (int) C->getZExtValue();
331 if ((ScaledConstant % Scale) != 0)
// Divide out the scale; the caller receives the pre-scaled value.
334 ScaledConstant /= Scale;
335 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
// PreprocessISelDAG - Rewrite (add X1, (and (srl X2, c1), c2)) so that the
// shift can be folded as an 'add' shifter operand and the and/srl pair can
// become a bitfield extract (requires v6T2's UBFX, hence the guard below).
338 void ARMDAGToDAGISel::PreprocessISelDAG() {
339 if (!Subtarget->hasV6T2Ops())
// NOTE(review): named isThumb2 but initialized from isThumb() — looks
// intentional in context (v6T2 Thumb implies Thumb-2) but confirm upstream.
342 bool isThumb2 = Subtarget->isThumb();
343 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
344 E = CurDAG->allnodes_end(); I != E; ) {
345 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
347 if (N->getOpcode() != ISD::ADD)
350 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
351 // leading zeros, followed by consecutive set bits, followed by 1 or 2
352 // trailing zeros, e.g. 1020.
353 // Transform the expression to
354 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
355 // of trailing zeros of c2. The left shift would be folded as an shifter
356 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
359 SDValue N0 = N->getOperand(0);
360 SDValue N1 = N->getOperand(1);
361 unsigned And_imm = 0;
// The AND may be on either side of the ADD; if it is on the left, swap the
// operands (the swap itself is in elided lines) so N1 is always the AND.
362 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
363 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
369 // Check if the AND mask is an immediate of the form: 000.....1111111100
370 unsigned TZ = countTrailingZeros(And_imm);
371 if (TZ != 1 && TZ != 2)
372 // Be conservative here. Shifter operands aren't always free. e.g. On
373 // Swift, left shifter operand of 1 / 2 for free but others are not.
375 // ubfx r3, r1, #16, #8
376 // ldr.w r3, [r0, r3, lsl #2]
379 // and.w r2, r9, r1, lsr #14
// Reject masks whose set bits are not consecutive (x & (x+1) != 0 iff the
// ones are not a single contiguous run from some bit down).
383 if (And_imm & (And_imm + 1))
386 // Look for (and (srl X, c1), c2).
387 SDValue Srl = N1.getOperand(0);
388 unsigned Srl_imm = 0;
389 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
393 // Make sure first operand is not a shifter operand which would prevent
394 // folding of the left shift.
399 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
402 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
403 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
407 // Now make the transformation.
408 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
410 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
412 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
414 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
415 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
416 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
417 CurDAG->UpdateNodeOperands(N, N0, N1);
421 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
422 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
423 /// least on current ARM implementations) which should be avoided.
424 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
425 if (OptLevel == CodeGenOpt::None)
// Only subtargets that actually exhibit the VMLx hazard need the check.
428 if (!Subtarget->hasVMLxHazards())
// Inspect the (single) user of this node to decide whether fusing into a
// VMLA/VMLS would create a hazard.
434 SDNode *Use = *N->use_begin();
435 if (Use->getOpcode() == ISD::CopyToReg)
437 if (Use->isMachineOpcode()) {
438 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
439 CurDAG->getSubtarget().getInstrInfo());
441 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
444 unsigned Opcode = MCID.getOpcode();
445 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
447 // vmlx feeding into another vmlx. We actually want to unfold
448 // the use later in the MLxExpansion pass. e.g.
450 // vmla (stall 8 cycles)
455 // This adds up to about 18 - 19 cycles.
458 // vmul (stall 4 cycles)
459 // vadd adds up to about 14 cycles.
460 return TII->isFpMLxInstruction(Opcode);
// isShifterOpProfitable - Decide whether folding the given shift into an
// instruction as a shifter operand is worthwhile. On most subtargets it
// always is; A9-like cores and Swift restrict it to cheap cases.
466 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
467 ARM_AM::ShiftOpc ShOpcVal,
469 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
// A shift with a single use is folded away entirely, so it is free.
471 if (Shift.hasOneUse())
// Otherwise only LSL #2 (and LSL #1 on Swift) is considered cheap enough.
474 return ShOpcVal == ARM_AM::lsl &&
475 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
// ConstantMaterializationCost - Number of instructions needed to put Val in a
// register (3 meaning a literal pool load). Thumb and ARM encodings are
// costed separately below.
478 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
479 if (Subtarget->isThumb()) {
480 if (Val <= 255) return 1; // MOV
481 if (Subtarget->hasV6T2Ops() &&
482 (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
484 if (Val <= 510) return 2; // MOV + ADDi8
485 if (~Val <= 255) return 2; // MOV + MVN
486 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
// ARM-mode costs (the Thumb block above is closed in elided lines).
488 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
489 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
490 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
491 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
493 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
494 return 3; // Literal pool load
// canExtractShiftFromMul - See the declaration comment: factor a power of two
// out of a multiply-by-constant so it can be folded as a shift, but only when
// the smaller constant is strictly cheaper to materialize.
497 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
499 unsigned &PowerOfTwo,
500 SDValue &NewMulConst) const {
501 assert(N.getOpcode() == ISD::MUL);
502 assert(MaxShift > 0);
504 // If the multiply is used in more than one place then changing the constant
505 // will make other uses incorrect, so don't.
506 if (!N.hasOneUse()) return false;
507 // Check if the multiply is by a constant
508 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
509 if (!MulConst) return false;
510 // If the constant is used in more than one place then modifying it will mean
511 // we need to materialize two constants instead of one, which is a bad idea.
512 if (!MulConst->hasOneUse()) return false;
513 unsigned MulConstVal = MulConst->getZExtValue();
514 if (MulConstVal == 0) return false;
516 // Find the largest power of 2 that MulConstVal is a multiple of
// Start at MaxShift and count down (the decrement is in an elided line)
// until 2^PowerOfTwo divides the constant.
517 PowerOfTwo = MaxShift;
518 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
520 if (PowerOfTwo == 0) return false;
523 // Only optimise if the new cost is better
524 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
525 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
526 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
527 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
528 return NewCost < OldCost;
// replaceDAGValue - Replace N with M everywhere in CurDAG. Repositioning M to
// N's slot in the node order ensures M is visited (selected) when N would
// have been.
531 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
532 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
533 CurDAG->ReplaceAllUsesWith(N, M);
// SelectImmShifterOperand - Match N as a register shifted by a constant
// amount (immediate shifter operand), producing the base register and the
// packed shift opcode/amount operand.
536 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
539 bool CheckProfitability) {
540 if (DisableShifterOp)
543 // If N is a multiply-by-constant and it's profitable to extract a shift and
544 // use it in a shifted operand do so.
545 if (N.getOpcode() == ISD::MUL) {
546 unsigned PowerOfTwo = 0;
548 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
// Handle keeps N alive while replaceDAGValue rewrites its constant operand.
549 HandleSDNode Handle(N);
550 replaceDAGValue(N.getOperand(1), NewMulConst);
551 BaseReg = Handle.getValue();
552 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
559 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
561 // Don't match base register only case. That is matched to a separate
562 // lower complexity pattern with explicit register operand.
563 if (ShOpcVal == ARM_AM::no_shift) return false;
565 BaseReg = N.getOperand(0);
566 unsigned ShImmVal = 0;
// Only constant shift amounts belong here; register amounts are handled by
// SelectRegShifterOperand.
567 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
568 if (!RHS) return false;
569 ShImmVal = RHS->getZExtValue() & 31;
570 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// SelectRegShifterOperand - Match N as a register shifted by another register
// (register shifter operand), producing base register, shift-amount register
// and the packed shift opcode operand.
575 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
579 bool CheckProfitability) {
580 if (DisableShifterOp)
583 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
585 // Don't match base register only case. That is matched to a separate
586 // lower complexity pattern with explicit register operand.
587 if (ShOpcVal == ARM_AM::no_shift) return false;
589 BaseReg = N.getOperand(0);
590 unsigned ShImmVal = 0;
// A constant shift amount is the immediate-shifter form, not this one.
591 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
592 if (RHS) return false;
594 ShReg = N.getOperand(1);
595 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
597 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// SelectAddrModeImm12 - Match [reg +/- imm12] addressing (LDRi12/STRi12
// form), also handling frame indices and ARMISD::Wrapper bases.
603 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
606 // Match simple R + imm12 operands.
609 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
610 !CurDAG->isBaseWithConstantOffset(N)) {
611 if (N.getOpcode() == ISD::FrameIndex) {
612 // Match frame index.
613 int FI = cast<FrameIndexSDNode>(N)->getIndex();
614 Base = CurDAG->getTargetFrameIndex(
615 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
616 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// Wrapped addresses are usable directly except for the TargetGlobal* /
// TargetExternalSymbol forms, which need their own lowering.
620 if (N.getOpcode() == ARMISD::Wrapper &&
621 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
622 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
623 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
624 Base = N.getOperand(0);
627 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
631 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
632 int RHSC = (int)RHS->getSExtValue();
// For SUB the constant is negated (in an elided line) before range check.
633 if (N.getOpcode() == ISD::SUB)
636 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
637 Base = N.getOperand(0);
638 if (Base.getOpcode() == ISD::FrameIndex) {
639 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
640 Base = CurDAG->getTargetFrameIndex(
641 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
643 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
// Fallback: use the whole expression as the base with a zero offset.
650 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// SelectLdStSOReg - Match [reg +/- reg, shift] addressing for loads/stores,
// leaving plain [reg + imm12] for the LDRi12 patterns.
656 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
658 if (N.getOpcode() == ISD::MUL &&
659 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
660 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
661 // X * [3,5,9] -> X + X * [2,4,8] etc.
662 int RHSC = (int)RHS->getZExtValue();
665 ARM_AM::AddrOpc AddSub = ARM_AM::add;
667 AddSub = ARM_AM::sub;
670 if (isPowerOf2_32(RHSC)) {
671 unsigned ShAmt = Log2_32(RHSC);
672 Base = Offset = N.getOperand(0);
673 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
682 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
683 // ISD::OR that is equivalent to an ISD::ADD.
684 !CurDAG->isBaseWithConstantOffset(N))
687 // Leave simple R +/- imm12 operands for LDRi12
688 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
690 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
691 -0x1000+1, 0x1000, RHSC)) // 12 bits.
695 // Otherwise this is R +/- [possibly shifted] R.
696 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
697 ARM_AM::ShiftOpc ShOpcVal =
698 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
701 Base = N.getOperand(0);
702 Offset = N.getOperand(1);
704 if (ShOpcVal != ARM_AM::no_shift) {
705 // Check to see if the RHS of the shift is a constant, if not, we can't fold
707 if (ConstantSDNode *Sh =
708 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
709 ShAmt = Sh->getZExtValue();
710 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
711 Offset = N.getOperand(1).getOperand(0);
// Not profitable / not constant: fall back to an unshifted offset register.
714 ShOpcVal = ARM_AM::no_shift;
717 ShOpcVal = ARM_AM::no_shift;
721 // Try matching (R shl C) + (R).
722 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
723 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
724 N.getOperand(0).hasOneUse())) {
725 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
726 if (ShOpcVal != ARM_AM::no_shift) {
727 // Check to see if the RHS of the shift is a constant, if not, we can't
729 if (ConstantSDNode *Sh =
730 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
731 ShAmt = Sh->getZExtValue();
732 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
// Shift found on the LHS: swap roles so the shifted value is the offset.
733 Offset = N.getOperand(0).getOperand(0);
734 Base = N.getOperand(1);
737 ShOpcVal = ARM_AM::no_shift;
740 ShOpcVal = ARM_AM::no_shift;
745 // If Offset is a multiply-by-constant and it's profitable to extract a shift
746 // and use it in a shifted operand do so.
747 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
748 unsigned PowerOfTwo = 0;
750 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
751 replaceDAGValue(Offset.getOperand(1), NewMulConst);
753 ShOpcVal = ARM_AM::lsl;
757 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// SelectAddrMode2Worker - Core AM2 matcher. Returns AM2_BASE for the simple
// [reg +/- imm12] form and AM2_SHOP for the [reg +/- reg, shift] form; the
// SelectAddrMode2Base/ShOp wrappers filter on that result.
765 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
769 if (N.getOpcode() == ISD::MUL &&
770 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
771 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
772 // X * [3,5,9] -> X + X * [2,4,8] etc.
773 int RHSC = (int)RHS->getZExtValue();
776 ARM_AM::AddrOpc AddSub = ARM_AM::add;
778 AddSub = ARM_AM::sub;
781 if (isPowerOf2_32(RHSC)) {
782 unsigned ShAmt = Log2_32(RHSC);
783 Base = Offset = N.getOperand(0);
784 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
793 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
794 // ISD::OR that is equivalent to an ADD.
795 !CurDAG->isBaseWithConstantOffset(N)) {
797 if (N.getOpcode() == ISD::FrameIndex) {
798 int FI = cast<FrameIndexSDNode>(N)->getIndex();
799 Base = CurDAG->getTargetFrameIndex(
800 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
801 } else if (N.getOpcode() == ARMISD::Wrapper &&
802 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
803 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
804 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
805 Base = N.getOperand(0);
// No usable offset: register 0 with a zero AM2 opcode encodes "no offset".
807 Offset = CurDAG->getRegister(0, MVT::i32);
808 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
814 // Match simple R +/- imm12 operands.
815 if (N.getOpcode() != ISD::SUB) {
817 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
818 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
819 Base = N.getOperand(0);
820 if (Base.getOpcode() == ISD::FrameIndex) {
821 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
822 Base = CurDAG->getTargetFrameIndex(
823 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
825 Offset = CurDAG->getRegister(0, MVT::i32);
827 ARM_AM::AddrOpc AddSub = ARM_AM::add;
829 AddSub = ARM_AM::sub;
832 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
// On A9-like/Swift cores, keep a multi-use R +/- (R << N) computation in a
// register instead of refolding the shift at every use.
839 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
840 // Compute R +/- (R << N) and reuse it.
842 Offset = CurDAG->getRegister(0, MVT::i32);
843 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
849 // Otherwise this is R +/- [possibly shifted] R.
850 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
851 ARM_AM::ShiftOpc ShOpcVal =
852 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
855 Base = N.getOperand(0);
856 Offset = N.getOperand(1);
858 if (ShOpcVal != ARM_AM::no_shift) {
859 // Check to see if the RHS of the shift is a constant, if not, we can't fold
861 if (ConstantSDNode *Sh =
862 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
863 ShAmt = Sh->getZExtValue();
864 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
865 Offset = N.getOperand(1).getOperand(0);
868 ShOpcVal = ARM_AM::no_shift;
871 ShOpcVal = ARM_AM::no_shift;
875 // Try matching (R shl C) + (R).
876 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
877 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
878 N.getOperand(0).hasOneUse())) {
879 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
880 if (ShOpcVal != ARM_AM::no_shift) {
881 // Check to see if the RHS of the shift is a constant, if not, we can't
883 if (ConstantSDNode *Sh =
884 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
885 ShAmt = Sh->getZExtValue();
886 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
// Shift found on the LHS: swap roles so the shifted value is the offset.
887 Offset = N.getOperand(0).getOperand(0);
888 Base = N.getOperand(1);
891 ShOpcVal = ARM_AM::no_shift;
894 ShOpcVal = ARM_AM::no_shift;
899 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// SelectAddrMode2OffsetReg - Match the offset operand of a pre/post-indexed
// AM2 load/store as a (possibly shifted) register; immediates are rejected
// (they go through SelectAddrMode2OffsetImm*).
904 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
905 SDValue &Offset, SDValue &Opc) {
906 unsigned Opcode = Op->getOpcode();
907 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
908 ? cast<LoadSDNode>(Op)->getAddressingMode()
909 : cast<StoreSDNode>(Op)->getAddressingMode();
// Increment modes add the offset; decrement modes subtract it.
910 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
911 ? ARM_AM::add : ARM_AM::sub;
// Constant offsets are not this form.
913 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
917 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
919 if (ShOpcVal != ARM_AM::no_shift) {
920 // Check to see if the RHS of the shift is a constant, if not, we can't fold
922 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
923 ShAmt = Sh->getZExtValue();
924 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
925 Offset = N.getOperand(0);
928 ShOpcVal = ARM_AM::no_shift;
931 ShOpcVal = ARM_AM::no_shift;
935 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// SelectAddrMode2OffsetImmPre - Match an imm12 offset for a pre-indexed AM2
// access; the offset is emitted as a plain signed constant (negated for the
// subtracting modes) rather than an AM2-packed opcode.
940 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
941 SDValue &Offset, SDValue &Opc) {
942 unsigned Opcode = Op->getOpcode();
943 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
944 ? cast<LoadSDNode>(Op)->getAddressingMode()
945 : cast<StoreSDNode>(Op)->getAddressingMode();
946 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
947 ? ARM_AM::add : ARM_AM::sub;
949 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
950 if (AddSub == ARM_AM::sub) Val *= -1;
951 Offset = CurDAG->getRegister(0, MVT::i32);
952 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
// SelectAddrMode2OffsetImm - Match an imm12 offset for a pre/post-indexed AM2
// access, packing the add/sub direction and value into an AM2 opcode operand.
960 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
961 SDValue &Offset, SDValue &Opc) {
962 unsigned Opcode = Op->getOpcode();
963 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
964 ? cast<LoadSDNode>(Op)->getAddressingMode()
965 : cast<StoreSDNode>(Op)->getAddressingMode();
966 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
967 ? ARM_AM::add : ARM_AM::sub;
969 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
970 Offset = CurDAG->getRegister(0, MVT::i32);
971 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
973 SDLoc(Op), MVT::i32);
// SelectAddrOffsetNone - Accept any address as a bare base register with no
// offset (body elided from this listing).
980 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
// SelectAddrMode3 - Match AM3 addressing ([reg +/- reg] or [reg +/- imm8])
// used by halfword / doubleword loads and stores.
985 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
986 SDValue &Base, SDValue &Offset,
988 if (N.getOpcode() == ISD::SUB) {
989 // X - C is canonicalized to X + -C, no need to handle it here.
990 Base = N.getOperand(0);
991 Offset = N.getOperand(1);
992 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
997 if (!CurDAG->isBaseWithConstantOffset(N)) {
999 if (N.getOpcode() == ISD::FrameIndex) {
1000 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1001 Base = CurDAG->getTargetFrameIndex(
1002 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
// Register 0 with a zero AM3 opcode encodes "no offset".
1004 Offset = CurDAG->getRegister(0, MVT::i32);
1005 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1010 // If the RHS is +/- imm8, fold into addr mode.
1012 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1013 -256 + 1, 256, RHSC)) { // 8 bits.
1014 Base = N.getOperand(0);
1015 if (Base.getOpcode() == ISD::FrameIndex) {
1016 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1017 Base = CurDAG->getTargetFrameIndex(
1018 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1020 Offset = CurDAG->getRegister(0, MVT::i32);
1022 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1024 AddSub = ARM_AM::sub;
1027 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
// Fallback: base-plus-register with no shift.
1032 Base = N.getOperand(0);
1033 Offset = N.getOperand(1);
1034 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
// SelectAddrMode3Offset - Match the offset operand of a pre/post-indexed AM3
// access as either an imm8 (packed into the AM3 opcode) or a plain register.
1039 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1040 SDValue &Offset, SDValue &Opc) {
1041 unsigned Opcode = Op->getOpcode();
1042 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1043 ? cast<LoadSDNode>(Op)->getAddressingMode()
1044 : cast<StoreSDNode>(Op)->getAddressingMode();
1045 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1046 ? ARM_AM::add : ARM_AM::sub;
1048 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
1049 Offset = CurDAG->getRegister(0, MVT::i32);
1050 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
// Not a small immediate: use N itself as a register offset (assignment of
// Offset = N is in elided lines).
1056 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1061 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1062 SDValue &Base, SDValue &Offset) {
1063 if (!CurDAG->isBaseWithConstantOffset(N)) {
1065 if (N.getOpcode() == ISD::FrameIndex) {
1066 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1067 Base = CurDAG->getTargetFrameIndex(
1068 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1069 } else if (N.getOpcode() == ARMISD::Wrapper &&
1070 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1071 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1072 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1073 Base = N.getOperand(0);
1075 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1076 SDLoc(N), MVT::i32);
1080 // If the RHS is +/- imm8, fold into addr mode.
1082 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1083 -256 + 1, 256, RHSC)) {
1084 Base = N.getOperand(0);
1085 if (Base.getOpcode() == ISD::FrameIndex) {
1086 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1087 Base = CurDAG->getTargetFrameIndex(
1088 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1091 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1093 AddSub = ARM_AM::sub;
1096 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1097 SDLoc(N), MVT::i32);
1102 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1103 SDLoc(N), MVT::i32);
1107 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1111 unsigned Alignment = 0;
1113 MemSDNode *MemN = cast<MemSDNode>(Parent);
1115 if (isa<LSBaseSDNode>(MemN) ||
1116 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1117 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1118 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1119 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1120 // The maximum alignment is equal to the memory size being referenced.
1121 unsigned MMOAlign = MemN->getAlignment();
1122 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1123 if (MMOAlign >= MemSize && MemSize > 1)
1124 Alignment = MemSize;
1126 // All other uses of addrmode6 are for intrinsics. For now just record
1127 // the raw alignment value; it will be refined later based on the legal
1128 // alignment operands for the intrinsic.
1129 Alignment = MemN->getAlignment();
1132 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1136 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1138 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1139 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1140 if (AM != ISD::POST_INC)
1143 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1144 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1145 Offset = CurDAG->getRegister(0, MVT::i32);
1150 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1151 SDValue &Offset, SDValue &Label) {
1152 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1153 Offset = N.getOperand(0);
1154 SDValue N1 = N.getOperand(1);
1155 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1156 SDLoc(N), MVT::i32);
1164 //===----------------------------------------------------------------------===//
1165 // Thumb Addressing Modes
1166 //===----------------------------------------------------------------------===//
1168 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1169 SDValue &Base, SDValue &Offset){
1170 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1171 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1172 if (!NC || !NC->isNullValue())
1179 Base = N.getOperand(0);
1180 Offset = N.getOperand(1);
1185 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1186 SDValue &Base, SDValue &OffImm) {
1187 if (!CurDAG->isBaseWithConstantOffset(N)) {
1188 if (N.getOpcode() == ISD::ADD) {
1189 return false; // We want to select register offset instead
1190 } else if (N.getOpcode() == ARMISD::Wrapper &&
1191 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1192 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1193 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1194 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1195 Base = N.getOperand(0);
1200 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1204 // If the RHS is + imm5 * scale, fold into addr mode.
1206 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1207 Base = N.getOperand(0);
1208 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1212 // Offset is too large, so use register offset instead.
1217 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1219 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1223 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1225 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1229 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1231 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1234 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1235 SDValue &Base, SDValue &OffImm) {
1236 if (N.getOpcode() == ISD::FrameIndex) {
1237 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1238 // Only multiples of 4 are allowed for the offset, so the frame object
1239 // alignment must be at least 4.
1240 MachineFrameInfo &MFI = MF->getFrameInfo();
1241 if (MFI.getObjectAlignment(FI) < 4)
1242 MFI.setObjectAlignment(FI, 4);
1243 Base = CurDAG->getTargetFrameIndex(
1244 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1245 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1249 if (!CurDAG->isBaseWithConstantOffset(N))
1252 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1253 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1254 (LHSR && LHSR->getReg() == ARM::SP)) {
1255 // If the RHS is + imm8 * scale, fold into addr mode.
1257 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1258 Base = N.getOperand(0);
1259 if (Base.getOpcode() == ISD::FrameIndex) {
1260 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1261 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1262 // indexed by the LHS must be 4-byte aligned.
1263 MachineFrameInfo &MFI = MF->getFrameInfo();
1264 if (MFI.getObjectAlignment(FI) < 4)
1265 MFI.setObjectAlignment(FI, 4);
1266 Base = CurDAG->getTargetFrameIndex(
1267 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1269 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1278 //===----------------------------------------------------------------------===//
1279 // Thumb 2 Addressing Modes
1280 //===----------------------------------------------------------------------===//
1283 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1284 SDValue &Base, SDValue &OffImm) {
1285 // Match simple R + imm12 operands.
1288 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1289 !CurDAG->isBaseWithConstantOffset(N)) {
1290 if (N.getOpcode() == ISD::FrameIndex) {
1291 // Match frame index.
1292 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1293 Base = CurDAG->getTargetFrameIndex(
1294 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1295 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1299 if (N.getOpcode() == ARMISD::Wrapper &&
1300 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1301 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1302 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1303 Base = N.getOperand(0);
1304 if (Base.getOpcode() == ISD::TargetConstantPool)
1305 return false; // We want to select t2LDRpci instead.
1308 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1312 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1313 if (SelectT2AddrModeImm8(N, Base, OffImm))
1314 // Let t2LDRi8 handle (R - imm8).
1317 int RHSC = (int)RHS->getZExtValue();
1318 if (N.getOpcode() == ISD::SUB)
1321 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1322 Base = N.getOperand(0);
1323 if (Base.getOpcode() == ISD::FrameIndex) {
1324 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1325 Base = CurDAG->getTargetFrameIndex(
1326 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1328 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1335 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1339 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1340 SDValue &Base, SDValue &OffImm) {
1341 // Match simple R - imm8 operands.
1342 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1343 !CurDAG->isBaseWithConstantOffset(N))
1346 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1347 int RHSC = (int)RHS->getSExtValue();
1348 if (N.getOpcode() == ISD::SUB)
1351 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1352 Base = N.getOperand(0);
1353 if (Base.getOpcode() == ISD::FrameIndex) {
1354 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1355 Base = CurDAG->getTargetFrameIndex(
1356 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1358 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1366 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1368 unsigned Opcode = Op->getOpcode();
1369 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1370 ? cast<LoadSDNode>(Op)->getAddressingMode()
1371 : cast<StoreSDNode>(Op)->getAddressingMode();
1373 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1374 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1375 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1376 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1383 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1385 SDValue &OffReg, SDValue &ShImm) {
1386 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1387 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1390 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1391 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1392 int RHSC = (int)RHS->getZExtValue();
1393 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1395 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1399 // Look for (R + R) or (R + (R << [1,2,3])).
1401 Base = N.getOperand(0);
1402 OffReg = N.getOperand(1);
1404 // Swap if it is ((R << c) + R).
1405 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1406 if (ShOpcVal != ARM_AM::lsl) {
1407 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1408 if (ShOpcVal == ARM_AM::lsl)
1409 std::swap(Base, OffReg);
1412 if (ShOpcVal == ARM_AM::lsl) {
1413 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1415 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1416 ShAmt = Sh->getZExtValue();
1417 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1418 OffReg = OffReg.getOperand(0);
1425 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1426 // and use it in a shifted operand do so.
1427 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1428 unsigned PowerOfTwo = 0;
1429 SDValue NewMulConst;
1430 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1431 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1436 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1441 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1443 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1446 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1448 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1451 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1455 uint32_t RHSC = (int)RHS->getZExtValue();
1456 if (RHSC > 1020 || RHSC % 4 != 0)
1459 Base = N.getOperand(0);
1460 if (Base.getOpcode() == ISD::FrameIndex) {
1461 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1462 Base = CurDAG->getTargetFrameIndex(
1463 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1466 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1470 //===--------------------------------------------------------------------===//
1472 /// getAL - Returns a ARMCC::AL immediate node.
1473 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1474 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1477 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1478 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1479 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1480 cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1483 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1484 LoadSDNode *LD = cast<LoadSDNode>(N);
1485 ISD::MemIndexedMode AM = LD->getAddressingMode();
1486 if (AM == ISD::UNINDEXED)
1489 EVT LoadedVT = LD->getMemoryVT();
1490 SDValue Offset, AMOpc;
1491 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1492 unsigned Opcode = 0;
1494 if (LoadedVT == MVT::i32 && isPre &&
1495 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1496 Opcode = ARM::LDR_PRE_IMM;
1498 } else if (LoadedVT == MVT::i32 && !isPre &&
1499 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1500 Opcode = ARM::LDR_POST_IMM;
1502 } else if (LoadedVT == MVT::i32 &&
1503 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1504 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1507 } else if (LoadedVT == MVT::i16 &&
1508 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1510 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1511 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1512 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1513 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1514 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1515 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1517 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1521 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1523 Opcode = ARM::LDRB_PRE_IMM;
1524 } else if (!isPre &&
1525 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1527 Opcode = ARM::LDRB_POST_IMM;
1528 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1530 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1536 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1537 SDValue Chain = LD->getChain();
1538 SDValue Base = LD->getBasePtr();
1539 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1540 CurDAG->getRegister(0, MVT::i32), Chain };
1541 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1543 transferMemOperands(N, New);
1544 ReplaceNode(N, New);
1547 SDValue Chain = LD->getChain();
1548 SDValue Base = LD->getBasePtr();
1549 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1550 CurDAG->getRegister(0, MVT::i32), Chain };
1551 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1553 transferMemOperands(N, New);
1554 ReplaceNode(N, New);
1562 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1563 LoadSDNode *LD = cast<LoadSDNode>(N);
1564 EVT LoadedVT = LD->getMemoryVT();
1565 ISD::MemIndexedMode AM = LD->getAddressingMode();
1566 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1567 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1570 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1571 if (!COffs || COffs->getZExtValue() != 4)
1574 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1575 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1576 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1578 SDValue Chain = LD->getChain();
1579 SDValue Base = LD->getBasePtr();
1580 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1581 CurDAG->getRegister(0, MVT::i32), Chain };
1582 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1583 MVT::i32, MVT::Other, Ops);
1584 transferMemOperands(N, New);
1585 ReplaceNode(N, New);
1589 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1590 LoadSDNode *LD = cast<LoadSDNode>(N);
1591 ISD::MemIndexedMode AM = LD->getAddressingMode();
1592 if (AM == ISD::UNINDEXED)
1595 EVT LoadedVT = LD->getMemoryVT();
1596 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1598 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1599 unsigned Opcode = 0;
1601 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1602 switch (LoadedVT.getSimpleVT().SimpleTy) {
1604 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1608 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1610 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1615 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1617 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1626 SDValue Chain = LD->getChain();
1627 SDValue Base = LD->getBasePtr();
1628 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1629 CurDAG->getRegister(0, MVT::i32), Chain };
1630 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1632 transferMemOperands(N, New);
1633 ReplaceNode(N, New);
1640 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1641 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1642 SDLoc dl(V0.getNode());
1644 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1645 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1646 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1647 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1648 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1651 /// \brief Form a D register from a pair of S registers.
1652 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1653 SDLoc dl(V0.getNode());
1655 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1656 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1657 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1658 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1659 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1662 /// \brief Form a quad register from a pair of D registers.
1663 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1664 SDLoc dl(V0.getNode());
1665 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1667 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1668 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1669 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1670 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1673 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1674 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1675 SDLoc dl(V0.getNode());
1676 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1678 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1679 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1680 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1681 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1684 /// \brief Form 4 consecutive S registers.
1685 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1686 SDValue V2, SDValue V3) {
1687 SDLoc dl(V0.getNode());
1689 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1690 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1691 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1692 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1693 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1694 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1695 V2, SubReg2, V3, SubReg3 };
1696 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1699 /// \brief Form 4 consecutive D registers.
1700 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1701 SDValue V2, SDValue V3) {
1702 SDLoc dl(V0.getNode());
1703 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1705 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1706 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1707 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1708 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1709 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1710 V2, SubReg2, V3, SubReg3 };
1711 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1714 /// \brief Form 4 consecutive Q registers.
1715 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1716 SDValue V2, SDValue V3) {
1717 SDLoc dl(V0.getNode());
1718 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1720 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1721 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1722 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1723 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1724 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1725 V2, SubReg2, V3, SubReg3 };
1726 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1729 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1730 /// of a NEON VLD or VST instruction. The supported values depend on the
1731 /// number of registers being loaded.
1732 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1733 unsigned NumVecs, bool is64BitVector) {
1734 unsigned NumRegs = NumVecs;
1735 if (!is64BitVector && NumVecs < 3)
1738 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1739 if (Alignment >= 32 && NumRegs == 4)
1741 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1743 else if (Alignment >= 8)
1748 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1751 static bool isVLDfixed(unsigned Opc)
1754 default: return false;
1755 case ARM::VLD1d8wb_fixed : return true;
1756 case ARM::VLD1d16wb_fixed : return true;
1757 case ARM::VLD1d64Qwb_fixed : return true;
1758 case ARM::VLD1d32wb_fixed : return true;
1759 case ARM::VLD1d64wb_fixed : return true;
1760 case ARM::VLD1d64TPseudoWB_fixed : return true;
1761 case ARM::VLD1d64QPseudoWB_fixed : return true;
1762 case ARM::VLD1q8wb_fixed : return true;
1763 case ARM::VLD1q16wb_fixed : return true;
1764 case ARM::VLD1q32wb_fixed : return true;
1765 case ARM::VLD1q64wb_fixed : return true;
1766 case ARM::VLD1DUPd8wb_fixed : return true;
1767 case ARM::VLD1DUPd16wb_fixed : return true;
1768 case ARM::VLD1DUPd32wb_fixed : return true;
1769 case ARM::VLD1DUPq8wb_fixed : return true;
1770 case ARM::VLD1DUPq16wb_fixed : return true;
1771 case ARM::VLD1DUPq32wb_fixed : return true;
1772 case ARM::VLD2d8wb_fixed : return true;
1773 case ARM::VLD2d16wb_fixed : return true;
1774 case ARM::VLD2d32wb_fixed : return true;
1775 case ARM::VLD2q8PseudoWB_fixed : return true;
1776 case ARM::VLD2q16PseudoWB_fixed : return true;
1777 case ARM::VLD2q32PseudoWB_fixed : return true;
1778 case ARM::VLD2DUPd8wb_fixed : return true;
1779 case ARM::VLD2DUPd16wb_fixed : return true;
1780 case ARM::VLD2DUPd32wb_fixed : return true;
1784 static bool isVSTfixed(unsigned Opc)
1787 default: return false;
1788 case ARM::VST1d8wb_fixed : return true;
1789 case ARM::VST1d16wb_fixed : return true;
1790 case ARM::VST1d32wb_fixed : return true;
1791 case ARM::VST1d64wb_fixed : return true;
1792 case ARM::VST1q8wb_fixed : return true;
1793 case ARM::VST1q16wb_fixed : return true;
1794 case ARM::VST1q32wb_fixed : return true;
1795 case ARM::VST1q64wb_fixed : return true;
1796 case ARM::VST1d64TPseudoWB_fixed : return true;
1797 case ARM::VST1d64QPseudoWB_fixed : return true;
1798 case ARM::VST2d8wb_fixed : return true;
1799 case ARM::VST2d16wb_fixed : return true;
1800 case ARM::VST2d32wb_fixed : return true;
1801 case ARM::VST2q8PseudoWB_fixed : return true;
1802 case ARM::VST2q16PseudoWB_fixed : return true;
1803 case ARM::VST2q32PseudoWB_fixed : return true;
1807 // Get the register stride update opcode of a VLD/VST instruction that
1808 // is otherwise equivalent to the given fixed stride updating instruction.
1809 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1810 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1811 && "Incorrect fixed stride updating instruction.");
1814 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1815 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1816 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1817 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1818 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1819 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1820 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1821 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1822 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1823 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1824 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1825 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1826 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1827 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1828 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1829 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1830 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1831 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1833 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1834 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1835 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1836 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1837 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1838 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1839 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1840 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1841 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1842 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1844 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1845 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1846 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1847 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1848 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1849 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1851 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1852 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1853 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1854 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1855 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1856 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1858 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1859 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1860 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1862 return Opc; // If not one we handle, return it unchanged.
1865 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1866 const uint16_t *DOpcodes,
1867 const uint16_t *QOpcodes0,
1868 const uint16_t *QOpcodes1) {
1869 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1872 SDValue MemAddr, Align;
1873 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1874 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1877 SDValue Chain = N->getOperand(0);
1878 EVT VT = N->getValueType(0);
1879 bool is64BitVector = VT.is64BitVector();
1880 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1882 unsigned OpcodeIndex;
1883 switch (VT.getSimpleVT().SimpleTy) {
1884 default: llvm_unreachable("unhandled vld type");
1885 // Double-register operations:
1886 case MVT::v8i8: OpcodeIndex = 0; break;
1887 case MVT::v4i16: OpcodeIndex = 1; break;
1889 case MVT::v2i32: OpcodeIndex = 2; break;
1890 case MVT::v1i64: OpcodeIndex = 3; break;
1891 // Quad-register operations:
1892 case MVT::v16i8: OpcodeIndex = 0; break;
1893 case MVT::v8i16: OpcodeIndex = 1; break;
1895 case MVT::v4i32: OpcodeIndex = 2; break;
1897 case MVT::v2i64: OpcodeIndex = 3;
1898 assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1906 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1909 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1911 std::vector<EVT> ResTys;
1912 ResTys.push_back(ResTy);
1914 ResTys.push_back(MVT::i32);
1915 ResTys.push_back(MVT::Other);
1917 SDValue Pred = getAL(CurDAG, dl);
1918 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1920 SmallVector<SDValue, 7> Ops;
1922 // Double registers and VLD1/VLD2 quad registers are directly supported.
1923 if (is64BitVector || NumVecs <= 2) {
1924 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1925 QOpcodes0[OpcodeIndex]);
1926 Ops.push_back(MemAddr);
1927 Ops.push_back(Align);
1929 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1930 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1931 // case entirely when the rest are updated to that form, too.
1932 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1933 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1934 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1935 // check for that explicitly too. Horribly hacky, but temporary.
1936 if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1937 !isa<ConstantSDNode>(Inc.getNode()))
1938 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1940 Ops.push_back(Pred);
1941 Ops.push_back(Reg0);
1942 Ops.push_back(Chain);
1943 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1946 // Otherwise, quad registers are loaded with two separate instructions,
1947 // where one loads the even registers and the other loads the odd registers.
1948 EVT AddrTy = MemAddr.getValueType();
1950 // Load the even subregs. This is always an updating load, so that it
1951 // provides the address to the second load for the odd subregs.
1953 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1954 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1955 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1956 ResTy, AddrTy, MVT::Other, OpsA);
1957 Chain = SDValue(VLdA, 2);
1959 // Load the odd subregs.
1960 Ops.push_back(SDValue(VLdA, 1));
1961 Ops.push_back(Align);
1963 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1964 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1965 "only constant post-increment update allowed for VLD3/4");
1967 Ops.push_back(Reg0);
1969 Ops.push_back(SDValue(VLdA, 0));
1970 Ops.push_back(Pred);
1971 Ops.push_back(Reg0);
1972 Ops.push_back(Chain);
1973 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1976 // Transfer memoperands.
1977 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1978 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1979 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1982 ReplaceNode(N, VLd);
1986 // Extract out the subregisters.
1987 SDValue SuperReg = SDValue(VLd, 0);
1988 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1989 ARM::qsub_3 == ARM::qsub_0 + 3,
1990 "Unexpected subreg numbering");
1991 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1992 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1993 ReplaceUses(SDValue(N, Vec),
1994 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1995 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1997 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1998 CurDAG->RemoveDeadNode(N);
// Select a NEON structured vector store (VST1..VST4), optionally with a
// post-increment address update. DOpcodes/QOpcodes0/QOpcodes1 are per-type
// opcode tables: D-register forms, quad forms handled by one instruction,
// and the second ("odd registers") half of a split quad VST3/VST4.
// NOTE(review): interior lines of this function are elided in this view;
// comments below describe only the visible code.
2001 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2002 const uint16_t *DOpcodes,
2003 const uint16_t *QOpcodes0,
2004 const uint16_t *QOpcodes1) {
2005 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2008 SDValue MemAddr, Align;
// Updating intrinsics carry the base address at operand 1; plain intrinsics
// at operand 2 (after chain and intrinsic id).
2009 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2010 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2011 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
// Capture the memory operand so it can be attached to whichever machine
// node(s) we create below.
2014 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2015 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2017 SDValue Chain = N->getOperand(0);
2018 EVT VT = N->getOperand(Vec0Idx).getValueType();
2019 bool is64BitVector = VT.is64BitVector();
2020 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Map the element type to an index into the opcode tables.
2022 unsigned OpcodeIndex;
2023 switch (VT.getSimpleVT().SimpleTy) {
2024 default: llvm_unreachable("unhandled vst type");
2025 // Double-register operations:
2026 case MVT::v8i8: OpcodeIndex = 0; break;
2027 case MVT::v4i16: OpcodeIndex = 1; break;
2029 case MVT::v2i32: OpcodeIndex = 2; break;
2030 case MVT::v1i64: OpcodeIndex = 3; break;
2031 // Quad-register operations:
2032 case MVT::v16i8: OpcodeIndex = 0; break;
2033 case MVT::v8i16: OpcodeIndex = 1; break;
2035 case MVT::v4i32: OpcodeIndex = 2; break;
2037 case MVT::v2i64: OpcodeIndex = 3;
2038 assert(NumVecs == 1 && "v2i64 type only supported for VST1");
// Result types: write-back GPR (updating form) plus the chain.
2042 std::vector<EVT> ResTys;
2044 ResTys.push_back(MVT::i32);
2045 ResTys.push_back(MVT::Other);
2047 SDValue Pred = getAL(CurDAG, dl);
2048 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2049 SmallVector<SDValue, 7> Ops;
2051 // Double registers and VST1/VST2 quad registers are directly supported.
2052 if (is64BitVector || NumVecs <= 2) {
2055 SrcReg = N->getOperand(Vec0Idx);
2056 } else if (is64BitVector) {
2057 // Form a REG_SEQUENCE to force register allocation.
2058 SDValue V0 = N->getOperand(Vec0Idx + 0);
2059 SDValue V1 = N->getOperand(Vec0Idx + 1);
2061 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2063 SDValue V2 = N->getOperand(Vec0Idx + 2);
2064 // If it's a vst3, form a quad D-register and leave the last part as
// an undef so the register allocator has a full tuple to work with.
2066 SDValue V3 = (NumVecs == 3)
2067 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2068 : N->getOperand(Vec0Idx + 3);
2069 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2072 // Form a QQ register.
2073 SDValue Q0 = N->getOperand(Vec0Idx);
2074 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2075 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2078 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2079 QOpcodes0[OpcodeIndex]);
2080 Ops.push_back(MemAddr);
2081 Ops.push_back(Align);
// For updating stores, a non-constant increment becomes an explicit
// register-update operand; a constant increment is folded into the opcode.
2083 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2084 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2085 // case entirely when the rest are updated to that form, too.
2086 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2087 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2088 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2089 // check for that explicitly too. Horribly hacky, but temporary.
2090 if (!isa<ConstantSDNode>(Inc.getNode()))
2092 else if (NumVecs > 2 && !isVSTfixed(Opc))
2093 Ops.push_back(Reg0);
2095 Ops.push_back(SrcReg);
2096 Ops.push_back(Pred);
2097 Ops.push_back(Reg0);
2098 Ops.push_back(Chain);
2099 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2101 // Transfer memoperands.
2102 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2104 ReplaceNode(N, VSt);
2108 // Otherwise, quad registers are stored with two separate instructions,
2109 // where one stores the even registers and the other stores the odd registers.
2111 // Form the QQQQ REG_SEQUENCE.
2112 SDValue V0 = N->getOperand(Vec0Idx + 0);
2113 SDValue V1 = N->getOperand(Vec0Idx + 1);
2114 SDValue V2 = N->getOperand(Vec0Idx + 2);
2115 SDValue V3 = (NumVecs == 3)
2116 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2117 : N->getOperand(Vec0Idx + 3);
2118 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2120 // Store the even D registers. This is always an updating store, so that it
2121 // provides the address to the second store for the odd subregs.
2122 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2123 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2124 MemAddr.getValueType(),
2126 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
// Chain the odd-register store after the even-register store.
2127 Chain = SDValue(VStA, 1);
2129 // Store the odd D registers.
2130 Ops.push_back(SDValue(VStA, 0));
2131 Ops.push_back(Align);
2133 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2134 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2135 "only constant post-increment update allowed for VST3/4");
2137 Ops.push_back(Reg0);
2139 Ops.push_back(RegSeq);
2140 Ops.push_back(Pred);
2141 Ops.push_back(Reg0);
2142 Ops.push_back(Chain);
2143 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2145 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2146 ReplaceNode(N, VStB);
// Select a single-lane NEON load or store (VLD[234]LN / VST[234]LN):
// NumVecs registers each contribute/receive one lane at index `Lane`.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
2149 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2151 const uint16_t *DOpcodes,
2152 const uint16_t *QOpcodes) {
2153 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2156 SDValue MemAddr, Align;
2157 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2158 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2159 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2162 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2163 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2165 SDValue Chain = N->getOperand(0);
// The lane index follows the NumVecs vector operands.
2167 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2168 EVT VT = N->getOperand(Vec0Idx).getValueType();
2169 bool is64BitVector = VT.is64BitVector();
// Clamp the declared alignment to what a lane access can actually use and
// force it to a power of two.
2171 unsigned Alignment = 0;
2173 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2174 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2175 if (Alignment > NumBytes)
2176 Alignment = NumBytes;
2177 if (Alignment < 8 && Alignment < NumBytes)
2179 // Alignment must be a power of two; make sure of that.
2180 Alignment = (Alignment & -Alignment);
2184 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2186 unsigned OpcodeIndex;
2187 switch (VT.getSimpleVT().SimpleTy) {
2188 default: llvm_unreachable("unhandled vld/vst lane type");
2189 // Double-register operations:
2190 case MVT::v8i8: OpcodeIndex = 0; break;
2191 case MVT::v4i16: OpcodeIndex = 1; break;
2193 case MVT::v2i32: OpcodeIndex = 2; break;
2194 // Quad-register operations:
2195 case MVT::v8i16: OpcodeIndex = 0; break;
2197 case MVT::v4i32: OpcodeIndex = 1; break;
2200 std::vector<EVT> ResTys;
// A 3-register result is padded to a 4-register-wide super-register type.
2202 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2205 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2206 MVT::i64, ResTyElts));
2209 ResTys.push_back(MVT::i32);
2210 ResTys.push_back(MVT::Other);
2212 SDValue Pred = getAL(CurDAG, dl);
2213 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2215 SmallVector<SDValue, 8> Ops;
2216 Ops.push_back(MemAddr);
2217 Ops.push_back(Align);
// Constant post-increments are folded into the opcode (Reg0 placeholder);
// variable increments are passed explicitly.
2219 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2220 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
// Build the vector-tuple super-register operand from the input vectors.
2224 SDValue V0 = N->getOperand(Vec0Idx + 0);
2225 SDValue V1 = N->getOperand(Vec0Idx + 1);
2228 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2230 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2232 SDValue V2 = N->getOperand(Vec0Idx + 2);
2233 SDValue V3 = (NumVecs == 3)
2234 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2235 : N->getOperand(Vec0Idx + 3);
2237 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2239 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2241 Ops.push_back(SuperReg);
2242 Ops.push_back(getI32Imm(Lane, dl));
2243 Ops.push_back(Pred);
2244 Ops.push_back(Reg0);
2245 Ops.push_back(Chain);
2247 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2248 QOpcodes[OpcodeIndex]);
2249 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2250 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2252 ReplaceNode(N, VLdLn);
2256 // Extract the subregisters.
2257 SuperReg = SDValue(VLdLn, 0);
2258 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2259 ARM::qsub_3 == ARM::qsub_0 + 3,
2260 "Unexpected subreg numbering");
2261 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
// Rewire each original result to a subreg extract of the super-register,
// then the chain / write-back results.
2262 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2263 ReplaceUses(SDValue(N, Vec),
2264 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2265 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2267 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2268 CurDAG->RemoveDeadNode(N);
// Select a NEON load-and-duplicate (VLD1DUP..VLD4DUP): load one element and
// replicate it across all lanes of NumVecs registers.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
2271 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2272 const uint16_t *DOpcodes,
2273 const uint16_t *QOpcodes) {
2274 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2277 SDValue MemAddr, Align;
2278 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2281 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2282 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2284 SDValue Chain = N->getOperand(0);
2285 EVT VT = N->getValueType(0);
// Clamp/normalize the alignment the same way as the other VLD/VST selectors.
2287 unsigned Alignment = 0;
2289 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2290 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2291 if (Alignment > NumBytes)
2292 Alignment = NumBytes;
2293 if (Alignment < 8 && Alignment < NumBytes)
2295 // Alignment must be a power of two; make sure of that.
2296 Alignment = (Alignment & -Alignment);
2300 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Pick the opcode directly from the element type.
2303 switch (VT.getSimpleVT().SimpleTy) {
2304 default: llvm_unreachable("unhandled vld-dup type");
2305 case MVT::v8i8: Opc = DOpcodes[0]; break;
2306 case MVT::v16i8: Opc = QOpcodes[0]; break;
2307 case MVT::v4i16: Opc = DOpcodes[1]; break;
2308 case MVT::v8i16: Opc = QOpcodes[1]; break;
2310 case MVT::v2i32: Opc = DOpcodes[2]; break;
2312 case MVT::v4i32: Opc = QOpcodes[2]; break;
2315 SDValue Pred = getAL(CurDAG, dl);
2316 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2317 SmallVector<SDValue, 6> Ops;
2318 Ops.push_back(MemAddr);
2319 Ops.push_back(Align);
2321 // fixed-stride update instructions don't have an explicit writeback
2322 // operand. It's implicit in the opcode itself.
2323 SDValue Inc = N->getOperand(2);
2324 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2325 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2326 if (!isa<ConstantSDNode>(Inc.getNode()))
2328 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2329 else if (NumVecs > 2)
2330 Ops.push_back(Reg0);
2332 Ops.push_back(Pred);
2333 Ops.push_back(Reg0);
2334 Ops.push_back(Chain);
// Result super-register: 3-vector results are padded to a 4-wide type.
2336 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2337 std::vector<EVT> ResTys;
2338 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2340 ResTys.push_back(MVT::i32);
2341 ResTys.push_back(MVT::Other);
2342 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2343 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2345 // Extract the subregisters.
2347 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2349 SDValue SuperReg = SDValue(VLdDup, 0);
2350 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2351 unsigned SubIdx = ARM::dsub_0;
2352 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2353 ReplaceUses(SDValue(N, Vec),
2354 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
// Chain result, and the write-back result for updating forms.
2356 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2358 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2359 CurDAG->RemoveDeadNode(N);
// Select a NEON table lookup (VTBL) or table extension (VTBX, IsExt==true)
// over NumVecs table registers. The table registers are glued into a single
// REG_SEQUENCE super-register operand.
2362 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2364 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2366 EVT VT = N->getValueType(0);
// VTBX has an extra "fall-back" operand at index 1, shifting the first
// table register to operand 2.
2367 unsigned FirstTblReg = IsExt ? 2 : 1;
2369 // Form a REG_SEQUENCE to force register allocation.
2371 SDValue V0 = N->getOperand(FirstTblReg + 0);
2372 SDValue V1 = N->getOperand(FirstTblReg + 1);
2374 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2376 SDValue V2 = N->getOperand(FirstTblReg + 2);
2377 // If it's a vtbl3, form a quad D-register and leave the last part as
// an undef placeholder.
2379 SDValue V3 = (NumVecs == 3)
2380 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2381 : N->getOperand(FirstTblReg + 3);
2382 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2385 SmallVector<SDValue, 6> Ops;
2387 Ops.push_back(N->getOperand(1));
2388 Ops.push_back(RegSeq);
// The index vector follows the table registers.
2389 Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2390 Ops.push_back(getAL(CurDAG, dl)); // predicate
2391 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2392 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
// Try to match N (an AND, shift, or SIGN_EXTEND_INREG) to an ARMv6T2/Thumb2
// bitfield extract instruction: UBFX for unsigned, SBFX for signed.
// Returns true (after calling SelectNodeTo) if a match was made.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
2395 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2396 if (!Subtarget->hasV6T2Ops())
2399 unsigned Opc = isSigned
2400 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2401 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2404 // For unsigned extracts, check for a shift right and mask
2405 unsigned And_imm = 0;
2406 if (N->getOpcode() == ISD::AND) {
2407 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2409 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2410 if (And_imm & (And_imm + 1))
// Pattern: (and (srl X, lsb), low-bit-mask) -> UBFX X, lsb, width.
2413 unsigned Srl_imm = 0;
2414 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2416 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2418 // Note: The width operand is encoded as width-1.
2419 unsigned Width = countTrailingOnes(And_imm) - 1;
2420 unsigned LSB = Srl_imm;
2422 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2424 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2425 // It's cheaper to use a right shift to extract the top bits.
2426 if (Subtarget->isThumb()) {
2427 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2428 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2429 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2430 getAL(CurDAG, dl), Reg0, Reg0 };
2431 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2435 // ARM models shift instructions as MOVsi with shifter operand.
2436 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2438 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2440 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2441 getAL(CurDAG, dl), Reg0, Reg0 };
2442 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
// General case: emit the UBFX/SBFX with lsb and width-1 operands.
2446 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2447 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2448 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2449 getAL(CurDAG, dl), Reg0 };
2450 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2457 // Otherwise, we're looking for a shift of a shift
// Pattern: (srl (shl X, shl_imm), srl_imm) extracts an interior bitfield.
2458 unsigned Shl_imm = 0;
2459 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2460 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2461 unsigned Srl_imm = 0;
2462 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2463 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2464 // Note: The width operand is encoded as width-1.
2465 unsigned Width = 32 - Srl_imm - 1;
2466 int LSB = Srl_imm - Shl_imm;
2469 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2470 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2471 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2472 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2473 getAL(CurDAG, dl), Reg0 };
2474 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2479 // Or we are looking for a shift of an and, with a mask operand
// Pattern: (srl (and X, shifted-mask), lsb) where lsb equals the mask's
// trailing-zero count.
2480 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2481 isShiftedMask_32(And_imm)) {
2482 unsigned Srl_imm = 0;
2483 unsigned LSB = countTrailingZeros(And_imm);
2484 // Shift must be the same as the ands lsb
2485 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2486 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2487 unsigned MSB = 31 - countLeadingZeros(And_imm);
2488 // Note: The width operand is encoded as width-1.
2489 unsigned Width = MSB - LSB;
2490 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2491 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2492 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2493 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2494 getAL(CurDAG, dl), Reg0 };
2495 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
// Signed case: (sign_extend_inreg (srl/sra X, lsb)) -> SBFX X, lsb, width-1.
2500 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2501 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2503 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2504 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2507 if (LSB + Width > 32)
2510 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2511 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2512 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2513 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2514 getAL(CurDAG, dl), Reg0 };
2515 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2522 /// Target-specific DAG combining for ISD::XOR.
2523 /// Target-independent combining lowers SELECT_CC nodes of the form
2524 /// select_cc setg[ge] X, 0, X, -X
2525 /// select_cc setgt X, -1, X, -X
2526 /// select_cc setl[te] X, 0, -X, X
2527 /// select_cc setlt X, 1, -X, X
2528 /// which represent Integer ABS into:
2529 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2530 /// ARM instruction selection detects the latter and matches it to
2531 /// ARM::ABS or ARM::t2ABS machine node.
/// Returns true (after SelectNodeTo) if the ABS pattern was matched.
2532 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2533 SDValue XORSrc0 = N->getOperand(0);
2534 SDValue XORSrc1 = N->getOperand(1);
2535 EVT VT = N->getValueType(0);
// ABS pseudo is not available for Thumb1.
2537 if (Subtarget->isThumb1Only())
// Expect (xor (add X, Y), Y) with Y = (sra X, size-1).
2540 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2543 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2544 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2545 SDValue SRASrc0 = XORSrc1.getOperand(0);
2546 SDValue SRASrc1 = XORSrc1.getOperand(1);
2547 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2548 EVT XType = SRASrc0.getValueType();
2549 unsigned Size = XType.getSizeInBits() - 1;
// All pieces must refer to the same X, and the shift amount must be the
// sign-bit position (bit width - 1).
2551 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2552 XType.isInteger() && SRAConstant != nullptr &&
2553 Size == SRAConstant->getZExtValue()) {
2554 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2555 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
// Helper for SMULW/SMLAW matching: decide whether SignExt is a 16-bit
// sign-extended value suitable for the "B" (bottom-half) form or a
// (sra X, 16) suitable for the "T" (top-half) form. On success sets *Opc
// to the SMULW*/SMLAW* opcode (SMLAW* when Accumulate) and Src1 to the
// underlying 32-bit source.
2562 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2564 // For SM*WB, we need to some form of sext.
2565 // For SM*WT, we need to search for (sra X, 16)
2566 // Src1 then gets set to X.
2567 if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2568 SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2569 SignExt.getOpcode() == ISD::AssertSext) &&
2570 SignExt.getValueType() == MVT::i32) {
2572 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2573 Src1 = SignExt.getOperand(0);
// Not an explicit sext node; look for the top-half form (sra ..., 16).
2577 if (SignExt.getOpcode() != ISD::SRA)
2580 ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2581 if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2584 SDValue Op0 = SignExt.getOperand(0);
2586 // The sign extend operand for SM*WB could be generated by a shl and ashr.
2587 if (Op0.getOpcode() == ISD::SHL) {
2589 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2590 if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
// (sra (shl X, 16), 16) is a bottom-half sign extension -> "B" form.
2593 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2594 Src1 = Op0.getOperand(0);
// Plain (sra X, 16) selects the top half -> "T" form.
2597 *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2598 Src1 = SignExt.getOperand(0);
// Helper for SMULW/SMLAW matching: recognize
//   (or (srl (smul_lohi X, Y), 16), (shl (smul_lohi X, Y), 16))
// i.e. the middle 32 bits of a 64-bit signed multiply, and classify one of
// the multiply operands as a 16-bit half via SearchSignedMulShort. On
// success sets *Opc and the Src0/Src1 multiply operands.
2602 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2603 SDValue &Src1, bool Accumulate) {
2604 // First we look for:
2605 // (add (or (srl ?, 16), (shl ?, 16)))
2606 if (OR.getOpcode() != ISD::OR)
2609 SDValue SRL = OR.getOperand(0);
2610 SDValue SHL = OR.getOperand(1);
// OR is commutative; try both operand orders.
2612 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2613 SRL = OR.getOperand(1);
2614 SHL = OR.getOperand(0);
2615 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2619 ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2620 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2621 if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2622 SHLSrc1->getZExtValue() != 16)
2625 // The first operands to the shifts need to be the two results from the
2626 // same smul_lohi node.
2627 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2628 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
// The srl must use the low half (result 0) and the shl the high half
// (result 1) of the same smul_lohi.
2631 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2632 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2633 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2637 // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2638 // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2639 // For SMLAWB the 16-bit value will signed extended somehow.
2640 // For SMLAWT only the SRA is required.
2642 // Check both sides of SMUL_LOHI
2643 if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2644 Src0 = SMULLOHI->getOperand(1);
2645 } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2647 Src0 = SMULLOHI->getOperand(0);
// Try to select N (an ADD or OR) as SMLAW[B|T] (multiply-accumulate) or
// SMULW[B|T] (multiply) using the SearchSignedMulLong pattern matcher.
// Returns true if a machine node was selected.
2654 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
// These instructions require ARMv6 (or Thumb2 on Thumb targets).
2655 if (!Subtarget->hasV6Ops() ||
2656 (Subtarget->isThumb() && !Subtarget->hasThumb2()))
2660 SDValue Src0 = N->getOperand(0);
2661 SDValue Src1 = N->getOperand(1);
// ADD: accumulate form (SMLAW*). One operand is the multiply pattern, the
// other is the accumulator.
2665 if (N->getOpcode() == ISD::ADD) {
2666 if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2670 if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2672 } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2680 SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2681 CurDAG->getRegister(0, MVT::i32) };
2682 CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
// OR: plain multiply form (SMULW*), matched directly on N itself.
2684 } else if (N->getOpcode() == ISD::OR &&
2685 SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2689 SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2690 CurDAG->getRegister(0, MVT::i32)};
2691 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2697 /// We've got special pseudo-instructions for these
/// Select an ATOMIC_CMP_SWAP_WITH_SUCCESS node to the size-specific
/// CMP_SWAP_{8,16,32} pseudo, which is expanded after register allocation.
2698 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2700 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2701 if (MemTy == MVT::i8)
2702 Opcode = ARM::CMP_SWAP_8;
2703 else if (MemTy == MVT::i16)
2704 Opcode = ARM::CMP_SWAP_16;
2705 else if (MemTy == MVT::i32)
2706 Opcode = ARM::CMP_SWAP_32;
2708 llvm_unreachable("Unknown AtomicCmpSwap type");
// Operands: pointer, expected value, new value (plus the chain).
2710 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2712 SDNode *CmpSwap = CurDAG->getMachineNode(
2714 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2716 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2717 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2718 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
// Result 0 is the loaded value; result 2 is the chain. Pseudo result 1 is
// a scratch and is intentionally not wired to the original node.
2720 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2721 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2722 CurDAG->RemoveDeadNode(N);
// Select CONCAT_VECTORS: legal only as two 64-bit halves forming a 128-bit
// vector, which maps to a D-register pair REG_SEQUENCE.
2725 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2726 // The only time a CONCAT_VECTORS operation can have legal types is when
2727 // two 64-bit vectors are concatenated to a 128-bit vector.
2728 EVT VT = N->getValueType(0);
2729 if (!VT.is128BitVector() || N->getNumOperands() != 2)
2730 llvm_unreachable("unexpected CONCAT_VECTORS");
2731 ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
// If A's set bits form one contiguous run, return {index of highest set bit,
// index of lowest set bit}; otherwise return None.
2734 static Optional<std::pair<unsigned, unsigned>>
2735 getContiguousRangeOfSetBits(const APInt &A) {
2736 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2737 unsigned LastOne = A.countTrailingZeros();
// Contiguous iff the popcount matches the span between the end bits.
2738 if (A.countPopulation() != (FirstOne - LastOne + 1))
2739 return Optional<std::pair<unsigned,unsigned>>();
2740 return std::make_pair(FirstOne, LastOne);
// On Thumb targets, rewrite (cmpz (and X, C), #0) — where C's set bits are
// contiguous — into flag-setting shifts (LSLS/LSRS), which avoids
// materializing the mask constant. Sets SwitchEQNEToPLMI when the caller
// must remap EQ/NE condition codes to PL/MI (single-bit-in-sign-bit case).
2743 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2744 assert(N->getOpcode() == ARMISD::CMPZ);
2745 SwitchEQNEToPLMI = false;
2747 if (!Subtarget->isThumb())
2748 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2749 // LSR don't exist as standalone instructions - they need the barrel shifter.
2752 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2753 SDValue And = N->getOperand(0);
2754 if (!And->hasOneUse())
2757 SDValue Zero = N->getOperand(1);
2758 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2759 And->getOpcode() != ISD::AND)
2761 SDValue X = And.getOperand(0);
2762 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2764 if (!C || !X->hasOneUse())
// Only masks whose set bits are one contiguous run can be expressed as a
// shift (or pair of shifts).
2766 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2770 // There are several ways to lower this:
// Emit a single immediate shift; Thumb2 uses t2LSLri/t2LSRri, Thumb1 uses
// the flag-setting tLSLri/tLSRri forms (which take CPSR as an operand).
2774 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2775 if (Subtarget->isThumb2()) {
2776 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2777 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2778 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2779 CurDAG->getRegister(0, MVT::i32) };
2780 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2782 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2783 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2784 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2785 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
// Range->first is the highest set-bit index, Range->second the lowest.
2789 if (Range->second == 0) {
2790 // 1. Mask includes the LSB -> Simply shift the top N bits off
2791 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2792 ReplaceNode(And.getNode(), NewN);
2793 } else if (Range->first == 31) {
2794 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2795 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2796 ReplaceNode(And.getNode(), NewN);
2797 } else if (Range->first == Range->second) {
2798 // 3. Only one bit is set. We can shift this into the sign bit and use a
2799 // PL/MI comparison.
2800 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2801 ReplaceNode(And.getNode(), NewN);
2803 SwitchEQNEToPLMI = true;
2804 } else if (!Subtarget->hasV6T2Ops()) {
2805 // 4. Do a double shift to clear bottom and top bits, but only in
2806 // thumb-1 mode as in thumb-2 we can use UBFX.
2807 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2808 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2809 Range->second + (31 - Range->first));
2810 ReplaceNode(And.getNode(), NewN);
2815 void ARMDAGToDAGISel::Select(SDNode *N) {
2818 if (N->isMachineOpcode()) {
2820 return; // Already selected.
2823 switch (N->getOpcode()) {
2827 if (trySMLAWSMULW(N))
2830 case ISD::WRITE_REGISTER:
2831 if (tryWriteRegister(N))
2834 case ISD::READ_REGISTER:
2835 if (tryReadRegister(N))
2838 case ISD::INLINEASM:
2839 if (tryInlineAsm(N))
2843 // Select special operations if XOR node forms integer ABS pattern
2846 // Other cases are autogenerated.
2848 case ISD::Constant: {
2849 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2850 // If we can't materialize the constant we need to use a literal pool
2851 if (ConstantMaterializationCost(Val) > 2) {
2852 SDValue CPIdx = CurDAG->getTargetConstantPool(
2853 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2854 TLI->getPointerTy(CurDAG->getDataLayout()));
2857 if (Subtarget->isThumb()) {
2858 SDValue Pred = getAL(CurDAG, dl);
2859 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2860 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2861 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2866 CurDAG->getTargetConstant(0, dl, MVT::i32),
2868 CurDAG->getRegister(0, MVT::i32),
2869 CurDAG->getEntryNode()
2871 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2874 ReplaceNode(N, ResNode);
2878 // Other cases are autogenerated.
2881 case ISD::FrameIndex: {
2882 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2883 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2884 SDValue TFI = CurDAG->getTargetFrameIndex(
2885 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2886 if (Subtarget->isThumb1Only()) {
2887 // Set the alignment of the frame object to 4, to avoid having to generate
2888 // more than one ADD
2889 MachineFrameInfo &MFI = MF->getFrameInfo();
2890 if (MFI.getObjectAlignment(FI) < 4)
2891 MFI.setObjectAlignment(FI, 4);
2892 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2893 CurDAG->getTargetConstant(0, dl, MVT::i32));
2896 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2897 ARM::t2ADDri : ARM::ADDri);
2898 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2899 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2900 CurDAG->getRegister(0, MVT::i32) };
2901 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2906 if (tryV6T2BitfieldExtractOp(N, false))
2909 case ISD::SIGN_EXTEND_INREG:
2911 if (tryV6T2BitfieldExtractOp(N, true))
2915 if (Subtarget->isThumb1Only())
2917 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2918 unsigned RHSV = C->getZExtValue();
2920 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2921 unsigned ShImm = Log2_32(RHSV-1);
2924 SDValue V = N->getOperand(0);
2925 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2926 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2927 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2928 if (Subtarget->isThumb()) {
2929 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2930 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2933 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2935 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2939 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2940 unsigned ShImm = Log2_32(RHSV+1);
2943 SDValue V = N->getOperand(0);
2944 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2945 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2946 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2947 if (Subtarget->isThumb()) {
2948 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2949 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2952 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2954 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2961 // Check for unsigned bitfield extract
2962 if (tryV6T2BitfieldExtractOp(N, false))
2965 // If an immediate is used in an AND node, it is possible that the immediate
2966 // can be more optimally materialized when negated. If this is the case we
2967 // can negate the immediate and use a BIC instead.
2968 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2969 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2970 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2972 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2973 // immediate can be negated and fit in the immediate operand of
2974 // a t2BIC, don't do any manual transform here as this can be
2975 // handled by the generic ISel machinery.
2976 bool PreferImmediateEncoding =
2977 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2978 if (!PreferImmediateEncoding &&
2979 ConstantMaterializationCost(Imm) >
2980 ConstantMaterializationCost(~Imm)) {
2981 // The current immediate costs more to materialize than a negated
2982 // immediate, so negate the immediate and use a BIC.
2984 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2985 // If the new constant didn't exist before, reposition it in the topological
2986 // ordering so it is just before N. Otherwise, don't touch its location.
2987 if (NewImm->getNodeId() == -1)
2988 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2990 if (!Subtarget->hasThumb2()) {
2991 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2992 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2993 CurDAG->getRegister(0, MVT::i32)};
2994 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2997 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2998 CurDAG->getRegister(0, MVT::i32),
2999 CurDAG->getRegister(0, MVT::i32)};
3001 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3007 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3008 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3009 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3010 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3011 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
3012 EVT VT = N->getValueType(0);
3015 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3017 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3020 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3021 N1C = dyn_cast<ConstantSDNode>(N1);
3024 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3025 SDValue N2 = N0.getOperand(1);
3026 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3029 unsigned N1CVal = N1C->getZExtValue();
3030 unsigned N2CVal = N2C->getZExtValue();
3031 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3032 (N1CVal & 0xffffU) == 0xffffU &&
3033 (N2CVal & 0xffffU) == 0x0U) {
3034 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3036 SDValue Ops[] = { N0.getOperand(0), Imm16,
3037 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3038 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3045 case ARMISD::VMOVRRD:
3046 ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
3047 N->getOperand(0), getAL(CurDAG, dl),
3048 CurDAG->getRegister(0, MVT::i32)));
3050 case ISD::UMUL_LOHI: {
3051 if (Subtarget->isThumb1Only())
3053 if (Subtarget->isThumb()) {
3054 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3055 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3057 N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
3060 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3061 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3062 CurDAG->getRegister(0, MVT::i32) };
3063 ReplaceNode(N, CurDAG->getMachineNode(
3064 Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
3065 MVT::i32, MVT::i32, Ops));
3069 case ISD::SMUL_LOHI: {
3070 if (Subtarget->isThumb1Only())
3072 if (Subtarget->isThumb()) {
3073 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3074 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3076 N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
3079 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3080 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3081 CurDAG->getRegister(0, MVT::i32) };
3082 ReplaceNode(N, CurDAG->getMachineNode(
3083 Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
3084 MVT::i32, MVT::i32, Ops));
3088 case ARMISD::UMAAL: {
3089 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3090 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3091 N->getOperand(2), N->getOperand(3),
3093 CurDAG->getRegister(0, MVT::i32) };
3094 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3097 case ARMISD::UMLAL:{
3098 // UMAAL is similar to UMLAL but it adds two 32-bit values to the
3099 // 64-bit multiplication result.
3100 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
3101 N->getOperand(2).getOpcode() == ARMISD::ADDC &&
3102 N->getOperand(3).getOpcode() == ARMISD::ADDE) {
3104 SDValue Addc = N->getOperand(2);
3105 SDValue Adde = N->getOperand(3);
3107 if (Adde.getOperand(2).getNode() == Addc.getNode()) {
3109 ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
3110 ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
3112 if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
3114 // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
3115 // RdLo = one operand to be added, lower 32-bits of res
3116 // RdHi = other operand to be added, upper 32-bits of res
3117 // Rn = first multiply operand
3118 // Rm = second multiply operand
3119 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3120 Addc.getOperand(0), Addc.getOperand(1),
3122 CurDAG->getRegister(0, MVT::i32) };
3123 unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3124 CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
3130 if (Subtarget->isThumb()) {
3131 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3132 N->getOperand(3), getAL(CurDAG, dl),
3133 CurDAG->getRegister(0, MVT::i32)};
3135 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3138 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3139 N->getOperand(3), getAL(CurDAG, dl),
3140 CurDAG->getRegister(0, MVT::i32),
3141 CurDAG->getRegister(0, MVT::i32) };
3142 ReplaceNode(N, CurDAG->getMachineNode(
3143 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3144 MVT::i32, MVT::i32, Ops));
3148 case ARMISD::SMLAL:{
3149 if (Subtarget->isThumb()) {
3150 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3151 N->getOperand(3), getAL(CurDAG, dl),
3152 CurDAG->getRegister(0, MVT::i32)};
3154 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3157 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3158 N->getOperand(3), getAL(CurDAG, dl),
3159 CurDAG->getRegister(0, MVT::i32),
3160 CurDAG->getRegister(0, MVT::i32) };
3161 ReplaceNode(N, CurDAG->getMachineNode(
3162 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3163 MVT::i32, MVT::i32, Ops));
3167 case ARMISD::SUBE: {
3168 if (!Subtarget->hasV6Ops())
3170 // Look for a pattern to match SMMLS
3171 // (sube a, (smul_lohi a, b).val1, (subc 0, (smul_lohi a, b).val0).val1)
3172 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3173 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3174 !SDValue(N, 1).use_empty())
3177 if (Subtarget->isThumb())
3178 assert(Subtarget->hasThumb2() &&
3179 "This pattern should not be generated for Thumb");
3181 SDValue SmulLoHi = N->getOperand(1);
3182 SDValue Subc = N->getOperand(2);
3183 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3185 if (!Zero || Zero->getZExtValue() != 0 ||
3186 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3187 N->getOperand(1) != SmulLoHi.getValue(1) ||
3188 N->getOperand(2) != Subc.getValue(1))
3191 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3192 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3193 N->getOperand(0), getAL(CurDAG, dl),
3194 CurDAG->getRegister(0, MVT::i32) };
3195 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3199 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3200 if (tryT2IndexedLoad(N))
3202 } else if (Subtarget->isThumb()) {
3203 if (tryT1IndexedLoad(N))
3205 } else if (tryARMIndexedLoad(N))
3207 // Other cases are autogenerated.
3210 case ARMISD::BRCOND: {
3211 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3212 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3213 // Pattern complexity = 6 cost = 1 size = 0
3215 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3216 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3217 // Pattern complexity = 6 cost = 1 size = 0
3219 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3220 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3221 // Pattern complexity = 6 cost = 1 size = 0
3223 unsigned Opc = Subtarget->isThumb() ?
3224 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3225 SDValue Chain = N->getOperand(0);
3226 SDValue N1 = N->getOperand(1);
3227 SDValue N2 = N->getOperand(2);
3228 SDValue N3 = N->getOperand(3);
3229 SDValue InFlag = N->getOperand(4);
3230 assert(N1.getOpcode() == ISD::BasicBlock);
3231 assert(N2.getOpcode() == ISD::Constant);
3232 assert(N3.getOpcode() == ISD::Register);
3234 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3236 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3237 bool SwitchEQNEToPLMI;
3238 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3239 InFlag = N->getOperand(4);
3241 if (SwitchEQNEToPLMI) {
3242 switch ((ARMCC::CondCodes)CC) {
3243 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3245 CC = (unsigned)ARMCC::MI;
3248 CC = (unsigned)ARMCC::PL;
3254 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3255 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3256 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3258 Chain = SDValue(ResNode, 0);
3259 if (N->getNumValues() == 2) {
3260 InFlag = SDValue(ResNode, 1);
3261 ReplaceUses(SDValue(N, 1), InFlag);
3263 ReplaceUses(SDValue(N, 0),
3264 SDValue(Chain.getNode(), Chain.getResNo()));
3265 CurDAG->RemoveDeadNode(N);
3269 case ARMISD::CMPZ: {
3270 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3271 // This allows us to avoid materializing the expensive negative constant.
3272 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3273 // for its glue output.
3274 SDValue X = N->getOperand(0);
3275 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3276 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3277 int64_t Addend = -C->getSExtValue();
3279 SDNode *Add = nullptr;
3280 // In T2 mode, ADDS can be better than CMN if the immediate fits in a
3281 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3282 // Outside that range we can just use a CMN which is 32-bit but has a
3283 // 12-bit immediate range.
3284 if (Subtarget->isThumb2() && Addend < 1<<8) {
3285 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3286 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3287 CurDAG->getRegister(0, MVT::i32) };
3288 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3289 } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
3290 // FIXME: Add T1 tADDi8 code.
3291 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3292 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3293 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3294 Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
3295 } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
3296 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3297 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3298 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3299 Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
3302 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3303 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3306 // Other cases are autogenerated.
3310 case ARMISD::CMOV: {
3311 SDValue InFlag = N->getOperand(4);
3313 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3314 bool SwitchEQNEToPLMI;
3315 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3317 if (SwitchEQNEToPLMI) {
3318 SDValue ARMcc = N->getOperand(2);
3319 ARMCC::CondCodes CC =
3320 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3323 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3331 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3332 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3333 N->getOperand(3), N->getOperand(4)};
3334 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3338 // Other cases are autogenerated.
3342 case ARMISD::VZIP: {
3344 EVT VT = N->getValueType(0);
3345 switch (VT.getSimpleVT().SimpleTy) {
3347 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3348 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3350 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3351 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3352 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3353 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3355 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3357 SDValue Pred = getAL(CurDAG, dl);
3358 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3359 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3360 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3363 case ARMISD::VUZP: {
3365 EVT VT = N->getValueType(0);
3366 switch (VT.getSimpleVT().SimpleTy) {
3368 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3369 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3371 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3372 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3373 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3374 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3376 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3378 SDValue Pred = getAL(CurDAG, dl);
3379 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3380 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3381 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3384 case ARMISD::VTRN: {
3386 EVT VT = N->getValueType(0);
3387 switch (VT.getSimpleVT().SimpleTy) {
3389 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3390 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3392 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3393 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3394 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3396 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3398 SDValue Pred = getAL(CurDAG, dl);
3399 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3400 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3401 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3404 case ARMISD::BUILD_VECTOR: {
3405 EVT VecVT = N->getValueType(0);
3406 EVT EltVT = VecVT.getVectorElementType();
3407 unsigned NumElts = VecVT.getVectorNumElements();
3408 if (EltVT == MVT::f64) {
3409 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3411 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3414 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3417 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3420 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3422 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3423 N->getOperand(2), N->getOperand(3)));
3427 case ARMISD::VLD1DUP: {
3428 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3430 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3432 SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3436 case ARMISD::VLD2DUP: {
3437 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3439 SelectVLDDup(N, false, 2, Opcodes);
3443 case ARMISD::VLD3DUP: {
3444 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3445 ARM::VLD3DUPd16Pseudo,
3446 ARM::VLD3DUPd32Pseudo };
3447 SelectVLDDup(N, false, 3, Opcodes);
3451 case ARMISD::VLD4DUP: {
3452 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3453 ARM::VLD4DUPd16Pseudo,
3454 ARM::VLD4DUPd32Pseudo };
3455 SelectVLDDup(N, false, 4, Opcodes);
3459 case ARMISD::VLD1DUP_UPD: {
3460 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3461 ARM::VLD1DUPd16wb_fixed,
3462 ARM::VLD1DUPd32wb_fixed };
3463 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3464 ARM::VLD1DUPq16wb_fixed,
3465 ARM::VLD1DUPq32wb_fixed };
3466 SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3470 case ARMISD::VLD2DUP_UPD: {
3471 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3472 ARM::VLD2DUPd16wb_fixed,
3473 ARM::VLD2DUPd32wb_fixed };
3474 SelectVLDDup(N, true, 2, Opcodes);
3478 case ARMISD::VLD3DUP_UPD: {
3479 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3480 ARM::VLD3DUPd16Pseudo_UPD,
3481 ARM::VLD3DUPd32Pseudo_UPD };
3482 SelectVLDDup(N, true, 3, Opcodes);
3486 case ARMISD::VLD4DUP_UPD: {
3487 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3488 ARM::VLD4DUPd16Pseudo_UPD,
3489 ARM::VLD4DUPd32Pseudo_UPD };
3490 SelectVLDDup(N, true, 4, Opcodes);
3494 case ARMISD::VLD1_UPD: {
3495 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3496 ARM::VLD1d16wb_fixed,
3497 ARM::VLD1d32wb_fixed,
3498 ARM::VLD1d64wb_fixed };
3499 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3500 ARM::VLD1q16wb_fixed,
3501 ARM::VLD1q32wb_fixed,
3502 ARM::VLD1q64wb_fixed };
3503 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3507 case ARMISD::VLD2_UPD: {
3508 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3509 ARM::VLD2d16wb_fixed,
3510 ARM::VLD2d32wb_fixed,
3511 ARM::VLD1q64wb_fixed};
3512 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3513 ARM::VLD2q16PseudoWB_fixed,
3514 ARM::VLD2q32PseudoWB_fixed };
3515 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3519 case ARMISD::VLD3_UPD: {
3520 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3521 ARM::VLD3d16Pseudo_UPD,
3522 ARM::VLD3d32Pseudo_UPD,
3523 ARM::VLD1d64TPseudoWB_fixed};
3524 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3525 ARM::VLD3q16Pseudo_UPD,
3526 ARM::VLD3q32Pseudo_UPD };
3527 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3528 ARM::VLD3q16oddPseudo_UPD,
3529 ARM::VLD3q32oddPseudo_UPD };
3530 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3534 case ARMISD::VLD4_UPD: {
3535 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3536 ARM::VLD4d16Pseudo_UPD,
3537 ARM::VLD4d32Pseudo_UPD,
3538 ARM::VLD1d64QPseudoWB_fixed};
3539 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3540 ARM::VLD4q16Pseudo_UPD,
3541 ARM::VLD4q32Pseudo_UPD };
3542 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3543 ARM::VLD4q16oddPseudo_UPD,
3544 ARM::VLD4q32oddPseudo_UPD };
3545 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3549 case ARMISD::VLD2LN_UPD: {
3550 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3551 ARM::VLD2LNd16Pseudo_UPD,
3552 ARM::VLD2LNd32Pseudo_UPD };
3553 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3554 ARM::VLD2LNq32Pseudo_UPD };
3555 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3559 case ARMISD::VLD3LN_UPD: {
3560 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3561 ARM::VLD3LNd16Pseudo_UPD,
3562 ARM::VLD3LNd32Pseudo_UPD };
3563 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3564 ARM::VLD3LNq32Pseudo_UPD };
3565 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3569 case ARMISD::VLD4LN_UPD: {
3570 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3571 ARM::VLD4LNd16Pseudo_UPD,
3572 ARM::VLD4LNd32Pseudo_UPD };
3573 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3574 ARM::VLD4LNq32Pseudo_UPD };
3575 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3579 case ARMISD::VST1_UPD: {
3580 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3581 ARM::VST1d16wb_fixed,
3582 ARM::VST1d32wb_fixed,
3583 ARM::VST1d64wb_fixed };
3584 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3585 ARM::VST1q16wb_fixed,
3586 ARM::VST1q32wb_fixed,
3587 ARM::VST1q64wb_fixed };
3588 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3592 case ARMISD::VST2_UPD: {
3593 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3594 ARM::VST2d16wb_fixed,
3595 ARM::VST2d32wb_fixed,
3596 ARM::VST1q64wb_fixed};
3597 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3598 ARM::VST2q16PseudoWB_fixed,
3599 ARM::VST2q32PseudoWB_fixed };
3600 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3604 case ARMISD::VST3_UPD: {
3605 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3606 ARM::VST3d16Pseudo_UPD,
3607 ARM::VST3d32Pseudo_UPD,
3608 ARM::VST1d64TPseudoWB_fixed};
3609 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3610 ARM::VST3q16Pseudo_UPD,
3611 ARM::VST3q32Pseudo_UPD };
3612 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3613 ARM::VST3q16oddPseudo_UPD,
3614 ARM::VST3q32oddPseudo_UPD };
3615 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3619 case ARMISD::VST4_UPD: {
3620 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3621 ARM::VST4d16Pseudo_UPD,
3622 ARM::VST4d32Pseudo_UPD,
3623 ARM::VST1d64QPseudoWB_fixed};
3624 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3625 ARM::VST4q16Pseudo_UPD,
3626 ARM::VST4q32Pseudo_UPD };
3627 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3628 ARM::VST4q16oddPseudo_UPD,
3629 ARM::VST4q32oddPseudo_UPD };
3630 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3634 case ARMISD::VST2LN_UPD: {
3635 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3636 ARM::VST2LNd16Pseudo_UPD,
3637 ARM::VST2LNd32Pseudo_UPD };
3638 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3639 ARM::VST2LNq32Pseudo_UPD };
3640 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3644 case ARMISD::VST3LN_UPD: {
3645 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3646 ARM::VST3LNd16Pseudo_UPD,
3647 ARM::VST3LNd32Pseudo_UPD };
3648 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3649 ARM::VST3LNq32Pseudo_UPD };
3650 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3654 case ARMISD::VST4LN_UPD: {
3655 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3656 ARM::VST4LNd16Pseudo_UPD,
3657 ARM::VST4LNd32Pseudo_UPD };
3658 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3659 ARM::VST4LNq32Pseudo_UPD };
3660 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3664 case ISD::INTRINSIC_VOID:
3665 case ISD::INTRINSIC_W_CHAIN: {
3666 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3671 case Intrinsic::arm_mrrc:
3672 case Intrinsic::arm_mrrc2: {
3674 SDValue Chain = N->getOperand(0);
3677 if (Subtarget->isThumb())
3678 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3680 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3682 SmallVector<SDValue, 5> Ops;
3683 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3684 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3685 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3687 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3688 // instruction will always be '1111' but it is possible in assembly language to specify
3689 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3690 if (Opc != ARM::MRRC2) {
3691 Ops.push_back(getAL(CurDAG, dl));
3692 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3695 Ops.push_back(Chain);
3697 // Writes to two registers.
3698 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3700 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3703 case Intrinsic::arm_ldaexd:
3704 case Intrinsic::arm_ldrexd: {
3706 SDValue Chain = N->getOperand(0);
3707 SDValue MemAddr = N->getOperand(2);
3708 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3710 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3711 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3712 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3714 // arm_ldrexd returns a i64 value in {i32, i32}
3715 std::vector<EVT> ResTys;
3717 ResTys.push_back(MVT::i32);
3718 ResTys.push_back(MVT::i32);
3720 ResTys.push_back(MVT::Untyped);
3721 ResTys.push_back(MVT::Other);
3723 // Place arguments in the right order.
3724 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3725 CurDAG->getRegister(0, MVT::i32), Chain};
3726 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3727 // Transfer memoperands.
3728 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3729 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3730 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3733 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3734 if (!SDValue(N, 0).use_empty()) {
3737 Result = SDValue(Ld, 0);
3740 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3741 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3742 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3743 Result = SDValue(ResNode,0);
3745 ReplaceUses(SDValue(N, 0), Result);
3747 if (!SDValue(N, 1).use_empty()) {
3750 Result = SDValue(Ld, 1);
3753 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3754 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3755 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3756 Result = SDValue(ResNode,0);
3758 ReplaceUses(SDValue(N, 1), Result);
3760 ReplaceUses(SDValue(N, 2), OutChain);
3761 CurDAG->RemoveDeadNode(N);
3764 case Intrinsic::arm_stlexd:
3765 case Intrinsic::arm_strexd: {
3767 SDValue Chain = N->getOperand(0);
3768 SDValue Val0 = N->getOperand(2);
3769 SDValue Val1 = N->getOperand(3);
3770 SDValue MemAddr = N->getOperand(4);
3772 // Store exclusive double return a i32 value which is the return status
3773 // of the issued store.
3774 const EVT ResTys[] = {MVT::i32, MVT::Other};
3776 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3777 // Place arguments in the right order.
3778 SmallVector<SDValue, 7> Ops;
3780 Ops.push_back(Val0);
3781 Ops.push_back(Val1);
3783 // arm_strexd uses GPRPair.
3784 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3785 Ops.push_back(MemAddr);
3786 Ops.push_back(getAL(CurDAG, dl));
3787 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3788 Ops.push_back(Chain);
3790 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3791 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3792 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3794 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3795 // Transfer memoperands.
3796 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3797 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3798 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3804 case Intrinsic::arm_neon_vld1: {
3805 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3806 ARM::VLD1d32, ARM::VLD1d64 };
3807 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3808 ARM::VLD1q32, ARM::VLD1q64};
3809 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3813 case Intrinsic::arm_neon_vld2: {
3814 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3815 ARM::VLD2d32, ARM::VLD1q64 };
3816 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3817 ARM::VLD2q32Pseudo };
3818 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3822 case Intrinsic::arm_neon_vld3: {
3823 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3826 ARM::VLD1d64TPseudo };
3827 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3828 ARM::VLD3q16Pseudo_UPD,
3829 ARM::VLD3q32Pseudo_UPD };
3830 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3831 ARM::VLD3q16oddPseudo,
3832 ARM::VLD3q32oddPseudo };
3833 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3837 case Intrinsic::arm_neon_vld4: {
3838 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3841 ARM::VLD1d64QPseudo };
3842 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3843 ARM::VLD4q16Pseudo_UPD,
3844 ARM::VLD4q32Pseudo_UPD };
3845 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3846 ARM::VLD4q16oddPseudo,
3847 ARM::VLD4q32oddPseudo };
3848 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3852 case Intrinsic::arm_neon_vld2lane: {
3853 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3854 ARM::VLD2LNd16Pseudo,
3855 ARM::VLD2LNd32Pseudo };
3856 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3857 ARM::VLD2LNq32Pseudo };
3858 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3862 case Intrinsic::arm_neon_vld3lane: {
3863 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3864 ARM::VLD3LNd16Pseudo,
3865 ARM::VLD3LNd32Pseudo };
3866 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3867 ARM::VLD3LNq32Pseudo };
3868 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3872 case Intrinsic::arm_neon_vld4lane: {
3873 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3874 ARM::VLD4LNd16Pseudo,
3875 ARM::VLD4LNd32Pseudo };
3876 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3877 ARM::VLD4LNq32Pseudo };
3878 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3882 case Intrinsic::arm_neon_vst1: {
3883 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3884 ARM::VST1d32, ARM::VST1d64 };
3885 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3886 ARM::VST1q32, ARM::VST1q64 };
3887 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3891 case Intrinsic::arm_neon_vst2: {
3892 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3893 ARM::VST2d32, ARM::VST1q64 };
3894 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3895 ARM::VST2q32Pseudo };
3896 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3900 case Intrinsic::arm_neon_vst3: {
3901 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3904 ARM::VST1d64TPseudo };
3905 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3906 ARM::VST3q16Pseudo_UPD,
3907 ARM::VST3q32Pseudo_UPD };
3908 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3909 ARM::VST3q16oddPseudo,
3910 ARM::VST3q32oddPseudo };
3911 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3915 case Intrinsic::arm_neon_vst4: {
3916 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3919 ARM::VST1d64QPseudo };
3920 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3921 ARM::VST4q16Pseudo_UPD,
3922 ARM::VST4q32Pseudo_UPD };
3923 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3924 ARM::VST4q16oddPseudo,
3925 ARM::VST4q32oddPseudo };
3926 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3930 case Intrinsic::arm_neon_vst2lane: {
3931 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3932 ARM::VST2LNd16Pseudo,
3933 ARM::VST2LNd32Pseudo };
3934 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3935 ARM::VST2LNq32Pseudo };
3936 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3940 case Intrinsic::arm_neon_vst3lane: {
3941 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3942 ARM::VST3LNd16Pseudo,
3943 ARM::VST3LNd32Pseudo };
3944 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3945 ARM::VST3LNq32Pseudo };
3946 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3950 case Intrinsic::arm_neon_vst4lane: {
3951 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3952 ARM::VST4LNd16Pseudo,
3953 ARM::VST4LNd32Pseudo };
3954 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3955 ARM::VST4LNq32Pseudo };
3956 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3963 case ISD::INTRINSIC_WO_CHAIN: {
3964 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3969 case Intrinsic::arm_neon_vtbl2:
3970 SelectVTBL(N, false, 2, ARM::VTBL2);
3972 case Intrinsic::arm_neon_vtbl3:
3973 SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3975 case Intrinsic::arm_neon_vtbl4:
3976 SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3979 case Intrinsic::arm_neon_vtbx2:
3980 SelectVTBL(N, true, 2, ARM::VTBX2);
3982 case Intrinsic::arm_neon_vtbx3:
3983 SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3985 case Intrinsic::arm_neon_vtbx4:
3986 SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3992 case ARMISD::VTBL1: {
3994 EVT VT = N->getValueType(0);
3995 SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3996 getAL(CurDAG, dl), // Predicate
3997 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3998 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
4001 case ARMISD::VTBL2: {
4003 EVT VT = N->getValueType(0);
4005 // Form a REG_SEQUENCE to force register allocation.
4006 SDValue V0 = N->getOperand(0);
4007 SDValue V1 = N->getOperand(1);
4008 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
4010 SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
4011 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
4012 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
4016 case ISD::CONCAT_VECTORS:
4017 SelectConcatVector(N);
4020 case ISD::ATOMIC_CMP_SWAP:
4028 // Inspect a register string of the form
4029 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4030 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4031 // and obtain the integer operands from them, adding these operands to the
4033 static void getIntOperandsFromRegisterString(StringRef RegString,
4034 SelectionDAG *CurDAG,
4036 std::vector<SDValue> &Ops) {
4037 SmallVector<StringRef, 5> Fields;
4038 RegString.split(Fields, ':');
4040 if (Fields.size() > 1) {
4041 bool AllIntFields = true;
4043 for (StringRef Field : Fields) {
4044 // Need to trim out leading 'cp' characters and get the integer field.
4046 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4047 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4050 assert(AllIntFields &&
4051 "Unexpected non-integer value in special register string.");
4055 // Maps a Banked Register string to its mask value. The mask value returned is
4056 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4057 // mask operand, which expresses which register is to be used, e.g. r8, and in
4058 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4060 static inline int getBankedRegisterMask(StringRef RegString) {
4061 return StringSwitch<int>(RegString.lower())
4062 .Case("r8_usr", 0x00)
4063 .Case("r9_usr", 0x01)
4064 .Case("r10_usr", 0x02)
4065 .Case("r11_usr", 0x03)
4066 .Case("r12_usr", 0x04)
4067 .Case("sp_usr", 0x05)
4068 .Case("lr_usr", 0x06)
4069 .Case("r8_fiq", 0x08)
4070 .Case("r9_fiq", 0x09)
4071 .Case("r10_fiq", 0x0a)
4072 .Case("r11_fiq", 0x0b)
4073 .Case("r12_fiq", 0x0c)
4074 .Case("sp_fiq", 0x0d)
4075 .Case("lr_fiq", 0x0e)
4076 .Case("lr_irq", 0x10)
4077 .Case("sp_irq", 0x11)
4078 .Case("lr_svc", 0x12)
4079 .Case("sp_svc", 0x13)
4080 .Case("lr_abt", 0x14)
4081 .Case("sp_abt", 0x15)
4082 .Case("lr_und", 0x16)
4083 .Case("sp_und", 0x17)
4084 .Case("lr_mon", 0x1c)
4085 .Case("sp_mon", 0x1d)
4086 .Case("elr_hyp", 0x1e)
4087 .Case("sp_hyp", 0x1f)
4088 .Case("spsr_fiq", 0x2e)
4089 .Case("spsr_irq", 0x30)
4090 .Case("spsr_svc", 0x32)
4091 .Case("spsr_abt", 0x34)
4092 .Case("spsr_und", 0x36)
4093 .Case("spsr_mon", 0x3c)
4094 .Case("spsr_hyp", 0x3e)
4098 // Maps a MClass special register string to its value for use in the
4099 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
4100 // Returns -1 to signify that the string was invalid.
4101 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
4102 return StringSwitch<int>(RegString.lower())
4112 .Case("primask", 0x10)
4113 .Case("basepri", 0x11)
4114 .Case("basepri_max", 0x12)
4115 .Case("faultmask", 0x13)
4116 .Case("control", 0x14)
4117 .Case("msplim", 0x0a)
4118 .Case("psplim", 0x0b)
4123 // The flags here are common to those allowed for apsr in the A class cores and
4124 // those allowed for the special registers in the M class cores. Returns a
4125 // value representing which flags were present, -1 if invalid.
4126 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
4128 return 0x2 | (int)hasDSP;
4130 return StringSwitch<int>(Flags)
4133 .Case("nzcvqg", 0x3)
4137 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
4138 const ARMSubtarget *Subtarget) {
4139 // Ensure that the register (without flags) was a valid M Class special
4141 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
4142 if (SYSmvalue == -1)
4145 // basepri, basepri_max and faultmask are only valid for V7m.
4146 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
4149 if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
4154 if (!Subtarget->has8MSecExt() &&
4155 (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
4158 if (!Subtarget->hasV8MMainlineOps() &&
4159 (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
4163 // If it was a read then we won't be expecting flags and so at this point
4164 // we can return the mask.
4172 // We know we are now handling a write so need to get the mask for the flags.
4173 int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
4175 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
4176 // shouldn't have flags present.
4177 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
4180 // The _g and _nzcvqg versions are only valid if the DSP extension is
4182 if (!Subtarget->hasDSP() && (Mask & 0x1))
4185 // The register was valid so need to put the mask in the correct place
4186 // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
4187 // construct the operand for the instruction node.
4188 if (SYSmvalue < 0x4)
4189 return SYSmvalue | Mask << 10;
4194 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4195 // The mask operand contains the special register (R Bit) in bit 4, whether
4196 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4197 // bits 3-0 contains the fields to be accessed in the special register, set by
4198 // the flags provided with the register.
4200 if (Reg == "apsr") {
4201 // The flags permitted for apsr are the same flags that are allowed in
4202 // M class registers. We get the flag value and then shift the flags into
4203 // the correct place to combine with the mask.
4204 Mask = getMClassFlagsMask(Flags, true);
4210 if (Reg != "cpsr" && Reg != "spsr") {
4214 // This is the same as if the flags were "fc"
4215 if (Flags.empty() || Flags == "all")
4218 // Inspect the supplied flags string and set the bits in the mask for
4219 // the relevant and valid flags allowed for cpsr and spsr.
4220 for (char Flag : Flags) {
4239 // This avoids allowing strings where the same flag bit appears twice.
4240 if (!FlagVal || (Mask & FlagVal))
4245 // If the register is spsr then we need to set the R bit.
4252 // Lower the read_register intrinsic to ARM specific DAG nodes
4253 // using the supplied metadata string to select the instruction node to use
4254 // and the registers/masks to construct as operands for the node.
4255 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4256 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4257 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4258 bool IsThumb2 = Subtarget->isThumb2();
4261 std::vector<SDValue> Ops;
4262 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4265 // If the special register string was constructed of fields (as defined
4266 // in the ACLE) then need to lower to MRC node (32 bit) or
4267 // MRRC node(64 bit), we can make the distinction based on the number of
4268 // operands we have.
4270 SmallVector<EVT, 3> ResTypes;
4271 if (Ops.size() == 5){
4272 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4273 ResTypes.append({ MVT::i32, MVT::Other });
4275 assert(Ops.size() == 3 &&
4276 "Invalid number of fields in special register string.");
4277 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4278 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4281 Ops.push_back(getAL(CurDAG, DL));
4282 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4283 Ops.push_back(N->getOperand(0));
4284 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4288 std::string SpecialReg = RegString->getString().lower();
4290 int BankedReg = getBankedRegisterMask(SpecialReg);
4291 if (BankedReg != -1) {
4292 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4293 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4296 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4297 DL, MVT::i32, MVT::Other, Ops));
4301 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4302 // corresponding to the register that is being read from. So we switch on the
4303 // string to find which opcode we need to use.
4304 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4305 .Case("fpscr", ARM::VMRS)
4306 .Case("fpexc", ARM::VMRS_FPEXC)
4307 .Case("fpsid", ARM::VMRS_FPSID)
4308 .Case("mvfr0", ARM::VMRS_MVFR0)
4309 .Case("mvfr1", ARM::VMRS_MVFR1)
4310 .Case("mvfr2", ARM::VMRS_MVFR2)
4311 .Case("fpinst", ARM::VMRS_FPINST)
4312 .Case("fpinst2", ARM::VMRS_FPINST2)
4315 // If an opcode was found then we can lower the read to a VFP instruction.
4317 if (!Subtarget->hasVFP2())
4319 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4322 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4325 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4329 // If the target is M Class then need to validate that the register string
4330 // is an acceptable value, so check that a mask can be constructed from the
4332 if (Subtarget->isMClass()) {
4333 StringRef Flags = "", Reg = SpecialReg;
4334 if (Reg.endswith("_ns")) {
4336 Reg = Reg.drop_back(3);
4339 int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4340 if (SYSmValue == -1)
4343 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4344 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4347 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4351 // Here we know the target is not M Class so we need to check if it is one
4352 // of the remaining possible values which are apsr, cpsr or spsr.
4353 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4354 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4356 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4357 DL, MVT::i32, MVT::Other, Ops));
4361 if (SpecialReg == "spsr") {
4362 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4365 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4366 MVT::i32, MVT::Other, Ops));
4373 // Lower the write_register intrinsic to ARM specific DAG nodes
4374 // using the supplied metadata string to select the instruction node to use
4375 // and the registers/masks to use in the nodes
4376 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4377 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4378 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4379 bool IsThumb2 = Subtarget->isThumb2();
4382 std::vector<SDValue> Ops;
4383 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4386 // If the special register string was constructed of fields (as defined
4387 // in the ACLE) then need to lower to MCR node (32 bit) or
4388 // MCRR node(64 bit), we can make the distinction based on the number of
4389 // operands we have.
4391 if (Ops.size() == 5) {
4392 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4393 Ops.insert(Ops.begin()+2, N->getOperand(2));
4395 assert(Ops.size() == 3 &&
4396 "Invalid number of fields in special register string.");
4397 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4398 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4399 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4402 Ops.push_back(getAL(CurDAG, DL));
4403 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4404 Ops.push_back(N->getOperand(0));
4406 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4410 std::string SpecialReg = RegString->getString().lower();
4411 int BankedReg = getBankedRegisterMask(SpecialReg);
4412 if (BankedReg != -1) {
4413 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4414 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4417 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4418 DL, MVT::Other, Ops));
4422 // The VFP registers are written to by creating SelectionDAG nodes with
4423 // opcodes corresponding to the register that is being written. So we switch
4424 // on the string to find which opcode we need to use.
4425 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4426 .Case("fpscr", ARM::VMSR)
4427 .Case("fpexc", ARM::VMSR_FPEXC)
4428 .Case("fpsid", ARM::VMSR_FPSID)
4429 .Case("fpinst", ARM::VMSR_FPINST)
4430 .Case("fpinst2", ARM::VMSR_FPINST2)
4434 if (!Subtarget->hasVFP2())
4436 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4437 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4438 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4442 std::pair<StringRef, StringRef> Fields;
4443 Fields = StringRef(SpecialReg).rsplit('_');
4444 std::string Reg = Fields.first.str();
4445 StringRef Flags = Fields.second;
4447 // If the target was M Class then need to validate the special register value
4448 // and retrieve the mask for use in the instruction node.
4449 if (Subtarget->isMClass()) {
4450 // basepri_max gets split so need to correct Reg and Flags.
4451 if (SpecialReg == "basepri_max") {
4455 int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4456 if (SYSmValue == -1)
4459 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4460 N->getOperand(2), getAL(CurDAG, DL),
4461 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4462 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4466 // We then check to see if a valid mask can be constructed for one of the
4467 // register string values permitted for the A and R class cores. These values
4468 // are apsr, spsr and cpsr; these are also valid on older cores.
4469 int Mask = getARClassRegisterMask(Reg, Flags);
4471 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4472 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4474 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4475 DL, MVT::Other, Ops));
4482 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4483 std::vector<SDValue> AsmNodeOperands;
4484 unsigned Flag, Kind;
4485 bool Changed = false;
4486 unsigned NumOps = N->getNumOperands();
4488 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4489 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4490 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4491 // respectively. Since there is no constraint to explicitly specify a
4492 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4493 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4494 // them into a GPRPair.
4497 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4498 : SDValue(nullptr,0);
4500 SmallVector<bool, 8> OpChanged;
4501 // Glue node will be appended late.
4502 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4503 SDValue op = N->getOperand(i);
4504 AsmNodeOperands.push_back(op);
4506 if (i < InlineAsm::Op_FirstOperand)
4509 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4510 Flag = C->getZExtValue();
4511 Kind = InlineAsm::getKind(Flag);
4516 // Immediate operands to inline asm in the SelectionDAG are modeled with
4517 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4518 // the second is a constant with the value of the immediate. If we get here
4519 // and we have a Kind_Imm, skip the next operand, and continue.
4520 if (Kind == InlineAsm::Kind_Imm) {
4521 SDValue op = N->getOperand(++i);
4522 AsmNodeOperands.push_back(op);
4526 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4528 OpChanged.push_back(false);
4530 unsigned DefIdx = 0;
4531 bool IsTiedToChangedOp = false;
4532 // If it's a use that is tied with a previous def, it has no
4533 // reg class constraint.
4534 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4535 IsTiedToChangedOp = OpChanged[DefIdx];
4537 // Memory operands to inline asm in the SelectionDAG are modeled with two
4538 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4539 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4540 // it doesn't get misinterpreted), and continue. We do this here because
4541 // it's important to update the OpChanged array correctly before moving on.
4542 if (Kind == InlineAsm::Kind_Mem) {
4543 SDValue op = N->getOperand(++i);
4544 AsmNodeOperands.push_back(op);
4548 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4549 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4553 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4554 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4558 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4559 SDValue V0 = N->getOperand(i+1);
4560 SDValue V1 = N->getOperand(i+2);
4561 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4562 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4564 MachineRegisterInfo &MRI = MF->getRegInfo();
4566 if (Kind == InlineAsm::Kind_RegDef ||
4567 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4568 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4569 // the original GPRs.
4571 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4572 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4573 SDValue Chain = SDValue(N,0);
4575 SDNode *GU = N->getGluedUser();
4576 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4579 // Extract values from a GPRPair reg and copy to the original GPR reg.
4580 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4582 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4584 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4585 RegCopy.getValue(1));
4586 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4588 // Update the original glue user.
4589 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4590 Ops.push_back(T1.getValue(1));
4591 CurDAG->UpdateNodeOperands(GU, Ops);
4594 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4595 // GPRPair and then pass the GPRPair to the inline asm.
4596 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4598 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4599 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4601 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4603 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4605 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4606 // i32 VRs of inline asm with it.
4607 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4608 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4609 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4611 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4612 Glue = Chain.getValue(1);
4617 if(PairedReg.getNode()) {
4618 OpChanged[OpChanged.size() -1 ] = true;
4619 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4620 if (IsTiedToChangedOp)
4621 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4623 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4624 // Replace the current flag.
4625 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4626 Flag, dl, MVT::i32);
4627 // Add the new register node and skip the original two GPRs.
4628 AsmNodeOperands.push_back(PairedReg);
4629 // Skip the next two GPRs.
4635 AsmNodeOperands.push_back(Glue);
4639 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4640 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4642 ReplaceNode(N, New.getNode());
4647 bool ARMDAGToDAGISel::
4648 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4649 std::vector<SDValue> &OutOps) {
4650 switch(ConstraintID) {
4652 llvm_unreachable("Unexpected asm memory constraint");
4653 case InlineAsm::Constraint_i:
4654 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4655 // be an immediate and not a memory constraint.
4657 case InlineAsm::Constraint_m:
4658 case InlineAsm::Constraint_o:
4659 case InlineAsm::Constraint_Q:
4660 case InlineAsm::Constraint_Um:
4661 case InlineAsm::Constraint_Un:
4662 case InlineAsm::Constraint_Uq:
4663 case InlineAsm::Constraint_Us:
4664 case InlineAsm::Constraint_Ut:
4665 case InlineAsm::Constraint_Uv:
4666 case InlineAsm::Constraint_Uy:
4667 // Require the address to be in a register. That is safe for all ARM
4668 // variants and it is hard to do anything much smarter without knowing
4669 // how the operand is used.
4670 OutOps.push_back(Op);
4676 /// createARMISelDag - This pass converts a legalized DAG into a
4677 /// ARM-specific DAG, ready for instruction scheduling.
4679 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4680 CodeGenOpt::Level OptLevel) {
4681 return new ARMDAGToDAGISel(TM, OptLevel);