contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Target/TargetLowering.h"
  35 #include "llvm/Target/TargetOptions.h"
  36
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "arm-isel"
  40
  41 static cl::opt<bool>
  42 DisableShifterOp("disable-shifter-op", cl::Hidden,
  43   cl::desc("Disable isel of shifter-op"),
  44   cl::init(false));
  45
  46 //===--------------------------------------------------------------------===//
  47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  48 /// instructions for SelectionDAG operations.
  49 ///
  50 namespace {
  51
  52 enum AddrMode2Type {
  53   AM2_BASE, // Simple AM2 (+-imm12)
  54   AM2_SHOP  // Shifter-op AM2
  55 };
  56
  57 class ARMDAGToDAGISel : public SelectionDAGISel {
  58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  59   /// make the right decision when generating code for different targets.
  60   const ARMSubtarget *Subtarget;
  61
  62 public:
  63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  64       : SelectionDAGISel(tm, OptLevel) {}
  65
  66   bool runOnMachineFunction(MachineFunction &MF) override {
  67     // Reset the subtarget each time through.
  68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  69     SelectionDAGISel::runOnMachineFunction(MF);
  70     return true;
  71   }
  72
  73   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  74
  75   void PreprocessISelDAG() override;
  76
  77   /// getI32Imm - Return a target constant of type i32 with the specified
  78   /// value.
  79   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  80     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  81   }
  82
  83   void Select(SDNode *N) override;
  84
  85   bool hasNoVMLxHazardUse(SDNode *N) const;
  86   bool isShifterOpProfitable(const SDValue &Shift,
  87                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  88   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  89                                SDValue &B, SDValue &C,
  90                                bool CheckProfitability = true);
  91   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  92                                SDValue &B, bool CheckProfitability = true);
  93   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  94                                     SDValue &B, SDValue &C) {
  95     // Don't apply the profitability check
  96     return SelectRegShifterOperand(N, A, B, C, false);
  97   }
  98   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  99                                     SDValue &B) {
 100     // Don't apply the profitability check
 101     return SelectImmShifterOperand(N, A, B, false);
 102   }
 103
 104   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 105   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 106
 107   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 108                                       SDValue &Offset, SDValue &Opc);
 109   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 110                            SDValue &Opc) {
 111     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 112   }
 113
 114   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 115                            SDValue &Opc) {
 116     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 117   }
 118
 119   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 120                        SDValue &Opc) {
 121     SelectAddrMode2Worker(N, Base, Offset, Opc);
 122 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 123     // This always matches one way or another.
 124     return true;
 125   }
 126
 127   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 128     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 129     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 130     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 131     return true;
 132   }
 133
 134   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 135                              SDValue &Offset, SDValue &Opc);
 136   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 137                              SDValue &Offset, SDValue &Opc);
 138   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 139                              SDValue &Offset, SDValue &Opc);
 140   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 141   bool SelectAddrMode3(SDValue N, SDValue &Base,
 142                        SDValue &Offset, SDValue &Opc);
 143   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode5(SDValue N, SDValue &Base,
 146                        SDValue &Offset);
 147   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 148   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 149
 150   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 151
 152   // Thumb Addressing Modes:
 153   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 154   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 155                                 SDValue &OffImm);
 156   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 157                                  SDValue &OffImm);
 158   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 159                                  SDValue &OffImm);
 160   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 161                                  SDValue &OffImm);
 162   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 163
 164   // Thumb 2 Addressing Modes:
 165   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 166   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 167                             SDValue &OffImm);
 168   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 169                                  SDValue &OffImm);
 170   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 171                              SDValue &OffReg, SDValue &ShImm);
 172   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 173
 174   inline bool is_so_imm(unsigned Imm) const {
 175     return ARM_AM::getSOImmVal(Imm) != -1;
 176   }
 177
 178   inline bool is_so_imm_not(unsigned Imm) const {
 179     return ARM_AM::getSOImmVal(~Imm) != -1;
 180   }
 181
 182   inline bool is_t2_so_imm(unsigned Imm) const {
 183     return ARM_AM::getT2SOImmVal(Imm) != -1;
 184   }
 185
 186   inline bool is_t2_so_imm_not(unsigned Imm) const {
 187     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 188   }
 189
 190   // Include the pieces autogenerated from the target description.
 191 #include "ARMGenDAGISel.inc"
 192
 193 private:
 194   void transferMemOperands(SDNode *Src, SDNode *Dst);
 195
 196   /// Indexed (pre/post inc/dec) load matching code for ARM.
 197   bool tryARMIndexedLoad(SDNode *N);
 198   bool tryT1IndexedLoad(SDNode *N);
 199   bool tryT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 207                  const uint16_t *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 215                  const uint16_t *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 221                        unsigned NumVecs, const uint16_t *DOpcodes,
 222                        const uint16_t *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                     const uint16_t *DOpcodes,
 229                     const uint16_t *QOpcodes = nullptr);
 230
 231   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 232   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 233   /// generated to force the table registers to be consecutive.
 234   void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 235
 236   /// Try to select SBFX/UBFX instructions for ARM.
 237   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 238
 239   // Select special operations if node forms integer ABS pattern
 240   bool tryABSOp(SDNode *N);
 241
 242   bool tryReadRegister(SDNode *N);
 243   bool tryWriteRegister(SDNode *N);
 244
 245   bool tryInlineAsm(SDNode *N);
 246
 247   void SelectConcatVector(SDNode *N);
 248   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 249
 250   bool trySMLAWSMULW(SDNode *N);
 251
 252   void SelectCMP_SWAP(SDNode *N);
 253
 254   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 255   /// inline asm expressions.
 256   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 257                                     std::vector<SDValue> &OutOps) override;
 258
 259   // Form pairs of consecutive R, S, D, or Q registers.
 260   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 261   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 262   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 263   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 264
 265   // Form sequences of 4 consecutive S, D, or Q registers.
 266   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 267   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 268   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 269
 270   // Get the alignment operand for a NEON VLD or VST instruction.
 271   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 272                         bool is64BitVector);
 273
 274   /// Returns the number of instructions required to materialize the given
 275   /// constant in a register, or 3 if a literal pool load is needed.
 276   unsigned ConstantMaterializationCost(unsigned Val) const;
 277
 278   /// Checks if N is a multiplication by a constant where we can extract out a
 279   /// power of two from the constant so that it can be used in a shift, but only
 280   /// if it simplifies the materialization of the constant. Returns true if it
 281   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 282   /// out and to NewMulConst the new constant to be multiplied by.
 283   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 284                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 285
 286   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 287   /// selected when N would have been selected.
 288   void replaceDAGValue(const SDValue &N, SDValue M);
 289 };
 290 }
 291
 292 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 293 /// operand. If so Imm will receive the 32-bit value.
 294 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 295   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 296     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 297     return true;
 298   }
 299   return false;
 300 }
 301
 302 // isInt32Immediate - This method tests to see if a constant operand.
 303 // If so Imm will receive the 32 bit value.
 304 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 305   return isInt32Immediate(N.getNode(), Imm);
 306 }
 307
 308 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 309 // opcode and that it has a immediate integer right operand.
 310 // If so Imm will receive the 32 bit value.
 311 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 312   return N->getOpcode() == Opc &&
 313          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 314 }
 315
 316 /// \brief Check whether a particular node is a constant value representable as
 317 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 318 ///
 319 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 320 static bool isScaledConstantInRange(SDValue Node, int Scale,
 321                                     int RangeMin, int RangeMax,
 322                                     int &ScaledConstant) {
 323   assert(Scale > 0 && "Invalid scale!");
 324
 325   // Check that this is a constant.
 326   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 327   if (!C)
 328     return false;
 329
 330   ScaledConstant = (int) C->getZExtValue();
 331   if ((ScaledConstant % Scale) != 0)
 332     return false;
 333
 334   ScaledConstant /= Scale;
 335   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 336 }
 337
 338 void ARMDAGToDAGISel::PreprocessISelDAG() {
 339   if (!Subtarget->hasV6T2Ops())
 340     return;
 341
 342   bool isThumb2 = Subtarget->isThumb();
 343   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 344        E = CurDAG->allnodes_end(); I != E; ) {
 345     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 346
 347     if (N->getOpcode() != ISD::ADD)
 348       continue;
 349
 350     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 351     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 352     // trailing zeros, e.g. 1020.
 353     // Transform the expression to
 354     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 355     // of trailing zeros of c2. The left shift would be folded as an shifter
 356     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 357     // node (UBFX).
 358
 359     SDValue N0 = N->getOperand(0);
 360     SDValue N1 = N->getOperand(1);
 361     unsigned And_imm = 0;
 362     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 363       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 364         std::swap(N0, N1);
 365     }
 366     if (!And_imm)
 367       continue;
 368
 369     // Check if the AND mask is an immediate of the form: 000.....1111111100
 370     unsigned TZ = countTrailingZeros(And_imm);
 371     if (TZ != 1 && TZ != 2)
 372       // Be conservative here. Shifter operands aren't always free. e.g. On
 373       // Swift, left shifter operand of 1 / 2 for free but others are not.
 374       // e.g.
 375       //  ubfx   r3, r1, #16, #8
 376       //  ldr.w  r3, [r0, r3, lsl #2]
 377       // vs.
 378       //  mov.w  r9, #1020
 379       //  and.w  r2, r9, r1, lsr #14
 380       //  ldr    r2, [r0, r2]
 381       continue;
 382     And_imm >>= TZ;
 383     if (And_imm & (And_imm + 1))
 384       continue;
 385
 386     // Look for (and (srl X, c1), c2).
 387     SDValue Srl = N1.getOperand(0);
 388     unsigned Srl_imm = 0;
 389     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 390         (Srl_imm <= 2))
 391       continue;
 392
 393     // Make sure first operand is not a shifter operand which would prevent
 394     // folding of the left shift.
 395     SDValue CPTmp0;
 396     SDValue CPTmp1;
 397     SDValue CPTmp2;
 398     if (isThumb2) {
 399       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 400         continue;
 401     } else {
 402       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 403           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 404         continue;
 405     }
 406
 407     // Now make the transformation.
 408     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 409                           Srl.getOperand(0),
 410                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 411                                               MVT::i32));
 412     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 413                          Srl,
 414                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 415     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 416                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 417     CurDAG->UpdateNodeOperands(N, N0, N1);
 418   }
 419 }
 420
 421 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 422 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 423 /// least on current ARM implementations) which should be avoidded.
 424 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 425   if (OptLevel == CodeGenOpt::None)
 426     return true;
 427
 428   if (!Subtarget->hasVMLxHazards())
 429     return true;
 430
 431   if (!N->hasOneUse())
 432     return false;
 433
 434   SDNode *Use = *N->use_begin();
 435   if (Use->getOpcode() == ISD::CopyToReg)
 436     return true;
 437   if (Use->isMachineOpcode()) {
 438     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 439         CurDAG->getSubtarget().getInstrInfo());
 440
 441     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 442     if (MCID.mayStore())
 443       return true;
 444     unsigned Opcode = MCID.getOpcode();
 445     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 446       return true;
 447     // vmlx feeding into another vmlx. We actually want to unfold
 448     // the use later in the MLxExpansion pass. e.g.
 449     // vmla
 450     // vmla (stall 8 cycles)
 451     //
 452     // vmul (5 cycles)
 453     // vadd (5 cycles)
 454     // vmla
 455     // This adds up to about 18 - 19 cycles.
 456     //
 457     // vmla
 458     // vmul (stall 4 cycles)
 459     // vadd adds up to about 14 cycles.
 460     return TII->isFpMLxInstruction(Opcode);
 461   }
 462
 463   return false;
 464 }
 465
 466 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 467                                             ARM_AM::ShiftOpc ShOpcVal,
 468                                             unsigned ShAmt) {
 469   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 470     return true;
 471   if (Shift.hasOneUse())
 472     return true;
 473   // R << 2 is free.
 474   return ShOpcVal == ARM_AM::lsl &&
 475          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 476 }
 477
 478 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 479   if (Subtarget->isThumb()) {
 480     if (Val <= 255) return 1;                               // MOV
 481     if (Subtarget->hasV6T2Ops() &&
 482         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 483       return 1; // MOVW
 484     if (Val <= 510) return 2;                               // MOV + ADDi8
 485     if (~Val <= 255) return 2;                              // MOV + MVN
 486     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 487   } else {
 488     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 489     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 490     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 491     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 492   }
 493   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 494   return 3; // Literal pool load
 495 }
 496
 497 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 498                                              unsigned MaxShift,
 499                                              unsigned &PowerOfTwo,
 500                                              SDValue &NewMulConst) const {
 501   assert(N.getOpcode() == ISD::MUL);
 502   assert(MaxShift > 0);
 503
 504   // If the multiply is used in more than one place then changing the constant
 505   // will make other uses incorrect, so don't.
 506   if (!N.hasOneUse()) return false;
 507   // Check if the multiply is by a constant
 508   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 509   if (!MulConst) return false;
 510   // If the constant is used in more than one place then modifying it will mean
 511   // we need to materialize two constants instead of one, which is a bad idea.
 512   if (!MulConst->hasOneUse()) return false;
 513   unsigned MulConstVal = MulConst->getZExtValue();
 514   if (MulConstVal == 0) return false;
 515
 516   // Find the largest power of 2 that MulConstVal is a multiple of
 517   PowerOfTwo = MaxShift;
 518   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 519     --PowerOfTwo;
 520     if (PowerOfTwo == 0) return false;
 521   }
 522
 523   // Only optimise if the new cost is better
 524   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 525   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 526   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 527   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 528   return NewCost < OldCost;
 529 }
 530
 531 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 532   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 533   CurDAG->ReplaceAllUsesWith(N, M);
 534 }
 535
 536 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 537                                               SDValue &BaseReg,
 538                                               SDValue &Opc,
 539                                               bool CheckProfitability) {
 540   if (DisableShifterOp)
 541     return false;
 542
 543   // If N is a multiply-by-constant and it's profitable to extract a shift and
 544   // use it in a shifted operand do so.
 545   if (N.getOpcode() == ISD::MUL) {
 546     unsigned PowerOfTwo = 0;
 547     SDValue NewMulConst;
 548     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 549       HandleSDNode Handle(N);
 550       replaceDAGValue(N.getOperand(1), NewMulConst);
 551       BaseReg = Handle.getValue();
 552       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
 553                                                           PowerOfTwo),
 554                                       SDLoc(N), MVT::i32);
 555       return true;
 556     }
 557   }
 558
 559   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 560
 561   // Don't match base register only case. That is matched to a separate
 562   // lower complexity pattern with explicit register operand.
 563   if (ShOpcVal == ARM_AM::no_shift) return false;
 564
 565   BaseReg = N.getOperand(0);
 566   unsigned ShImmVal = 0;
 567   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 568   if (!RHS) return false;
 569   ShImmVal = RHS->getZExtValue() & 31;
 570   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 571                                   SDLoc(N), MVT::i32);
 572   return true;
 573 }
 574
 575 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 576                                               SDValue &BaseReg,
 577                                               SDValue &ShReg,
 578                                               SDValue &Opc,
 579                                               bool CheckProfitability) {
 580   if (DisableShifterOp)
 581     return false;
 582
 583   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 584
 585   // Don't match base register only case. That is matched to a separate
 586   // lower complexity pattern with explicit register operand.
 587   if (ShOpcVal == ARM_AM::no_shift) return false;
 588
 589   BaseReg = N.getOperand(0);
 590   unsigned ShImmVal = 0;
 591   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 592   if (RHS) return false;
 593
 594   ShReg = N.getOperand(1);
 595   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 596     return false;
 597   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 598                                   SDLoc(N), MVT::i32);
 599   return true;
 600 }
 601
 602
 603 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 604                                           SDValue &Base,
 605                                           SDValue &OffImm) {
 606   // Match simple R + imm12 operands.
 607
 608   // Base only.
 609   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 610       !CurDAG->isBaseWithConstantOffset(N)) {
 611     if (N.getOpcode() == ISD::FrameIndex) {
 612       // Match frame index.
 613       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 614       Base = CurDAG->getTargetFrameIndex(
 615           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 616       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 617       return true;
 618     }
 619
 620     if (N.getOpcode() == ARMISD::Wrapper &&
 621         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 622         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 623         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 624       Base = N.getOperand(0);
 625     } else
 626       Base = N;
 627     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 628     return true;
 629   }
 630
 631   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 632     int RHSC = (int)RHS->getSExtValue();
 633     if (N.getOpcode() == ISD::SUB)
 634       RHSC = -RHSC;
 635
 636     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 637       Base   = N.getOperand(0);
 638       if (Base.getOpcode() == ISD::FrameIndex) {
 639         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 640         Base = CurDAG->getTargetFrameIndex(
 641             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 642       }
 643       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 644       return true;
 645     }
 646   }
 647
 648   // Base only.
 649   Base = N;
 650   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 651   return true;
 652 }
 653
 654
 655
 656 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 657                                       SDValue &Opc) {
 658   if (N.getOpcode() == ISD::MUL &&
 659       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 660     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 661       // X * [3,5,9] -> X + X * [2,4,8] etc.
 662       int RHSC = (int)RHS->getZExtValue();
 663       if (RHSC & 1) {
 664         RHSC = RHSC & ~1;
 665         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 666         if (RHSC < 0) {
 667           AddSub = ARM_AM::sub;
 668           RHSC = - RHSC;
 669         }
 670         if (isPowerOf2_32(RHSC)) {
 671           unsigned ShAmt = Log2_32(RHSC);
 672           Base = Offset = N.getOperand(0);
 673           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 674                                                             ARM_AM::lsl),
 675                                           SDLoc(N), MVT::i32);
 676           return true;
 677         }
 678       }
 679     }
 680   }
 681
 682   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 683       // ISD::OR that is equivalent to an ISD::ADD.
 684       !CurDAG->isBaseWithConstantOffset(N))
 685     return false;
 686
 687   // Leave simple R +/- imm12 operands for LDRi12
 688   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 689     int RHSC;
 690     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 691                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 692       return false;
 693   }
 694
 695   // Otherwise this is R +/- [possibly shifted] R.
 696   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 697   ARM_AM::ShiftOpc ShOpcVal =
 698     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 699   unsigned ShAmt = 0;
 700
 701   Base   = N.getOperand(0);
 702   Offset = N.getOperand(1);
 703
 704   if (ShOpcVal != ARM_AM::no_shift) {
 705     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 706     // it.
 707     if (ConstantSDNode *Sh =
 708            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 709       ShAmt = Sh->getZExtValue();
 710       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 711         Offset = N.getOperand(1).getOperand(0);
 712       else {
 713         ShAmt = 0;
 714         ShOpcVal = ARM_AM::no_shift;
 715       }
 716     } else {
 717       ShOpcVal = ARM_AM::no_shift;
 718     }
 719   }
 720
 721   // Try matching (R shl C) + (R).
 722   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 723       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 724         N.getOperand(0).hasOneUse())) {
 725     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 726     if (ShOpcVal != ARM_AM::no_shift) {
 727       // Check to see if the RHS of the shift is a constant, if not, we can't
 728       // fold it.
 729       if (ConstantSDNode *Sh =
 730           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 731         ShAmt = Sh->getZExtValue();
 732         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 733           Offset = N.getOperand(0).getOperand(0);
 734           Base = N.getOperand(1);
 735         } else {
 736           ShAmt = 0;
 737           ShOpcVal = ARM_AM::no_shift;
 738         }
 739       } else {
 740         ShOpcVal = ARM_AM::no_shift;
 741       }
 742     }
 743   }
 744
 745   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 746   // and use it in a shifted operand do so.
 747   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 748     unsigned PowerOfTwo = 0;
 749     SDValue NewMulConst;
 750     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 751       HandleSDNode Handle(Offset);
 752       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 753       Offset = Handle.getValue();
 754       ShAmt = PowerOfTwo;
 755       ShOpcVal = ARM_AM::lsl;
 756     }
 757   }
 758
 759   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 760                                   SDLoc(N), MVT::i32);
 761   return true;
 762 }
 763
 764
 765 //-----
 766
 767 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 768                                                      SDValue &Base,
 769                                                      SDValue &Offset,
 770                                                      SDValue &Opc) {
 771   if (N.getOpcode() == ISD::MUL &&
 772       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 773     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 774       // X * [3,5,9] -> X + X * [2,4,8] etc.
 775       int RHSC = (int)RHS->getZExtValue();
 776       if (RHSC & 1) {
 777         RHSC = RHSC & ~1;
 778         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 779         if (RHSC < 0) {
 780           AddSub = ARM_AM::sub;
 781           RHSC = - RHSC;
 782         }
 783         if (isPowerOf2_32(RHSC)) {
 784           unsigned ShAmt = Log2_32(RHSC);
 785           Base = Offset = N.getOperand(0);
 786           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 787                                                             ARM_AM::lsl),
 788                                           SDLoc(N), MVT::i32);
 789           return AM2_SHOP;
 790         }
 791       }
 792     }
 793   }
 794
 795   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 796       // ISD::OR that is equivalent to an ADD.
 797       !CurDAG->isBaseWithConstantOffset(N)) {
 798     Base = N;
 799     if (N.getOpcode() == ISD::FrameIndex) {
 800       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 801       Base = CurDAG->getTargetFrameIndex(
 802           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 803     } else if (N.getOpcode() == ARMISD::Wrapper &&
 804                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 805                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 806                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 807       Base = N.getOperand(0);
 808     }
 809     Offset = CurDAG->getRegister(0, MVT::i32);
 810     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 811                                                       ARM_AM::no_shift),
 812                                     SDLoc(N), MVT::i32);
 813     return AM2_BASE;
 814   }
 815
 816   // Match simple R +/- imm12 operands.
 817   if (N.getOpcode() != ISD::SUB) {
 818     int RHSC;
 819     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 820                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 821       Base = N.getOperand(0);
 822       if (Base.getOpcode() == ISD::FrameIndex) {
 823         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 824         Base = CurDAG->getTargetFrameIndex(
 825             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 826       }
 827       Offset = CurDAG->getRegister(0, MVT::i32);
 828
 829       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 830       if (RHSC < 0) {
 831         AddSub = ARM_AM::sub;
 832         RHSC = - RHSC;
 833       }
 834       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 835                                                         ARM_AM::no_shift),
 836                                       SDLoc(N), MVT::i32);
 837       return AM2_BASE;
 838     }
 839   }
 840
 841   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 842     // Compute R +/- (R << N) and reuse it.
 843     Base = N;
 844     Offset = CurDAG->getRegister(0, MVT::i32);
 845     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 846                                                       ARM_AM::no_shift),
 847                                     SDLoc(N), MVT::i32);
 848     return AM2_BASE;
 849   }
 850
 851   // Otherwise this is R +/- [possibly shifted] R.
 852   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 853   ARM_AM::ShiftOpc ShOpcVal =
 854     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 855   unsigned ShAmt = 0;
 856
 857   Base   = N.getOperand(0);
 858   Offset = N.getOperand(1);
 859
 860   if (ShOpcVal != ARM_AM::no_shift) {
 861     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 862     // it.
 863     if (ConstantSDNode *Sh =
 864            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 865       ShAmt = Sh->getZExtValue();
 866       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 867         Offset = N.getOperand(1).getOperand(0);
 868       else {
 869         ShAmt = 0;
 870         ShOpcVal = ARM_AM::no_shift;
 871       }
 872     } else {
 873       ShOpcVal = ARM_AM::no_shift;
 874     }
 875   }
 876
 877   // Try matching (R shl C) + (R).
 878   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 879       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 880         N.getOperand(0).hasOneUse())) {
 881     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 882     if (ShOpcVal != ARM_AM::no_shift) {
 883       // Check to see if the RHS of the shift is a constant, if not, we can't
 884       // fold it.
 885       if (ConstantSDNode *Sh =
 886           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 887         ShAmt = Sh->getZExtValue();
 888         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 889           Offset = N.getOperand(0).getOperand(0);
 890           Base = N.getOperand(1);
 891         } else {
 892           ShAmt = 0;
 893           ShOpcVal = ARM_AM::no_shift;
 894         }
 895       } else {
 896         ShOpcVal = ARM_AM::no_shift;
 897       }
 898     }
 899   }
 900
 901   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 902                                   SDLoc(N), MVT::i32);
 903   return AM2_SHOP;
 904 }
 905
 906 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 907                                             SDValue &Offset, SDValue &Opc) {
 908   unsigned Opcode = Op->getOpcode();
 909   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 910     ? cast<LoadSDNode>(Op)->getAddressingMode()
 911     : cast<StoreSDNode>(Op)->getAddressingMode();
 912   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 913     ? ARM_AM::add : ARM_AM::sub;
 914   int Val;
 915   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 916     return false;
 917
 918   Offset = N;
 919   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 920   unsigned ShAmt = 0;
 921   if (ShOpcVal != ARM_AM::no_shift) {
 922     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 923     // it.
 924     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 925       ShAmt = Sh->getZExtValue();
 926       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 927         Offset = N.getOperand(0);
 928       else {
 929         ShAmt = 0;
 930         ShOpcVal = ARM_AM::no_shift;
 931       }
 932     } else {
 933       ShOpcVal = ARM_AM::no_shift;
 934     }
 935   }
 936
 937   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 938                                   SDLoc(N), MVT::i32);
 939   return true;
 940 }
 941
 942 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 943                                             SDValue &Offset, SDValue &Opc) {
 944   unsigned Opcode = Op->getOpcode();
 945   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 946     ? cast<LoadSDNode>(Op)->getAddressingMode()
 947     : cast<StoreSDNode>(Op)->getAddressingMode();
 948   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 949     ? ARM_AM::add : ARM_AM::sub;
 950   int Val;
 951   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 952     if (AddSub == ARM_AM::sub) Val *= -1;
 953     Offset = CurDAG->getRegister(0, MVT::i32);
 954     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 955     return true;
 956   }
 957
 958   return false;
 959 }
 960
 961
 962 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 963                                             SDValue &Offset, SDValue &Opc) {
 964   unsigned Opcode = Op->getOpcode();
 965   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 966     ? cast<LoadSDNode>(Op)->getAddressingMode()
 967     : cast<StoreSDNode>(Op)->getAddressingMode();
 968   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 969     ? ARM_AM::add : ARM_AM::sub;
 970   int Val;
 971   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 972     Offset = CurDAG->getRegister(0, MVT::i32);
 973     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 974                                                       ARM_AM::no_shift),
 975                                     SDLoc(Op), MVT::i32);
 976     return true;
 977   }
 978
 979   return false;
 980 }
 981
/// Match an address that has no offset component: \p N is used unchanged as
/// \p Base. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
 986
 987 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 988                                       SDValue &Base, SDValue &Offset,
 989                                       SDValue &Opc) {
 990   if (N.getOpcode() == ISD::SUB) {
 991     // X - C  is canonicalize to X + -C, no need to handle it here.
 992     Base = N.getOperand(0);
 993     Offset = N.getOperand(1);
 994     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 995                                     MVT::i32);
 996     return true;
 997   }
 998
 999   if (!CurDAG->isBaseWithConstantOffset(N)) {
1000     Base = N;
1001     if (N.getOpcode() == ISD::FrameIndex) {
1002       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1003       Base = CurDAG->getTargetFrameIndex(
1004           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1005     }
1006     Offset = CurDAG->getRegister(0, MVT::i32);
1007     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1008                                     MVT::i32);
1009     return true;
1010   }
1011
1012   // If the RHS is +/- imm8, fold into addr mode.
1013   int RHSC;
1014   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1015                               -256 + 1, 256, RHSC)) { // 8 bits.
1016     Base = N.getOperand(0);
1017     if (Base.getOpcode() == ISD::FrameIndex) {
1018       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1019       Base = CurDAG->getTargetFrameIndex(
1020           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1021     }
1022     Offset = CurDAG->getRegister(0, MVT::i32);
1023
1024     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1025     if (RHSC < 0) {
1026       AddSub = ARM_AM::sub;
1027       RHSC = -RHSC;
1028     }
1029     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1030                                     MVT::i32);
1031     return true;
1032   }
1033
1034   Base = N.getOperand(0);
1035   Offset = N.getOperand(1);
1036   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1037                                   MVT::i32);
1038   return true;
1039 }
1040
1041 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1042                                             SDValue &Offset, SDValue &Opc) {
1043   unsigned Opcode = Op->getOpcode();
1044   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1045     ? cast<LoadSDNode>(Op)->getAddressingMode()
1046     : cast<StoreSDNode>(Op)->getAddressingMode();
1047   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1048     ? ARM_AM::add : ARM_AM::sub;
1049   int Val;
1050   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1051     Offset = CurDAG->getRegister(0, MVT::i32);
1052     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1053                                     MVT::i32);
1054     return true;
1055   }
1056
1057   Offset = N;
1058   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1059                                   MVT::i32);
1060   return true;
1061 }
1062
1063 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1064                                       SDValue &Base, SDValue &Offset) {
1065   if (!CurDAG->isBaseWithConstantOffset(N)) {
1066     Base = N;
1067     if (N.getOpcode() == ISD::FrameIndex) {
1068       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1069       Base = CurDAG->getTargetFrameIndex(
1070           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1071     } else if (N.getOpcode() == ARMISD::Wrapper &&
1072                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1073                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1074                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1075       Base = N.getOperand(0);
1076     }
1077     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1078                                        SDLoc(N), MVT::i32);
1079     return true;
1080   }
1081
1082   // If the RHS is +/- imm8, fold into addr mode.
1083   int RHSC;
1084   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1085                               -256 + 1, 256, RHSC)) {
1086     Base = N.getOperand(0);
1087     if (Base.getOpcode() == ISD::FrameIndex) {
1088       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1089       Base = CurDAG->getTargetFrameIndex(
1090           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1091     }
1092
1093     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1094     if (RHSC < 0) {
1095       AddSub = ARM_AM::sub;
1096       RHSC = -RHSC;
1097     }
1098     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1099                                        SDLoc(N), MVT::i32);
1100     return true;
1101   }
1102
1103   Base = N;
1104   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1105                                      SDLoc(N), MVT::i32);
1106   return true;
1107 }
1108
1109 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1110                                       SDValue &Align) {
1111   Addr = N;
1112
1113   unsigned Alignment = 0;
1114
1115   MemSDNode *MemN = cast<MemSDNode>(Parent);
1116
1117   if (isa<LSBaseSDNode>(MemN) ||
1118       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1119         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1120        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1121     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1122     // The maximum alignment is equal to the memory size being referenced.
1123     unsigned MMOAlign = MemN->getAlignment();
1124     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1125     if (MMOAlign >= MemSize && MemSize > 1)
1126       Alignment = MemSize;
1127   } else {
1128     // All other uses of addrmode6 are for intrinsics.  For now just record
1129     // the raw alignment value; it will be refined later based on the legal
1130     // alignment operands for the intrinsic.
1131     Alignment = MemN->getAlignment();
1132   }
1133
1134   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1135   return true;
1136 }
1137
1138 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1139                                             SDValue &Offset) {
1140   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1141   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1142   if (AM != ISD::POST_INC)
1143     return false;
1144   Offset = N;
1145   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1146     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1147       Offset = CurDAG->getRegister(0, MVT::i32);
1148   }
1149   return true;
1150 }
1151
1152 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1153                                        SDValue &Offset, SDValue &Label) {
1154   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1155     Offset = N.getOperand(0);
1156     SDValue N1 = N.getOperand(1);
1157     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1158                                       SDLoc(N), MVT::i32);
1159     return true;
1160   }
1161
1162   return false;
1163 }
1164
1165
1166 //===----------------------------------------------------------------------===//
1167 //                         Thumb Addressing Modes
1168 //===----------------------------------------------------------------------===//
1169
1170 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1171                                             SDValue &Base, SDValue &Offset){
1172   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1173     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1174     if (!NC || !NC->isNullValue())
1175       return false;
1176
1177     Base = Offset = N;
1178     return true;
1179   }
1180
1181   Base = N.getOperand(0);
1182   Offset = N.getOperand(1);
1183   return true;
1184 }
1185
1186 bool
1187 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1188                                           SDValue &Base, SDValue &OffImm) {
1189   if (!CurDAG->isBaseWithConstantOffset(N)) {
1190     if (N.getOpcode() == ISD::ADD) {
1191       return false; // We want to select register offset instead
1192     } else if (N.getOpcode() == ARMISD::Wrapper &&
1193         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1194         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1195         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1196         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1197       Base = N.getOperand(0);
1198     } else {
1199       Base = N;
1200     }
1201
1202     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1203     return true;
1204   }
1205
1206   // If the RHS is + imm5 * scale, fold into addr mode.
1207   int RHSC;
1208   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1209     Base = N.getOperand(0);
1210     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1211     return true;
1212   }
1213
1214   // Offset is too large, so use register offset instead.
1215   return false;
1216 }
1217
1218 bool
1219 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1220                                            SDValue &OffImm) {
1221   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1222 }
1223
1224 bool
1225 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1226                                            SDValue &OffImm) {
1227   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1228 }
1229
1230 bool
1231 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1232                                            SDValue &OffImm) {
1233   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1234 }
1235
1236 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1237                                             SDValue &Base, SDValue &OffImm) {
1238   if (N.getOpcode() == ISD::FrameIndex) {
1239     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1240     // Only multiples of 4 are allowed for the offset, so the frame object
1241     // alignment must be at least 4.
1242     MachineFrameInfo &MFI = MF->getFrameInfo();
1243     if (MFI.getObjectAlignment(FI) < 4)
1244       MFI.setObjectAlignment(FI, 4);
1245     Base = CurDAG->getTargetFrameIndex(
1246         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1247     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1248     return true;
1249   }
1250
1251   if (!CurDAG->isBaseWithConstantOffset(N))
1252     return false;
1253
1254   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1255   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1256       (LHSR && LHSR->getReg() == ARM::SP)) {
1257     // If the RHS is + imm8 * scale, fold into addr mode.
1258     int RHSC;
1259     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1260       Base = N.getOperand(0);
1261       if (Base.getOpcode() == ISD::FrameIndex) {
1262         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1263         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1264         // indexed by the LHS must be 4-byte aligned.
1265         MachineFrameInfo &MFI = MF->getFrameInfo();
1266         if (MFI.getObjectAlignment(FI) < 4)
1267           MFI.setObjectAlignment(FI, 4);
1268         Base = CurDAG->getTargetFrameIndex(
1269             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1270       }
1271       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1272       return true;
1273     }
1274   }
1275
1276   return false;
1277 }
1278
1279
1280 //===----------------------------------------------------------------------===//
1281 //                        Thumb 2 Addressing Modes
1282 //===----------------------------------------------------------------------===//
1283
1284
1285 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1286                                             SDValue &Base, SDValue &OffImm) {
1287   // Match simple R + imm12 operands.
1288
1289   // Base only.
1290   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1291       !CurDAG->isBaseWithConstantOffset(N)) {
1292     if (N.getOpcode() == ISD::FrameIndex) {
1293       // Match frame index.
1294       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1295       Base = CurDAG->getTargetFrameIndex(
1296           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1297       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1298       return true;
1299     }
1300
1301     if (N.getOpcode() == ARMISD::Wrapper &&
1302         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1303         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1304         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1305       Base = N.getOperand(0);
1306       if (Base.getOpcode() == ISD::TargetConstantPool)
1307         return false;  // We want to select t2LDRpci instead.
1308     } else
1309       Base = N;
1310     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1311     return true;
1312   }
1313
1314   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1315     if (SelectT2AddrModeImm8(N, Base, OffImm))
1316       // Let t2LDRi8 handle (R - imm8).
1317       return false;
1318
1319     int RHSC = (int)RHS->getZExtValue();
1320     if (N.getOpcode() == ISD::SUB)
1321       RHSC = -RHSC;
1322
1323     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1324       Base   = N.getOperand(0);
1325       if (Base.getOpcode() == ISD::FrameIndex) {
1326         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1327         Base = CurDAG->getTargetFrameIndex(
1328             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1329       }
1330       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1331       return true;
1332     }
1333   }
1334
1335   // Base only.
1336   Base = N;
1337   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1338   return true;
1339 }
1340
1341 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1342                                            SDValue &Base, SDValue &OffImm) {
1343   // Match simple R - imm8 operands.
1344   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1345       !CurDAG->isBaseWithConstantOffset(N))
1346     return false;
1347
1348   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1349     int RHSC = (int)RHS->getSExtValue();
1350     if (N.getOpcode() == ISD::SUB)
1351       RHSC = -RHSC;
1352
1353     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1354       Base = N.getOperand(0);
1355       if (Base.getOpcode() == ISD::FrameIndex) {
1356         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1357         Base = CurDAG->getTargetFrameIndex(
1358             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1359       }
1360       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1361       return true;
1362     }
1363   }
1364
1365   return false;
1366 }
1367
1368 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1369                                                  SDValue &OffImm){
1370   unsigned Opcode = Op->getOpcode();
1371   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1372     ? cast<LoadSDNode>(Op)->getAddressingMode()
1373     : cast<StoreSDNode>(Op)->getAddressingMode();
1374   int RHSC;
1375   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1376     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1377       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1378       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1379     return true;
1380   }
1381
1382   return false;
1383 }
1384
1385 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1386                                             SDValue &Base,
1387                                             SDValue &OffReg, SDValue &ShImm) {
1388   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1389   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1390     return false;
1391
1392   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1393   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1394     int RHSC = (int)RHS->getZExtValue();
1395     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1396       return false;
1397     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1398       return false;
1399   }
1400
1401   // Look for (R + R) or (R + (R << [1,2,3])).
1402   unsigned ShAmt = 0;
1403   Base   = N.getOperand(0);
1404   OffReg = N.getOperand(1);
1405
1406   // Swap if it is ((R << c) + R).
1407   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1408   if (ShOpcVal != ARM_AM::lsl) {
1409     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1410     if (ShOpcVal == ARM_AM::lsl)
1411       std::swap(Base, OffReg);
1412   }
1413
1414   if (ShOpcVal == ARM_AM::lsl) {
1415     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1416     // it.
1417     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1418       ShAmt = Sh->getZExtValue();
1419       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1420         OffReg = OffReg.getOperand(0);
1421       else {
1422         ShAmt = 0;
1423       }
1424     }
1425   }
1426
1427   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1428   // and use it in a shifted operand do so.
1429   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1430     unsigned PowerOfTwo = 0;
1431     SDValue NewMulConst;
1432     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1433       HandleSDNode Handle(OffReg);
1434       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1435       OffReg = Handle.getValue();
1436       ShAmt = PowerOfTwo;
1437     }
1438   }
1439
1440   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1441
1442   return true;
1443 }
1444
1445 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1446                                                 SDValue &OffImm) {
1447   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1448   // instructions.
1449   Base = N;
1450   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1451
1452   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1453     return true;
1454
1455   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1456   if (!RHS)
1457     return true;
1458
1459   uint32_t RHSC = (int)RHS->getZExtValue();
1460   if (RHSC > 1020 || RHSC % 4 != 0)
1461     return true;
1462
1463   Base = N.getOperand(0);
1464   if (Base.getOpcode() == ISD::FrameIndex) {
1465     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1466     Base = CurDAG->getTargetFrameIndex(
1467         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1468   }
1469
1470   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1471   return true;
1472 }
1473
1474 //===--------------------------------------------------------------------===//
1475
1476 /// getAL - Returns a ARMCC::AL immediate node.
1477 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1478   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1479 }
1480
1481 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1482   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1483   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1484   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1485 }
1486
1487 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1488   LoadSDNode *LD = cast<LoadSDNode>(N);
1489   ISD::MemIndexedMode AM = LD->getAddressingMode();
1490   if (AM == ISD::UNINDEXED)
1491     return false;
1492
1493   EVT LoadedVT = LD->getMemoryVT();
1494   SDValue Offset, AMOpc;
1495   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1496   unsigned Opcode = 0;
1497   bool Match = false;
1498   if (LoadedVT == MVT::i32 && isPre &&
1499       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1500     Opcode = ARM::LDR_PRE_IMM;
1501     Match = true;
1502   } else if (LoadedVT == MVT::i32 && !isPre &&
1503       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1504     Opcode = ARM::LDR_POST_IMM;
1505     Match = true;
1506   } else if (LoadedVT == MVT::i32 &&
1507       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1508     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1509     Match = true;
1510
1511   } else if (LoadedVT == MVT::i16 &&
1512              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1513     Match = true;
1514     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1515       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1516       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1517   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1518     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1519       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1520         Match = true;
1521         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1522       }
1523     } else {
1524       if (isPre &&
1525           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1526         Match = true;
1527         Opcode = ARM::LDRB_PRE_IMM;
1528       } else if (!isPre &&
1529                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1530         Match = true;
1531         Opcode = ARM::LDRB_POST_IMM;
1532       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1533         Match = true;
1534         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1535       }
1536     }
1537   }
1538
1539   if (Match) {
1540     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1541       SDValue Chain = LD->getChain();
1542       SDValue Base = LD->getBasePtr();
1543       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1544                        CurDAG->getRegister(0, MVT::i32), Chain };
1545       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1546                                            MVT::Other, Ops);
1547       transferMemOperands(N, New);
1548       ReplaceNode(N, New);
1549       return true;
1550     } else {
1551       SDValue Chain = LD->getChain();
1552       SDValue Base = LD->getBasePtr();
1553       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1554                        CurDAG->getRegister(0, MVT::i32), Chain };
1555       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1556                                            MVT::Other, Ops);
1557       transferMemOperands(N, New);
1558       ReplaceNode(N, New);
1559       return true;
1560     }
1561   }
1562
1563   return false;
1564 }
1565
1566 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1567   LoadSDNode *LD = cast<LoadSDNode>(N);
1568   EVT LoadedVT = LD->getMemoryVT();
1569   ISD::MemIndexedMode AM = LD->getAddressingMode();
1570   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1571       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1572     return false;
1573
1574   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1575   if (!COffs || COffs->getZExtValue() != 4)
1576     return false;
1577
1578   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1579   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1580   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1581   // ISel.
1582   SDValue Chain = LD->getChain();
1583   SDValue Base = LD->getBasePtr();
1584   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1585                    CurDAG->getRegister(0, MVT::i32), Chain };
1586   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1587                                        MVT::i32, MVT::Other, Ops);
1588   transferMemOperands(N, New);
1589   ReplaceNode(N, New);
1590   return true;
1591 }
1592
1593 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1594   LoadSDNode *LD = cast<LoadSDNode>(N);
1595   ISD::MemIndexedMode AM = LD->getAddressingMode();
1596   if (AM == ISD::UNINDEXED)
1597     return false;
1598
1599   EVT LoadedVT = LD->getMemoryVT();
1600   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1601   SDValue Offset;
1602   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1603   unsigned Opcode = 0;
1604   bool Match = false;
1605   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1606     switch (LoadedVT.getSimpleVT().SimpleTy) {
1607     case MVT::i32:
1608       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1609       break;
1610     case MVT::i16:
1611       if (isSExtLd)
1612         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1613       else
1614         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1615       break;
1616     case MVT::i8:
1617     case MVT::i1:
1618       if (isSExtLd)
1619         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1620       else
1621         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1622       break;
1623     default:
1624       return false;
1625     }
1626     Match = true;
1627   }
1628
1629   if (Match) {
1630     SDValue Chain = LD->getChain();
1631     SDValue Base = LD->getBasePtr();
1632     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1633                      CurDAG->getRegister(0, MVT::i32), Chain };
1634     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1635                                          MVT::Other, Ops);
1636     transferMemOperands(N, New);
1637     ReplaceNode(N, New);
1638     return true;
1639   }
1640
1641   return false;
1642 }
1643
1644 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1645 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1646   SDLoc dl(V0.getNode());
1647   SDValue RegClass =
1648     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1649   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1650   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1651   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1652   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1653 }
1654
1655 /// \brief Form a D register from a pair of S registers.
1656 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1657   SDLoc dl(V0.getNode());
1658   SDValue RegClass =
1659     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1660   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1661   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1662   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1663   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1664 }
1665
1666 /// \brief Form a quad register from a pair of D registers.
1667 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1668   SDLoc dl(V0.getNode());
1669   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1670                                                MVT::i32);
1671   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1672   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1673   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1674   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1675 }
1676
1677 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1678 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1679   SDLoc dl(V0.getNode());
1680   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1681                                                MVT::i32);
1682   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1683   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1684   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1685   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1686 }
1687
1688 /// \brief Form 4 consecutive S registers.
1689 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1690                                    SDValue V2, SDValue V3) {
1691   SDLoc dl(V0.getNode());
1692   SDValue RegClass =
1693     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1694   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1695   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1696   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1697   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1698   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1699                                     V2, SubReg2, V3, SubReg3 };
1700   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1701 }
1702
1703 /// \brief Form 4 consecutive D registers.
1704 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1705                                    SDValue V2, SDValue V3) {
1706   SDLoc dl(V0.getNode());
1707   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1708                                                MVT::i32);
1709   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1710   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1711   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1712   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1713   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1714                                     V2, SubReg2, V3, SubReg3 };
1715   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1716 }
1717
1718 /// \brief Form 4 consecutive Q registers.
1719 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1720                                    SDValue V2, SDValue V3) {
1721   SDLoc dl(V0.getNode());
1722   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1723                                                MVT::i32);
1724   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1725   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1726   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1727   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1728   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1729                                     V2, SubReg2, V3, SubReg3 };
1730   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1731 }
1732
1733 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1734 /// of a NEON VLD or VST instruction.  The supported values depend on the
1735 /// number of registers being loaded.
1736 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1737                                        unsigned NumVecs, bool is64BitVector) {
1738   unsigned NumRegs = NumVecs;
1739   if (!is64BitVector && NumVecs < 3)
1740     NumRegs *= 2;
1741
1742   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1743   if (Alignment >= 32 && NumRegs == 4)
1744     Alignment = 32;
1745   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1746     Alignment = 16;
1747   else if (Alignment >= 8)
1748     Alignment = 8;
1749   else
1750     Alignment = 0;
1751
1752   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1753 }
1754
1755 static bool isVLDfixed(unsigned Opc)
1756 {
1757   switch (Opc) {
1758   default: return false;
1759   case ARM::VLD1d8wb_fixed : return true;
1760   case ARM::VLD1d16wb_fixed : return true;
1761   case ARM::VLD1d64Qwb_fixed : return true;
1762   case ARM::VLD1d32wb_fixed : return true;
1763   case ARM::VLD1d64wb_fixed : return true;
1764   case ARM::VLD1d64TPseudoWB_fixed : return true;
1765   case ARM::VLD1d64QPseudoWB_fixed : return true;
1766   case ARM::VLD1q8wb_fixed : return true;
1767   case ARM::VLD1q16wb_fixed : return true;
1768   case ARM::VLD1q32wb_fixed : return true;
1769   case ARM::VLD1q64wb_fixed : return true;
1770   case ARM::VLD1DUPd8wb_fixed : return true;
1771   case ARM::VLD1DUPd16wb_fixed : return true;
1772   case ARM::VLD1DUPd32wb_fixed : return true;
1773   case ARM::VLD1DUPq8wb_fixed : return true;
1774   case ARM::VLD1DUPq16wb_fixed : return true;
1775   case ARM::VLD1DUPq32wb_fixed : return true;
1776   case ARM::VLD2d8wb_fixed : return true;
1777   case ARM::VLD2d16wb_fixed : return true;
1778   case ARM::VLD2d32wb_fixed : return true;
1779   case ARM::VLD2q8PseudoWB_fixed : return true;
1780   case ARM::VLD2q16PseudoWB_fixed : return true;
1781   case ARM::VLD2q32PseudoWB_fixed : return true;
1782   case ARM::VLD2DUPd8wb_fixed : return true;
1783   case ARM::VLD2DUPd16wb_fixed : return true;
1784   case ARM::VLD2DUPd32wb_fixed : return true;
1785   }
1786 }
1787
1788 static bool isVSTfixed(unsigned Opc)
1789 {
1790   switch (Opc) {
1791   default: return false;
1792   case ARM::VST1d8wb_fixed : return true;
1793   case ARM::VST1d16wb_fixed : return true;
1794   case ARM::VST1d32wb_fixed : return true;
1795   case ARM::VST1d64wb_fixed : return true;
1796   case ARM::VST1q8wb_fixed : return true;
1797   case ARM::VST1q16wb_fixed : return true;
1798   case ARM::VST1q32wb_fixed : return true;
1799   case ARM::VST1q64wb_fixed : return true;
1800   case ARM::VST1d64TPseudoWB_fixed : return true;
1801   case ARM::VST1d64QPseudoWB_fixed : return true;
1802   case ARM::VST2d8wb_fixed : return true;
1803   case ARM::VST2d16wb_fixed : return true;
1804   case ARM::VST2d32wb_fixed : return true;
1805   case ARM::VST2q8PseudoWB_fixed : return true;
1806   case ARM::VST2q16PseudoWB_fixed : return true;
1807   case ARM::VST2q32PseudoWB_fixed : return true;
1808   }
1809 }
1810
1811 // Get the register stride update opcode of a VLD/VST instruction that
1812 // is otherwise equivalent to the given fixed stride updating instruction.
1813 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1814   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1815     && "Incorrect fixed stride updating instruction.");
1816   switch (Opc) {
1817   default: break;
1818   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1819   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1820   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1821   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1822   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1823   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1824   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1825   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1826   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1827   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1828   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1829   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1830   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1831   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1832   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1833   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1834   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1835   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1836
1837   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1838   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1839   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1840   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1841   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1842   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1843   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1844   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1845   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1846   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1847
1848   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1849   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1850   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1851   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1852   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1853   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1854
1855   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1856   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1857   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1858   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1859   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1860   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1861
1862   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1863   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1864   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1865   }
1866   return Opc; // If not one we handle, return it unchanged.
1867 }
1868
1869 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1870                                 const uint16_t *DOpcodes,
1871                                 const uint16_t *QOpcodes0,
1872                                 const uint16_t *QOpcodes1) {
1873   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1874   SDLoc dl(N);
1875
1876   SDValue MemAddr, Align;
1877   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1878   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1879     return;
1880
1881   SDValue Chain = N->getOperand(0);
1882   EVT VT = N->getValueType(0);
1883   bool is64BitVector = VT.is64BitVector();
1884   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1885
1886   unsigned OpcodeIndex;
1887   switch (VT.getSimpleVT().SimpleTy) {
1888   default: llvm_unreachable("unhandled vld type");
1889     // Double-register operations:
1890   case MVT::v8i8:  OpcodeIndex = 0; break;
1891   case MVT::v4i16: OpcodeIndex = 1; break;
1892   case MVT::v2f32:
1893   case MVT::v2i32: OpcodeIndex = 2; break;
1894   case MVT::v1i64: OpcodeIndex = 3; break;
1895     // Quad-register operations:
1896   case MVT::v16i8: OpcodeIndex = 0; break;
1897   case MVT::v8i16: OpcodeIndex = 1; break;
1898   case MVT::v4f32:
1899   case MVT::v4i32: OpcodeIndex = 2; break;
1900   case MVT::v2f64:
1901   case MVT::v2i64: OpcodeIndex = 3;
1902     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1903     break;
1904   }
1905
1906   EVT ResTy;
1907   if (NumVecs == 1)
1908     ResTy = VT;
1909   else {
1910     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1911     if (!is64BitVector)
1912       ResTyElts *= 2;
1913     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1914   }
1915   std::vector<EVT> ResTys;
1916   ResTys.push_back(ResTy);
1917   if (isUpdating)
1918     ResTys.push_back(MVT::i32);
1919   ResTys.push_back(MVT::Other);
1920
1921   SDValue Pred = getAL(CurDAG, dl);
1922   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1923   SDNode *VLd;
1924   SmallVector<SDValue, 7> Ops;
1925
1926   // Double registers and VLD1/VLD2 quad registers are directly supported.
1927   if (is64BitVector || NumVecs <= 2) {
1928     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1929                     QOpcodes0[OpcodeIndex]);
1930     Ops.push_back(MemAddr);
1931     Ops.push_back(Align);
1932     if (isUpdating) {
1933       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1934       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1935       // case entirely when the rest are updated to that form, too.
1936       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1937         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1938       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1939       // check for that explicitly too. Horribly hacky, but temporary.
1940       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1941           !isa<ConstantSDNode>(Inc.getNode()))
1942         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1943     }
1944     Ops.push_back(Pred);
1945     Ops.push_back(Reg0);
1946     Ops.push_back(Chain);
1947     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1948
1949   } else {
1950     // Otherwise, quad registers are loaded with two separate instructions,
1951     // where one loads the even registers and the other loads the odd registers.
1952     EVT AddrTy = MemAddr.getValueType();
1953
1954     // Load the even subregs.  This is always an updating load, so that it
1955     // provides the address to the second load for the odd subregs.
1956     SDValue ImplDef =
1957       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1958     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1959     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1960                                           ResTy, AddrTy, MVT::Other, OpsA);
1961     Chain = SDValue(VLdA, 2);
1962
1963     // Load the odd subregs.
1964     Ops.push_back(SDValue(VLdA, 1));
1965     Ops.push_back(Align);
1966     if (isUpdating) {
1967       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1968       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1969              "only constant post-increment update allowed for VLD3/4");
1970       (void)Inc;
1971       Ops.push_back(Reg0);
1972     }
1973     Ops.push_back(SDValue(VLdA, 0));
1974     Ops.push_back(Pred);
1975     Ops.push_back(Reg0);
1976     Ops.push_back(Chain);
1977     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1978   }
1979
1980   // Transfer memoperands.
1981   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1982   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1983   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1984
1985   if (NumVecs == 1) {
1986     ReplaceNode(N, VLd);
1987     return;
1988   }
1989
1990   // Extract out the subregisters.
1991   SDValue SuperReg = SDValue(VLd, 0);
1992   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1993                     ARM::qsub_3 == ARM::qsub_0 + 3,
1994                 "Unexpected subreg numbering");
1995   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1996   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1997     ReplaceUses(SDValue(N, Vec),
1998                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1999   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2000   if (isUpdating)
2001     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2002   CurDAG->RemoveDeadNode(N);
2003 }
2004
2005 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2006                                 const uint16_t *DOpcodes,
2007                                 const uint16_t *QOpcodes0,
2008                                 const uint16_t *QOpcodes1) {
2009   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2010   SDLoc dl(N);
2011
2012   SDValue MemAddr, Align;
2013   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2014   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2015   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2016     return;
2017
2018   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2019   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2020
2021   SDValue Chain = N->getOperand(0);
2022   EVT VT = N->getOperand(Vec0Idx).getValueType();
2023   bool is64BitVector = VT.is64BitVector();
2024   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2025
2026   unsigned OpcodeIndex;
2027   switch (VT.getSimpleVT().SimpleTy) {
2028   default: llvm_unreachable("unhandled vst type");
2029     // Double-register operations:
2030   case MVT::v8i8:  OpcodeIndex = 0; break;
2031   case MVT::v4i16: OpcodeIndex = 1; break;
2032   case MVT::v2f32:
2033   case MVT::v2i32: OpcodeIndex = 2; break;
2034   case MVT::v1i64: OpcodeIndex = 3; break;
2035     // Quad-register operations:
2036   case MVT::v16i8: OpcodeIndex = 0; break;
2037   case MVT::v8i16: OpcodeIndex = 1; break;
2038   case MVT::v4f32:
2039   case MVT::v4i32: OpcodeIndex = 2; break;
2040   case MVT::v2f64:
2041   case MVT::v2i64: OpcodeIndex = 3;
2042     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
2043     break;
2044   }
2045
2046   std::vector<EVT> ResTys;
2047   if (isUpdating)
2048     ResTys.push_back(MVT::i32);
2049   ResTys.push_back(MVT::Other);
2050
2051   SDValue Pred = getAL(CurDAG, dl);
2052   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2053   SmallVector<SDValue, 7> Ops;
2054
2055   // Double registers and VST1/VST2 quad registers are directly supported.
2056   if (is64BitVector || NumVecs <= 2) {
2057     SDValue SrcReg;
2058     if (NumVecs == 1) {
2059       SrcReg = N->getOperand(Vec0Idx);
2060     } else if (is64BitVector) {
2061       // Form a REG_SEQUENCE to force register allocation.
2062       SDValue V0 = N->getOperand(Vec0Idx + 0);
2063       SDValue V1 = N->getOperand(Vec0Idx + 1);
2064       if (NumVecs == 2)
2065         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2066       else {
2067         SDValue V2 = N->getOperand(Vec0Idx + 2);
2068         // If it's a vst3, form a quad D-register and leave the last part as
2069         // an undef.
2070         SDValue V3 = (NumVecs == 3)
2071           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2072           : N->getOperand(Vec0Idx + 3);
2073         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2074       }
2075     } else {
2076       // Form a QQ register.
2077       SDValue Q0 = N->getOperand(Vec0Idx);
2078       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2079       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2080     }
2081
2082     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2083                     QOpcodes0[OpcodeIndex]);
2084     Ops.push_back(MemAddr);
2085     Ops.push_back(Align);
2086     if (isUpdating) {
2087       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2088       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2089       // case entirely when the rest are updated to that form, too.
2090       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2091         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2092       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2093       // check for that explicitly too. Horribly hacky, but temporary.
2094       if  (!isa<ConstantSDNode>(Inc.getNode()))
2095         Ops.push_back(Inc);
2096       else if (NumVecs > 2 && !isVSTfixed(Opc))
2097         Ops.push_back(Reg0);
2098     }
2099     Ops.push_back(SrcReg);
2100     Ops.push_back(Pred);
2101     Ops.push_back(Reg0);
2102     Ops.push_back(Chain);
2103     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2104
2105     // Transfer memoperands.
2106     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2107
2108     ReplaceNode(N, VSt);
2109     return;
2110   }
2111
2112   // Otherwise, quad registers are stored with two separate instructions,
2113   // where one stores the even registers and the other stores the odd registers.
2114
2115   // Form the QQQQ REG_SEQUENCE.
2116   SDValue V0 = N->getOperand(Vec0Idx + 0);
2117   SDValue V1 = N->getOperand(Vec0Idx + 1);
2118   SDValue V2 = N->getOperand(Vec0Idx + 2);
2119   SDValue V3 = (NumVecs == 3)
2120     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2121     : N->getOperand(Vec0Idx + 3);
2122   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2123
2124   // Store the even D registers.  This is always an updating store, so that it
2125   // provides the address to the second store for the odd subregs.
2126   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2127   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2128                                         MemAddr.getValueType(),
2129                                         MVT::Other, OpsA);
2130   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2131   Chain = SDValue(VStA, 1);
2132
2133   // Store the odd D registers.
2134   Ops.push_back(SDValue(VStA, 0));
2135   Ops.push_back(Align);
2136   if (isUpdating) {
2137     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2138     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2139            "only constant post-increment update allowed for VST3/4");
2140     (void)Inc;
2141     Ops.push_back(Reg0);
2142   }
2143   Ops.push_back(RegSeq);
2144   Ops.push_back(Pred);
2145   Ops.push_back(Reg0);
2146   Ops.push_back(Chain);
2147   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2148                                         Ops);
2149   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2150   ReplaceNode(N, VStB);
2151 }
2152
2153 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2154                                       unsigned NumVecs,
2155                                       const uint16_t *DOpcodes,
2156                                       const uint16_t *QOpcodes) {
2157   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2158   SDLoc dl(N);
2159
2160   SDValue MemAddr, Align;
2161   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2162   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2163   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2164     return;
2165
2166   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2167   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2168
2169   SDValue Chain = N->getOperand(0);
2170   unsigned Lane =
2171     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2172   EVT VT = N->getOperand(Vec0Idx).getValueType();
2173   bool is64BitVector = VT.is64BitVector();
2174
2175   unsigned Alignment = 0;
2176   if (NumVecs != 3) {
2177     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2178     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2179     if (Alignment > NumBytes)
2180       Alignment = NumBytes;
2181     if (Alignment < 8 && Alignment < NumBytes)
2182       Alignment = 0;
2183     // Alignment must be a power of two; make sure of that.
2184     Alignment = (Alignment & -Alignment);
2185     if (Alignment == 1)
2186       Alignment = 0;
2187   }
2188   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2189
2190   unsigned OpcodeIndex;
2191   switch (VT.getSimpleVT().SimpleTy) {
2192   default: llvm_unreachable("unhandled vld/vst lane type");
2193     // Double-register operations:
2194   case MVT::v8i8:  OpcodeIndex = 0; break;
2195   case MVT::v4i16: OpcodeIndex = 1; break;
2196   case MVT::v2f32:
2197   case MVT::v2i32: OpcodeIndex = 2; break;
2198     // Quad-register operations:
2199   case MVT::v8i16: OpcodeIndex = 0; break;
2200   case MVT::v4f32:
2201   case MVT::v4i32: OpcodeIndex = 1; break;
2202   }
2203
2204   std::vector<EVT> ResTys;
2205   if (IsLoad) {
2206     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2207     if (!is64BitVector)
2208       ResTyElts *= 2;
2209     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2210                                       MVT::i64, ResTyElts));
2211   }
2212   if (isUpdating)
2213     ResTys.push_back(MVT::i32);
2214   ResTys.push_back(MVT::Other);
2215
2216   SDValue Pred = getAL(CurDAG, dl);
2217   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2218
2219   SmallVector<SDValue, 8> Ops;
2220   Ops.push_back(MemAddr);
2221   Ops.push_back(Align);
2222   if (isUpdating) {
2223     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2224     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2225   }
2226
2227   SDValue SuperReg;
2228   SDValue V0 = N->getOperand(Vec0Idx + 0);
2229   SDValue V1 = N->getOperand(Vec0Idx + 1);
2230   if (NumVecs == 2) {
2231     if (is64BitVector)
2232       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2233     else
2234       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2235   } else {
2236     SDValue V2 = N->getOperand(Vec0Idx + 2);
2237     SDValue V3 = (NumVecs == 3)
2238       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2239       : N->getOperand(Vec0Idx + 3);
2240     if (is64BitVector)
2241       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2242     else
2243       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2244   }
2245   Ops.push_back(SuperReg);
2246   Ops.push_back(getI32Imm(Lane, dl));
2247   Ops.push_back(Pred);
2248   Ops.push_back(Reg0);
2249   Ops.push_back(Chain);
2250
2251   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2252                                   QOpcodes[OpcodeIndex]);
2253   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2254   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2255   if (!IsLoad) {
2256     ReplaceNode(N, VLdLn);
2257     return;
2258   }
2259
2260   // Extract the subregisters.
2261   SuperReg = SDValue(VLdLn, 0);
2262   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2263                     ARM::qsub_3 == ARM::qsub_0 + 3,
2264                 "Unexpected subreg numbering");
2265   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2266   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2267     ReplaceUses(SDValue(N, Vec),
2268                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2269   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2270   if (isUpdating)
2271     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2272   CurDAG->RemoveDeadNode(N);
2273 }
2274
2275 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2276                                    const uint16_t *DOpcodes,
2277                                    const uint16_t *QOpcodes) {
2278   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2279   SDLoc dl(N);
2280
2281   SDValue MemAddr, Align;
2282   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2283     return;
2284
2285   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2286   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2287
2288   SDValue Chain = N->getOperand(0);
2289   EVT VT = N->getValueType(0);
2290
2291   unsigned Alignment = 0;
2292   if (NumVecs != 3) {
2293     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2294     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2295     if (Alignment > NumBytes)
2296       Alignment = NumBytes;
2297     if (Alignment < 8 && Alignment < NumBytes)
2298       Alignment = 0;
2299     // Alignment must be a power of two; make sure of that.
2300     Alignment = (Alignment & -Alignment);
2301     if (Alignment == 1)
2302       Alignment = 0;
2303   }
2304   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2305
2306   unsigned Opc;
2307   switch (VT.getSimpleVT().SimpleTy) {
2308   default: llvm_unreachable("unhandled vld-dup type");
2309   case MVT::v8i8:  Opc = DOpcodes[0]; break;
2310   case MVT::v16i8: Opc = QOpcodes[0]; break;
2311   case MVT::v4i16: Opc = DOpcodes[1]; break;
2312   case MVT::v8i16: Opc = QOpcodes[1]; break;
2313   case MVT::v2f32:
2314   case MVT::v2i32: Opc = DOpcodes[2]; break;
2315   case MVT::v4f32:
2316   case MVT::v4i32: Opc = QOpcodes[2]; break;
2317   }
2318
2319   SDValue Pred = getAL(CurDAG, dl);
2320   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2321   SmallVector<SDValue, 6> Ops;
2322   Ops.push_back(MemAddr);
2323   Ops.push_back(Align);
2324   if (isUpdating) {
2325     // fixed-stride update instructions don't have an explicit writeback
2326     // operand. It's implicit in the opcode itself.
2327     SDValue Inc = N->getOperand(2);
2328     if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2329       Opc = getVLDSTRegisterUpdateOpcode(Opc);
2330     if (!isa<ConstantSDNode>(Inc.getNode()))
2331       Ops.push_back(Inc);
2332     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2333     else if (NumVecs > 2)
2334       Ops.push_back(Reg0);
2335   }
2336   Ops.push_back(Pred);
2337   Ops.push_back(Reg0);
2338   Ops.push_back(Chain);
2339
2340   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2341   std::vector<EVT> ResTys;
2342   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2343   if (isUpdating)
2344     ResTys.push_back(MVT::i32);
2345   ResTys.push_back(MVT::Other);
2346   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2347   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2348
2349   // Extract the subregisters.
2350   if (NumVecs == 1) {
2351     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2352   } else {
2353     SDValue SuperReg = SDValue(VLdDup, 0);
2354     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2355     unsigned SubIdx = ARM::dsub_0;
2356     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2357       ReplaceUses(SDValue(N, Vec),
2358                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2359   }
2360   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2361   if (isUpdating)
2362     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2363   CurDAG->RemoveDeadNode(N);
2364 }
2365
2366 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2367                                  unsigned Opc) {
2368   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2369   SDLoc dl(N);
2370   EVT VT = N->getValueType(0);
2371   unsigned FirstTblReg = IsExt ? 2 : 1;
2372
2373   // Form a REG_SEQUENCE to force register allocation.
2374   SDValue RegSeq;
2375   SDValue V0 = N->getOperand(FirstTblReg + 0);
2376   SDValue V1 = N->getOperand(FirstTblReg + 1);
2377   if (NumVecs == 2)
2378     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2379   else {
2380     SDValue V2 = N->getOperand(FirstTblReg + 2);
2381     // If it's a vtbl3, form a quad D-register and leave the last part as
2382     // an undef.
2383     SDValue V3 = (NumVecs == 3)
2384       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2385       : N->getOperand(FirstTblReg + 3);
2386     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2387   }
2388
2389   SmallVector<SDValue, 6> Ops;
2390   if (IsExt)
2391     Ops.push_back(N->getOperand(1));
2392   Ops.push_back(RegSeq);
2393   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2394   Ops.push_back(getAL(CurDAG, dl)); // predicate
2395   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2396   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2397 }
2398
2399 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2400   if (!Subtarget->hasV6T2Ops())
2401     return false;
2402
2403   unsigned Opc = isSigned
2404     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2405     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2406   SDLoc dl(N);
2407
2408   // For unsigned extracts, check for a shift right and mask
2409   unsigned And_imm = 0;
2410   if (N->getOpcode() == ISD::AND) {
2411     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2412
2413       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2414       if (And_imm & (And_imm + 1))
2415         return false;
2416
2417       unsigned Srl_imm = 0;
2418       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2419                                 Srl_imm)) {
2420         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2421
2422         // Note: The width operand is encoded as width-1.
2423         unsigned Width = countTrailingOnes(And_imm) - 1;
2424         unsigned LSB = Srl_imm;
2425
2426         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2427
2428         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2429           // It's cheaper to use a right shift to extract the top bits.
2430           if (Subtarget->isThumb()) {
2431             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2432             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2433                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2434                               getAL(CurDAG, dl), Reg0, Reg0 };
2435             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2436             return true;
2437           }
2438
2439           // ARM models shift instructions as MOVsi with shifter operand.
2440           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2441           SDValue ShOpc =
2442             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2443                                       MVT::i32);
2444           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2445                             getAL(CurDAG, dl), Reg0, Reg0 };
2446           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2447           return true;
2448         }
2449
2450         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2451                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2452                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2453                           getAL(CurDAG, dl), Reg0 };
2454         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2455         return true;
2456       }
2457     }
2458     return false;
2459   }
2460
2461   // Otherwise, we're looking for a shift of a shift
2462   unsigned Shl_imm = 0;
2463   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2464     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2465     unsigned Srl_imm = 0;
2466     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2467       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2468       // Note: The width operand is encoded as width-1.
2469       unsigned Width = 32 - Srl_imm - 1;
2470       int LSB = Srl_imm - Shl_imm;
2471       if (LSB < 0)
2472         return false;
2473       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2474       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2475                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2476                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2477                         getAL(CurDAG, dl), Reg0 };
2478       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2479       return true;
2480     }
2481   }
2482
2483   // Or we are looking for a shift of an and, with a mask operand
2484   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2485       isShiftedMask_32(And_imm)) {
2486     unsigned Srl_imm = 0;
2487     unsigned LSB = countTrailingZeros(And_imm);
2488     // Shift must be the same as the ands lsb
2489     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2490       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2491       unsigned MSB = 31 - countLeadingZeros(And_imm);
2492       // Note: The width operand is encoded as width-1.
2493       unsigned Width = MSB - LSB;
2494       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2495       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2496                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2497                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2498                         getAL(CurDAG, dl), Reg0 };
2499       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2500       return true;
2501     }
2502   }
2503
2504   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2505     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2506     unsigned LSB = 0;
2507     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2508         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2509       return false;
2510
2511     if (LSB + Width > 32)
2512       return false;
2513
2514     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2515     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2516                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2517                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2518                       getAL(CurDAG, dl), Reg0 };
2519     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2520     return true;
2521   }
2522
2523   return false;
2524 }
2525
2526 /// Target-specific DAG combining for ISD::XOR.
2527 /// Target-independent combining lowers SELECT_CC nodes of the form
2528 /// select_cc setg[ge] X,  0,  X, -X
2529 /// select_cc setgt    X, -1,  X, -X
2530 /// select_cc setl[te] X,  0, -X,  X
2531 /// select_cc setlt    X,  1, -X,  X
2532 /// which represent Integer ABS into:
2533 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2534 /// ARM instruction selection detects the latter and matches it to
2535 /// ARM::ABS or ARM::t2ABS machine node.
2536 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2537   SDValue XORSrc0 = N->getOperand(0);
2538   SDValue XORSrc1 = N->getOperand(1);
2539   EVT VT = N->getValueType(0);
2540
2541   if (Subtarget->isThumb1Only())
2542     return false;
2543
2544   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2545     return false;
2546
2547   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2548   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2549   SDValue SRASrc0 = XORSrc1.getOperand(0);
2550   SDValue SRASrc1 = XORSrc1.getOperand(1);
2551   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2552   EVT XType = SRASrc0.getValueType();
2553   unsigned Size = XType.getSizeInBits() - 1;
2554
2555   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2556       XType.isInteger() && SRAConstant != nullptr &&
2557       Size == SRAConstant->getZExtValue()) {
2558     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2559     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2560     return true;
2561   }
2562
2563   return false;
2564 }
2565
2566 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2567                                  bool Accumulate) {
2568   // For SM*WB, we need to some form of sext.
2569   // For SM*WT, we need to search for (sra X, 16)
2570   // Src1 then gets set to X.
2571   if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2572        SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2573        SignExt.getOpcode() == ISD::AssertSext) &&
2574        SignExt.getValueType() == MVT::i32) {
2575
2576     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2577     Src1 = SignExt.getOperand(0);
2578     return true;
2579   }
2580
2581   if (SignExt.getOpcode() != ISD::SRA)
2582     return false;
2583
2584   ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2585   if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2586     return false;
2587
2588   SDValue Op0 = SignExt.getOperand(0);
2589
2590   // The sign extend operand for SM*WB could be generated by a shl and ashr.
2591   if (Op0.getOpcode() == ISD::SHL) {
2592     SDValue SHL = Op0;
2593     ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2594     if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
2595       return false;
2596
2597     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2598     Src1 = Op0.getOperand(0);
2599     return true;
2600   }
2601   *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2602   Src1 = SignExt.getOperand(0);
2603   return true;
2604 }
2605
2606 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2607                                 SDValue &Src1, bool Accumulate) {
2608   // First we look for:
2609   // (add (or (srl ?, 16), (shl ?, 16)))
2610   if (OR.getOpcode() != ISD::OR)
2611     return false;
2612
2613   SDValue SRL = OR.getOperand(0);
2614   SDValue SHL = OR.getOperand(1);
2615
2616   if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2617     SRL = OR.getOperand(1);
2618     SHL = OR.getOperand(0);
2619     if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2620       return false;
2621   }
2622
2623   ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2624   ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2625   if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2626       SHLSrc1->getZExtValue() != 16)
2627     return false;
2628
2629   // The first operands to the shifts need to be the two results from the
2630   // same smul_lohi node.
2631   if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2632        SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
2633     return false;
2634
2635   SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2636   if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2637       SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2638     return false;
2639
2640   // Now we have:
2641   // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2642   // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2643   // For SMLAWB the 16-bit value will signed extended somehow.
2644   // For SMLAWT only the SRA is required.
2645
2646   // Check both sides of SMUL_LOHI
2647   if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2648     Src0 = SMULLOHI->getOperand(1);
2649   } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2650                                   Accumulate)) {
2651     Src0 = SMULLOHI->getOperand(0);
2652   } else {
2653     return false;
2654   }
2655   return true;
2656 }
2657
2658 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
2659   if (!Subtarget->hasV6Ops() ||
2660       (Subtarget->isThumb() && !Subtarget->hasThumb2()))
2661     return false;
2662
2663   SDLoc dl(N);
2664   SDValue Src0 = N->getOperand(0);
2665   SDValue Src1 = N->getOperand(1);
2666   SDValue A, B;
2667   unsigned Opc = 0;
2668
2669   if (N->getOpcode() == ISD::ADD) {
2670     if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2671       return false;
2672
2673     SDValue Acc;
2674     if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2675       Acc = Src1;
2676     } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2677       Acc = Src0;
2678     } else {
2679       return false;
2680     }
2681     if (Opc == 0)
2682       return false;
2683
2684     SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2685                       CurDAG->getRegister(0, MVT::i32) };
2686     CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
2687     return true;
2688   } else if (N->getOpcode() == ISD::OR &&
2689              SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2690     if (Opc == 0)
2691       return false;
2692
2693     SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2694                       CurDAG->getRegister(0, MVT::i32)};
2695     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2696     return true;
2697   }
2698   return false;
2699 }
2700
2701 /// We've got special pseudo-instructions for these
2702 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2703   unsigned Opcode;
2704   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2705   if (MemTy == MVT::i8)
2706     Opcode = ARM::CMP_SWAP_8;
2707   else if (MemTy == MVT::i16)
2708     Opcode = ARM::CMP_SWAP_16;
2709   else if (MemTy == MVT::i32)
2710     Opcode = ARM::CMP_SWAP_32;
2711   else
2712     llvm_unreachable("Unknown AtomicCmpSwap type");
2713
2714   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2715                    N->getOperand(0)};
2716   SDNode *CmpSwap = CurDAG->getMachineNode(
2717       Opcode, SDLoc(N),
2718       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2719
2720   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2721   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2722   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2723
2724   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2725   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2726   CurDAG->RemoveDeadNode(N);
2727 }
2728
2729 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2730   // The only time a CONCAT_VECTORS operation can have legal types is when
2731   // two 64-bit vectors are concatenated to a 128-bit vector.
2732   EVT VT = N->getValueType(0);
2733   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2734     llvm_unreachable("unexpected CONCAT_VECTORS");
2735   ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
2736 }
2737
2738 static Optional<std::pair<unsigned, unsigned>>
2739 getContiguousRangeOfSetBits(const APInt &A) {
2740   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2741   unsigned LastOne = A.countTrailingZeros();
2742   if (A.countPopulation() != (FirstOne - LastOne + 1))
2743     return Optional<std::pair<unsigned,unsigned>>();
2744   return std::make_pair(FirstOne, LastOne);
2745 }
2746
2747 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2748   assert(N->getOpcode() == ARMISD::CMPZ);
2749   SwitchEQNEToPLMI = false;
2750
2751   if (!Subtarget->isThumb())
2752     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2753     // LSR don't exist as standalone instructions - they need the barrel shifter.
2754     return;
2755
2756   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2757   SDValue And = N->getOperand(0);
2758   if (!And->hasOneUse())
2759     return;
2760
2761   SDValue Zero = N->getOperand(1);
2762   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2763       And->getOpcode() != ISD::AND)
2764     return;
2765   SDValue X = And.getOperand(0);
2766   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2767
2768   if (!C || !X->hasOneUse())
2769     return;
2770   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2771   if (!Range)
2772     return;
2773
2774   // There are several ways to lower this:
2775   SDNode *NewN;
2776   SDLoc dl(N);
2777
2778   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2779     if (Subtarget->isThumb2()) {
2780       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2781       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2782                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2783                         CurDAG->getRegister(0, MVT::i32) };
2784       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2785     } else {
2786       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2787                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2788                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2789       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2790     }
2791   };
2792
2793   if (Range->second == 0) {
2794     //  1. Mask includes the LSB -> Simply shift the top N bits off
2795     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2796     ReplaceNode(And.getNode(), NewN);
2797   } else if (Range->first == 31) {
2798     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2799     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2800     ReplaceNode(And.getNode(), NewN);
2801   } else if (Range->first == Range->second) {
2802     //  3. Only one bit is set. We can shift this into the sign bit and use a
2803     //     PL/MI comparison.
2804     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2805     ReplaceNode(And.getNode(), NewN);
2806
2807     SwitchEQNEToPLMI = true;
2808   } else if (!Subtarget->hasV6T2Ops()) {
2809     //  4. Do a double shift to clear bottom and top bits, but only in
2810     //     thumb-1 mode as in thumb-2 we can use UBFX.
2811     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2812     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2813                      Range->second + (31 - Range->first));
2814     ReplaceNode(And.getNode(), NewN);
2815   }
2816
2817 }
2818
2819 void ARMDAGToDAGISel::Select(SDNode *N) {
2820   SDLoc dl(N);
2821
2822   if (N->isMachineOpcode()) {
2823     N->setNodeId(-1);
2824     return;   // Already selected.
2825   }
2826
2827   switch (N->getOpcode()) {
2828   default: break;
2829   case ISD::ADD:
2830   case ISD::OR:
2831     if (trySMLAWSMULW(N))
2832       return;
2833     break;
2834   case ISD::WRITE_REGISTER:
2835     if (tryWriteRegister(N))
2836       return;
2837     break;
2838   case ISD::READ_REGISTER:
2839     if (tryReadRegister(N))
2840       return;
2841     break;
2842   case ISD::INLINEASM:
2843     if (tryInlineAsm(N))
2844       return;
2845     break;
2846   case ISD::XOR:
2847     // Select special operations if XOR node forms integer ABS pattern
2848     if (tryABSOp(N))
2849       return;
2850     // Other cases are autogenerated.
2851     break;
2852   case ISD::Constant: {
2853     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2854     // If we can't materialize the constant we need to use a literal pool
2855     if (ConstantMaterializationCost(Val) > 2) {
2856       SDValue CPIdx = CurDAG->getTargetConstantPool(
2857           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2858           TLI->getPointerTy(CurDAG->getDataLayout()));
2859
2860       SDNode *ResNode;
2861       if (Subtarget->isThumb()) {
2862         SDValue Pred = getAL(CurDAG, dl);
2863         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2864         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2865         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2866                                          Ops);
2867       } else {
2868         SDValue Ops[] = {
2869           CPIdx,
2870           CurDAG->getTargetConstant(0, dl, MVT::i32),
2871           getAL(CurDAG, dl),
2872           CurDAG->getRegister(0, MVT::i32),
2873           CurDAG->getEntryNode()
2874         };
2875         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2876                                          Ops);
2877       }
2878       ReplaceNode(N, ResNode);
2879       return;
2880     }
2881
2882     // Other cases are autogenerated.
2883     break;
2884   }
2885   case ISD::FrameIndex: {
2886     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2887     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2888     SDValue TFI = CurDAG->getTargetFrameIndex(
2889         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2890     if (Subtarget->isThumb1Only()) {
2891       // Set the alignment of the frame object to 4, to avoid having to generate
2892       // more than one ADD
2893       MachineFrameInfo &MFI = MF->getFrameInfo();
2894       if (MFI.getObjectAlignment(FI) < 4)
2895         MFI.setObjectAlignment(FI, 4);
2896       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2897                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2898       return;
2899     } else {
2900       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2901                       ARM::t2ADDri : ARM::ADDri);
2902       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2903                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2904                         CurDAG->getRegister(0, MVT::i32) };
2905       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2906       return;
2907     }
2908   }
2909   case ISD::SRL:
2910     if (tryV6T2BitfieldExtractOp(N, false))
2911       return;
2912     break;
2913   case ISD::SIGN_EXTEND_INREG:
2914   case ISD::SRA:
2915     if (tryV6T2BitfieldExtractOp(N, true))
2916       return;
2917     break;
2918   case ISD::MUL:
2919     if (Subtarget->isThumb1Only())
2920       break;
2921     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2922       unsigned RHSV = C->getZExtValue();
2923       if (!RHSV) break;
2924       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2925         unsigned ShImm = Log2_32(RHSV-1);
2926         if (ShImm >= 32)
2927           break;
2928         SDValue V = N->getOperand(0);
2929         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2930         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2931         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2932         if (Subtarget->isThumb()) {
2933           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2934           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2935           return;
2936         } else {
2937           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2938                             Reg0 };
2939           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2940           return;
2941         }
2942       }
2943       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2944         unsigned ShImm = Log2_32(RHSV+1);
2945         if (ShImm >= 32)
2946           break;
2947         SDValue V = N->getOperand(0);
2948         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2949         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2950         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2951         if (Subtarget->isThumb()) {
2952           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2953           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2954           return;
2955         } else {
2956           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2957                             Reg0 };
2958           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2959           return;
2960         }
2961       }
2962     }
2963     break;
2964   case ISD::AND: {
2965     // Check for unsigned bitfield extract
2966     if (tryV6T2BitfieldExtractOp(N, false))
2967       return;
2968
2969     // If an immediate is used in an AND node, it is possible that the immediate
2970     // can be more optimally materialized when negated. If this is the case we
2971     // can negate the immediate and use a BIC instead.
2972     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2973     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2974       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2975
2976       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2977       // immediate can be negated and fit in the immediate operand of
2978       // a t2BIC, don't do any manual transform here as this can be
2979       // handled by the generic ISel machinery.
2980       bool PreferImmediateEncoding =
2981         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2982       if (!PreferImmediateEncoding &&
2983           ConstantMaterializationCost(Imm) >
2984               ConstantMaterializationCost(~Imm)) {
2985         // The current immediate costs more to materialize than a negated
2986         // immediate, so negate the immediate and use a BIC.
2987         SDValue NewImm =
2988           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2989         // If the new constant didn't exist before, reposition it in the topological
2990         // ordering so it is just before N. Otherwise, don't touch its location.
2991         if (NewImm->getNodeId() == -1)
2992           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2993
2994         if (!Subtarget->hasThumb2()) {
2995           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2996                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2997                            CurDAG->getRegister(0, MVT::i32)};
2998           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2999           return;
3000         } else {
3001           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3002                            CurDAG->getRegister(0, MVT::i32),
3003                            CurDAG->getRegister(0, MVT::i32)};
3004           ReplaceNode(N,
3005                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3006           return;
3007         }
3008       }
3009     }
3010
3011     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3012     // of c1 are 0xffff, and lower 16-bits of c2 are 0. That is, the top 16-bits
3013     // are entirely contributed by c2 and lower 16-bits are entirely contributed
3014     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3015     // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)".
3016     EVT VT = N->getValueType(0);
3017     if (VT != MVT::i32)
3018       break;
3019     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3020       ? ARM::t2MOVTi16
3021       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3022     if (!Opc)
3023       break;
3024     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3025     N1C = dyn_cast<ConstantSDNode>(N1);
3026     if (!N1C)
3027       break;
3028     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3029       SDValue N2 = N0.getOperand(1);
3030       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3031       if (!N2C)
3032         break;
3033       unsigned N1CVal = N1C->getZExtValue();
3034       unsigned N2CVal = N2C->getZExtValue();
3035       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3036           (N1CVal & 0xffffU) == 0xffffU &&
3037           (N2CVal & 0xffffU) == 0x0U) {
3038         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3039                                                   dl, MVT::i32);
3040         SDValue Ops[] = { N0.getOperand(0), Imm16,
3041                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3042         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3043         return;
3044       }
3045     }
3046
3047     break;
3048   }
3049   case ARMISD::VMOVRRD:
3050     ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
3051                                           N->getOperand(0), getAL(CurDAG, dl),
3052                                           CurDAG->getRegister(0, MVT::i32)));
3053     return;
3054   case ISD::UMUL_LOHI: {
3055     if (Subtarget->isThumb1Only())
3056       break;
3057     if (Subtarget->isThumb()) {
3058       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3059                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3060       ReplaceNode(
3061           N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
3062       return;
3063     } else {
3064       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3065                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3066                         CurDAG->getRegister(0, MVT::i32) };
3067       ReplaceNode(N, CurDAG->getMachineNode(
3068                          Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
3069                          MVT::i32, MVT::i32, Ops));
3070       return;
3071     }
3072   }
3073   case ISD::SMUL_LOHI: {
3074     if (Subtarget->isThumb1Only())
3075       break;
3076     if (Subtarget->isThumb()) {
3077       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3078                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3079       ReplaceNode(
3080           N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
3081       return;
3082     } else {
3083       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3084                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3085                         CurDAG->getRegister(0, MVT::i32) };
3086       ReplaceNode(N, CurDAG->getMachineNode(
3087                          Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
3088                          MVT::i32, MVT::i32, Ops));
3089       return;
3090     }
3091   }
3092   case ARMISD::UMAAL: {
3093     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3094     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3095                       N->getOperand(2), N->getOperand(3),
3096                       getAL(CurDAG, dl),
3097                       CurDAG->getRegister(0, MVT::i32) };
3098     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3099     return;
3100   }
3101   case ARMISD::UMLAL:{
3102     // UMAAL is similar to UMLAL but it adds two 32-bit values to the
3103     // 64-bit multiplication result.
3104     if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
3105         N->getOperand(2).getOpcode() == ARMISD::ADDC &&
3106         N->getOperand(3).getOpcode() == ARMISD::ADDE) {
3107
3108       SDValue Addc = N->getOperand(2);
3109       SDValue Adde = N->getOperand(3);
3110
3111       if (Adde.getOperand(2).getNode() == Addc.getNode()) {
3112
3113         ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
3114         ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
3115
3116         if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
3117         {
3118           // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
3119           // RdLo = one operand to be added, lower 32-bits of res
3120           // RdHi = other operand to be added, upper 32-bits of res
3121           // Rn = first multiply operand
3122           // Rm = second multiply operand
3123           SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3124                             Addc.getOperand(0), Addc.getOperand(1),
3125                             getAL(CurDAG, dl),
3126                             CurDAG->getRegister(0, MVT::i32) };
3127           unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3128           CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
3129           return;
3130         }
3131       }
3132     }
3133
3134     if (Subtarget->isThumb()) {
3135       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3136                         N->getOperand(3), getAL(CurDAG, dl),
3137                         CurDAG->getRegister(0, MVT::i32)};
3138       ReplaceNode(
3139           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3140       return;
3141     }else{
3142       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3143                         N->getOperand(3), getAL(CurDAG, dl),
3144                         CurDAG->getRegister(0, MVT::i32),
3145                         CurDAG->getRegister(0, MVT::i32) };
3146       ReplaceNode(N, CurDAG->getMachineNode(
3147                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3148                          MVT::i32, MVT::i32, Ops));
3149       return;
3150     }
3151   }
3152   case ARMISD::SMLAL:{
3153     if (Subtarget->isThumb()) {
3154       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3155                         N->getOperand(3), getAL(CurDAG, dl),
3156                         CurDAG->getRegister(0, MVT::i32)};
3157       ReplaceNode(
3158           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3159       return;
3160     }else{
3161       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3162                         N->getOperand(3), getAL(CurDAG, dl),
3163                         CurDAG->getRegister(0, MVT::i32),
3164                         CurDAG->getRegister(0, MVT::i32) };
3165       ReplaceNode(N, CurDAG->getMachineNode(
3166                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3167                          MVT::i32, MVT::i32, Ops));
3168       return;
3169     }
3170   }
3171   case ARMISD::SUBE: {
3172     if (!Subtarget->hasV6Ops())
3173       break;
3174     // Look for a pattern to match SMMLS:
3175     // (sube acc, (smul_loHI a, b), (subc 0, (smul_LOhi a, b)))
3176     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3177         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3178         !SDValue(N, 1).use_empty())
3179       break;
3180
3181     if (Subtarget->isThumb())
3182       assert(Subtarget->hasThumb2() &&
3183              "This pattern should not be generated for Thumb");
3184
3185     SDValue SmulLoHi = N->getOperand(1);
3186     SDValue Subc = N->getOperand(2);
3187     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3188
3189     if (!Zero || Zero->getZExtValue() != 0 ||
3190         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3191         N->getOperand(1) != SmulLoHi.getValue(1) ||
3192         N->getOperand(2) != Subc.getValue(1))
3193       break;
3194
3195     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3196     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3197                       N->getOperand(0), getAL(CurDAG, dl),
3198                       CurDAG->getRegister(0, MVT::i32) };
3199     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3200     return;
3201   }
3202   case ISD::LOAD: {
3203     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3204       if (tryT2IndexedLoad(N))
3205         return;
3206     } else if (Subtarget->isThumb()) {
3207       if (tryT1IndexedLoad(N))
3208         return;
3209     } else if (tryARMIndexedLoad(N))
3210       return;
3211     // Other cases are autogenerated.
3212     break;
3213   }
3214   case ARMISD::BRCOND: {
3215     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3216     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3217     // Pattern complexity = 6  cost = 1  size = 0
3218
3219     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3220     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3221     // Pattern complexity = 6  cost = 1  size = 0
3222
3223     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3224     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3225     // Pattern complexity = 6  cost = 1  size = 0
3226
3227     unsigned Opc = Subtarget->isThumb() ?
3228       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3229     SDValue Chain = N->getOperand(0);
3230     SDValue N1 = N->getOperand(1);
3231     SDValue N2 = N->getOperand(2);
3232     SDValue N3 = N->getOperand(3);
3233     SDValue InFlag = N->getOperand(4);
3234     assert(N1.getOpcode() == ISD::BasicBlock);
3235     assert(N2.getOpcode() == ISD::Constant);
3236     assert(N3.getOpcode() == ISD::Register);
3237
3238     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3239
3240     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3241       bool SwitchEQNEToPLMI;
3242       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3243       InFlag = N->getOperand(4);
3244
3245       if (SwitchEQNEToPLMI) {
3246         switch ((ARMCC::CondCodes)CC) {
3247         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3248         case ARMCC::NE:
3249           CC = (unsigned)ARMCC::MI;
3250           break;
3251         case ARMCC::EQ:
3252           CC = (unsigned)ARMCC::PL;
3253           break;
3254         }
3255       }
3256     }
3257
3258     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3259     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3260     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3261                                              MVT::Glue, Ops);
3262     Chain = SDValue(ResNode, 0);
3263     if (N->getNumValues() == 2) {
3264       InFlag = SDValue(ResNode, 1);
3265       ReplaceUses(SDValue(N, 1), InFlag);
3266     }
3267     ReplaceUses(SDValue(N, 0),
3268                 SDValue(Chain.getNode(), Chain.getResNo()));
3269     CurDAG->RemoveDeadNode(N);
3270     return;
3271   }
3272
3273   case ARMISD::CMPZ: {
3274     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3275     //   This allows us to avoid materializing the expensive negative constant.
3276     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3277     //   for its glue output.
3278     SDValue X = N->getOperand(0);
3279     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3280     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3281       int64_t Addend = -C->getSExtValue();
3282
3283       SDNode *Add = nullptr;
3284       // In T2 mode, ADDS can be better than CMN if the immediate fits in a
3285       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3286       // Outside that range we can just use a CMN which is 32-bit but has a
3287       // 12-bit immediate range.
3288       if (Subtarget->isThumb2() && Addend < 1<<8) {
3289         SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3290                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3291                           CurDAG->getRegister(0, MVT::i32) };
3292         Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3293       } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
3294         // FIXME: Add T1 tADDi8 code.
3295         SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3296                          CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3297                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3298         Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
3299       } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
3300         SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3301                          CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3302                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3303         Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
3304       }
3305       if (Add) {
3306         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3307         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3308       }
3309     }
3310     // Other cases are autogenerated.
3311     break;
3312   }
3313
3314   case ARMISD::CMOV: {
3315     SDValue InFlag = N->getOperand(4);
3316
3317     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3318       bool SwitchEQNEToPLMI;
3319       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3320
3321       if (SwitchEQNEToPLMI) {
3322         SDValue ARMcc = N->getOperand(2);
3323         ARMCC::CondCodes CC =
3324           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3325
3326         switch (CC) {
3327         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3328         case ARMCC::NE:
3329           CC = ARMCC::MI;
3330           break;
3331         case ARMCC::EQ:
3332           CC = ARMCC::PL;
3333           break;
3334         }
3335         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3336         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3337                          N->getOperand(3), N->getOperand(4)};
3338         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3339       }
3340
3341     }
3342     // Other cases are autogenerated.
3343     break;
3344   }
3345
3346   case ARMISD::VZIP: {
3347     unsigned Opc = 0;
3348     EVT VT = N->getValueType(0);
3349     switch (VT.getSimpleVT().SimpleTy) {
3350     default: return;
3351     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3352     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3353     case MVT::v2f32:
3354     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3355     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3356     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3357     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3358     case MVT::v4f32:
3359     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3360     }
3361     SDValue Pred = getAL(CurDAG, dl);
3362     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3363     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3364     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3365     return;
3366   }
3367   case ARMISD::VUZP: {
3368     unsigned Opc = 0;
3369     EVT VT = N->getValueType(0);
3370     switch (VT.getSimpleVT().SimpleTy) {
3371     default: return;
3372     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3373     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3374     case MVT::v2f32:
3375     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3376     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3377     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3378     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3379     case MVT::v4f32:
3380     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3381     }
3382     SDValue Pred = getAL(CurDAG, dl);
3383     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3384     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3385     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3386     return;
3387   }
3388   case ARMISD::VTRN: {
3389     unsigned Opc = 0;
3390     EVT VT = N->getValueType(0);
3391     switch (VT.getSimpleVT().SimpleTy) {
3392     default: return;
3393     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3394     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3395     case MVT::v2f32:
3396     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3397     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3398     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3399     case MVT::v4f32:
3400     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3401     }
3402     SDValue Pred = getAL(CurDAG, dl);
3403     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3404     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3405     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3406     return;
3407   }
3408   case ARMISD::BUILD_VECTOR: {
3409     EVT VecVT = N->getValueType(0);
3410     EVT EltVT = VecVT.getVectorElementType();
3411     unsigned NumElts = VecVT.getVectorNumElements();
3412     if (EltVT == MVT::f64) {
3413       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3414       ReplaceNode(
3415           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3416       return;
3417     }
3418     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3419     if (NumElts == 2) {
3420       ReplaceNode(
3421           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3422       return;
3423     }
3424     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3425     ReplaceNode(N,
3426                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3427                                     N->getOperand(2), N->getOperand(3)));
3428     return;
3429   }
3430
3431   case ARMISD::VLD1DUP: {
3432     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3433                                          ARM::VLD1DUPd32 };
3434     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3435                                          ARM::VLD1DUPq32 };
3436     SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3437     return;
3438   }
3439
3440   case ARMISD::VLD2DUP: {
3441     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3442                                         ARM::VLD2DUPd32 };
3443     SelectVLDDup(N, false, 2, Opcodes);
3444     return;
3445   }
3446
3447   case ARMISD::VLD3DUP: {
3448     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3449                                         ARM::VLD3DUPd16Pseudo,
3450                                         ARM::VLD3DUPd32Pseudo };
3451     SelectVLDDup(N, false, 3, Opcodes);
3452     return;
3453   }
3454
3455   case ARMISD::VLD4DUP: {
3456     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3457                                         ARM::VLD4DUPd16Pseudo,
3458                                         ARM::VLD4DUPd32Pseudo };
3459     SelectVLDDup(N, false, 4, Opcodes);
3460     return;
3461   }
3462
3463   case ARMISD::VLD1DUP_UPD: {
3464     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3465                                          ARM::VLD1DUPd16wb_fixed,
3466                                          ARM::VLD1DUPd32wb_fixed };
3467     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3468                                          ARM::VLD1DUPq16wb_fixed,
3469                                          ARM::VLD1DUPq32wb_fixed };
3470     SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3471     return;
3472   }
3473
3474   case ARMISD::VLD2DUP_UPD: {
3475     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3476                                         ARM::VLD2DUPd16wb_fixed,
3477                                         ARM::VLD2DUPd32wb_fixed };
3478     SelectVLDDup(N, true, 2, Opcodes);
3479     return;
3480   }
3481
3482   case ARMISD::VLD3DUP_UPD: {
3483     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3484                                         ARM::VLD3DUPd16Pseudo_UPD,
3485                                         ARM::VLD3DUPd32Pseudo_UPD };
3486     SelectVLDDup(N, true, 3, Opcodes);
3487     return;
3488   }
3489
3490   case ARMISD::VLD4DUP_UPD: {
3491     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3492                                         ARM::VLD4DUPd16Pseudo_UPD,
3493                                         ARM::VLD4DUPd32Pseudo_UPD };
3494     SelectVLDDup(N, true, 4, Opcodes);
3495     return;
3496   }
3497
3498   case ARMISD::VLD1_UPD: {
3499     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3500                                          ARM::VLD1d16wb_fixed,
3501                                          ARM::VLD1d32wb_fixed,
3502                                          ARM::VLD1d64wb_fixed };
3503     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3504                                          ARM::VLD1q16wb_fixed,
3505                                          ARM::VLD1q32wb_fixed,
3506                                          ARM::VLD1q64wb_fixed };
3507     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3508     return;
3509   }
3510
3511   case ARMISD::VLD2_UPD: {
3512     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3513                                          ARM::VLD2d16wb_fixed,
3514                                          ARM::VLD2d32wb_fixed,
3515                                          ARM::VLD1q64wb_fixed};
3516     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3517                                          ARM::VLD2q16PseudoWB_fixed,
3518                                          ARM::VLD2q32PseudoWB_fixed };
3519     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3520     return;
3521   }
3522
3523   case ARMISD::VLD3_UPD: {
3524     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3525                                          ARM::VLD3d16Pseudo_UPD,
3526                                          ARM::VLD3d32Pseudo_UPD,
3527                                          ARM::VLD1d64TPseudoWB_fixed};
3528     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3529                                           ARM::VLD3q16Pseudo_UPD,
3530                                           ARM::VLD3q32Pseudo_UPD };
3531     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3532                                           ARM::VLD3q16oddPseudo_UPD,
3533                                           ARM::VLD3q32oddPseudo_UPD };
3534     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3535     return;
3536   }
3537
3538   case ARMISD::VLD4_UPD: {
3539     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3540                                          ARM::VLD4d16Pseudo_UPD,
3541                                          ARM::VLD4d32Pseudo_UPD,
3542                                          ARM::VLD1d64QPseudoWB_fixed};
3543     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3544                                           ARM::VLD4q16Pseudo_UPD,
3545                                           ARM::VLD4q32Pseudo_UPD };
3546     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3547                                           ARM::VLD4q16oddPseudo_UPD,
3548                                           ARM::VLD4q32oddPseudo_UPD };
3549     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3550     return;
3551   }
3552
3553   case ARMISD::VLD2LN_UPD: {
3554     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3555                                          ARM::VLD2LNd16Pseudo_UPD,
3556                                          ARM::VLD2LNd32Pseudo_UPD };
3557     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3558                                          ARM::VLD2LNq32Pseudo_UPD };
3559     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3560     return;
3561   }
3562
3563   case ARMISD::VLD3LN_UPD: {
3564     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3565                                          ARM::VLD3LNd16Pseudo_UPD,
3566                                          ARM::VLD3LNd32Pseudo_UPD };
3567     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3568                                          ARM::VLD3LNq32Pseudo_UPD };
3569     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3570     return;
3571   }
3572
3573   case ARMISD::VLD4LN_UPD: {
3574     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3575                                          ARM::VLD4LNd16Pseudo_UPD,
3576                                          ARM::VLD4LNd32Pseudo_UPD };
3577     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3578                                          ARM::VLD4LNq32Pseudo_UPD };
3579     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3580     return;
3581   }
3582
3583   case ARMISD::VST1_UPD: {
3584     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3585                                          ARM::VST1d16wb_fixed,
3586                                          ARM::VST1d32wb_fixed,
3587                                          ARM::VST1d64wb_fixed };
3588     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3589                                          ARM::VST1q16wb_fixed,
3590                                          ARM::VST1q32wb_fixed,
3591                                          ARM::VST1q64wb_fixed };
3592     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3593     return;
3594   }
3595
3596   case ARMISD::VST2_UPD: {
3597     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3598                                          ARM::VST2d16wb_fixed,
3599                                          ARM::VST2d32wb_fixed,
3600                                          ARM::VST1q64wb_fixed};
3601     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3602                                          ARM::VST2q16PseudoWB_fixed,
3603                                          ARM::VST2q32PseudoWB_fixed };
3604     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3605     return;
3606   }
3607
3608   case ARMISD::VST3_UPD: {
3609     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3610                                          ARM::VST3d16Pseudo_UPD,
3611                                          ARM::VST3d32Pseudo_UPD,
3612                                          ARM::VST1d64TPseudoWB_fixed};
3613     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3614                                           ARM::VST3q16Pseudo_UPD,
3615                                           ARM::VST3q32Pseudo_UPD };
3616     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3617                                           ARM::VST3q16oddPseudo_UPD,
3618                                           ARM::VST3q32oddPseudo_UPD };
3619     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3620     return;
3621   }
3622
3623   case ARMISD::VST4_UPD: {
3624     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3625                                          ARM::VST4d16Pseudo_UPD,
3626                                          ARM::VST4d32Pseudo_UPD,
3627                                          ARM::VST1d64QPseudoWB_fixed};
3628     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3629                                           ARM::VST4q16Pseudo_UPD,
3630                                           ARM::VST4q32Pseudo_UPD };
3631     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3632                                           ARM::VST4q16oddPseudo_UPD,
3633                                           ARM::VST4q32oddPseudo_UPD };
3634     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3635     return;
3636   }
3637
3638   case ARMISD::VST2LN_UPD: {
3639     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3640                                          ARM::VST2LNd16Pseudo_UPD,
3641                                          ARM::VST2LNd32Pseudo_UPD };
3642     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3643                                          ARM::VST2LNq32Pseudo_UPD };
3644     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3645     return;
3646   }
3647
3648   case ARMISD::VST3LN_UPD: {
3649     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3650                                          ARM::VST3LNd16Pseudo_UPD,
3651                                          ARM::VST3LNd32Pseudo_UPD };
3652     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3653                                          ARM::VST3LNq32Pseudo_UPD };
3654     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3655     return;
3656   }
3657
3658   case ARMISD::VST4LN_UPD: {
3659     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3660                                          ARM::VST4LNd16Pseudo_UPD,
3661                                          ARM::VST4LNd32Pseudo_UPD };
3662     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3663                                          ARM::VST4LNq32Pseudo_UPD };
3664     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3665     return;
3666   }
3667
3668   case ISD::INTRINSIC_VOID:
3669   case ISD::INTRINSIC_W_CHAIN: {
3670     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3671     switch (IntNo) {
3672     default:
3673       break;
3674
3675     case Intrinsic::arm_mrrc:
3676     case Intrinsic::arm_mrrc2: {
3677       SDLoc dl(N);
3678       SDValue Chain = N->getOperand(0);
3679       unsigned Opc;
3680
3681       if (Subtarget->isThumb())
3682         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3683       else
3684         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3685
3686       SmallVector<SDValue, 5> Ops;
3687       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3688       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3689       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3690
3691       // The ARM mrrc2 instruction cannot be predicated: the top 4 bits of its
3692       // encoding are always '1111'. Assembly may still spell an AL predicate on
3693       // mrrc2, but that makes no difference to the encoded instruction.
3694       if (Opc != ARM::MRRC2) {
3695         Ops.push_back(getAL(CurDAG, dl));
3696         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3697       }
3698
3699       Ops.push_back(Chain);
3700
3701       // Writes to two registers.
3702       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3703
3704       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3705       return;
3706     }
3707     case Intrinsic::arm_ldaexd:
3708     case Intrinsic::arm_ldrexd: {
3709       SDLoc dl(N);
3710       SDValue Chain = N->getOperand(0);
3711       SDValue MemAddr = N->getOperand(2);
3712       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3713
3714       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3715       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3716                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3717
3718       // arm_ldrexd returns a i64 value in {i32, i32}
3719       std::vector<EVT> ResTys;
3720       if (isThumb) {
3721         ResTys.push_back(MVT::i32);
3722         ResTys.push_back(MVT::i32);
3723       } else
3724         ResTys.push_back(MVT::Untyped);
3725       ResTys.push_back(MVT::Other);
3726
3727       // Place arguments in the right order.
3728       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3729                        CurDAG->getRegister(0, MVT::i32), Chain};
3730       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3731       // Transfer memoperands.
3732       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3733       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3734       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3735
3736       // Remap uses.
3737       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3738       if (!SDValue(N, 0).use_empty()) {
3739         SDValue Result;
3740         if (isThumb)
3741           Result = SDValue(Ld, 0);
3742         else {
3743           SDValue SubRegIdx =
3744             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3745           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3746               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3747           Result = SDValue(ResNode,0);
3748         }
3749         ReplaceUses(SDValue(N, 0), Result);
3750       }
3751       if (!SDValue(N, 1).use_empty()) {
3752         SDValue Result;
3753         if (isThumb)
3754           Result = SDValue(Ld, 1);
3755         else {
3756           SDValue SubRegIdx =
3757             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3758           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3759               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3760           Result = SDValue(ResNode,0);
3761         }
3762         ReplaceUses(SDValue(N, 1), Result);
3763       }
3764       ReplaceUses(SDValue(N, 2), OutChain);
3765       CurDAG->RemoveDeadNode(N);
3766       return;
3767     }
3768     case Intrinsic::arm_stlexd:
3769     case Intrinsic::arm_strexd: {
3770       SDLoc dl(N);
3771       SDValue Chain = N->getOperand(0);
3772       SDValue Val0 = N->getOperand(2);
3773       SDValue Val1 = N->getOperand(3);
3774       SDValue MemAddr = N->getOperand(4);
3775
3776       // Store exclusive double return a i32 value which is the return status
3777       // of the issued store.
3778       const EVT ResTys[] = {MVT::i32, MVT::Other};
3779
3780       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3781       // Place arguments in the right order.
3782       SmallVector<SDValue, 7> Ops;
3783       if (isThumb) {
3784         Ops.push_back(Val0);
3785         Ops.push_back(Val1);
3786       } else
3787         // arm_strexd uses GPRPair.
3788         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3789       Ops.push_back(MemAddr);
3790       Ops.push_back(getAL(CurDAG, dl));
3791       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3792       Ops.push_back(Chain);
3793
3794       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3795       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3796                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3797
3798       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3799       // Transfer memoperands.
3800       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3801       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3802       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3803
3804       ReplaceNode(N, St);
3805       return;
3806     }
3807
3808     case Intrinsic::arm_neon_vld1: {
3809       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3810                                            ARM::VLD1d32, ARM::VLD1d64 };
3811       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3812                                            ARM::VLD1q32, ARM::VLD1q64};
3813       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3814       return;
3815     }
3816
3817     case Intrinsic::arm_neon_vld2: {
3818       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3819                                            ARM::VLD2d32, ARM::VLD1q64 };
3820       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3821                                            ARM::VLD2q32Pseudo };
3822       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3823       return;
3824     }
3825
3826     case Intrinsic::arm_neon_vld3: {
3827       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3828                                            ARM::VLD3d16Pseudo,
3829                                            ARM::VLD3d32Pseudo,
3830                                            ARM::VLD1d64TPseudo };
3831       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3832                                             ARM::VLD3q16Pseudo_UPD,
3833                                             ARM::VLD3q32Pseudo_UPD };
3834       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3835                                             ARM::VLD3q16oddPseudo,
3836                                             ARM::VLD3q32oddPseudo };
3837       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3838       return;
3839     }
3840
3841     case Intrinsic::arm_neon_vld4: {
3842       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3843                                            ARM::VLD4d16Pseudo,
3844                                            ARM::VLD4d32Pseudo,
3845                                            ARM::VLD1d64QPseudo };
3846       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3847                                             ARM::VLD4q16Pseudo_UPD,
3848                                             ARM::VLD4q32Pseudo_UPD };
3849       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3850                                             ARM::VLD4q16oddPseudo,
3851                                             ARM::VLD4q32oddPseudo };
3852       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3853       return;
3854     }
3855
3856     case Intrinsic::arm_neon_vld2lane: {
3857       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3858                                            ARM::VLD2LNd16Pseudo,
3859                                            ARM::VLD2LNd32Pseudo };
3860       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3861                                            ARM::VLD2LNq32Pseudo };
3862       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3863       return;
3864     }
3865
3866     case Intrinsic::arm_neon_vld3lane: {
3867       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3868                                            ARM::VLD3LNd16Pseudo,
3869                                            ARM::VLD3LNd32Pseudo };
3870       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3871                                            ARM::VLD3LNq32Pseudo };
3872       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3873       return;
3874     }
3875
3876     case Intrinsic::arm_neon_vld4lane: {
3877       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3878                                            ARM::VLD4LNd16Pseudo,
3879                                            ARM::VLD4LNd32Pseudo };
3880       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3881                                            ARM::VLD4LNq32Pseudo };
3882       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3883       return;
3884     }
3885
3886     case Intrinsic::arm_neon_vst1: {
3887       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3888                                            ARM::VST1d32, ARM::VST1d64 };
3889       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3890                                            ARM::VST1q32, ARM::VST1q64 };
3891       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3892       return;
3893     }
3894
3895     case Intrinsic::arm_neon_vst2: {
3896       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3897                                            ARM::VST2d32, ARM::VST1q64 };
3898       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3899                                            ARM::VST2q32Pseudo };
3900       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3901       return;
3902     }
3903
3904     case Intrinsic::arm_neon_vst3: {
3905       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3906                                            ARM::VST3d16Pseudo,
3907                                            ARM::VST3d32Pseudo,
3908                                            ARM::VST1d64TPseudo };
3909       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3910                                             ARM::VST3q16Pseudo_UPD,
3911                                             ARM::VST3q32Pseudo_UPD };
3912       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3913                                             ARM::VST3q16oddPseudo,
3914                                             ARM::VST3q32oddPseudo };
3915       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3916       return;
3917     }
3918
3919     case Intrinsic::arm_neon_vst4: {
3920       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3921                                            ARM::VST4d16Pseudo,
3922                                            ARM::VST4d32Pseudo,
3923                                            ARM::VST1d64QPseudo };
3924       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3925                                             ARM::VST4q16Pseudo_UPD,
3926                                             ARM::VST4q32Pseudo_UPD };
3927       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3928                                             ARM::VST4q16oddPseudo,
3929                                             ARM::VST4q32oddPseudo };
3930       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3931       return;
3932     }
3933
3934     case Intrinsic::arm_neon_vst2lane: {
3935       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3936                                            ARM::VST2LNd16Pseudo,
3937                                            ARM::VST2LNd32Pseudo };
3938       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3939                                            ARM::VST2LNq32Pseudo };
3940       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3941       return;
3942     }
3943
3944     case Intrinsic::arm_neon_vst3lane: {
3945       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3946                                            ARM::VST3LNd16Pseudo,
3947                                            ARM::VST3LNd32Pseudo };
3948       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3949                                            ARM::VST3LNq32Pseudo };
3950       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3951       return;
3952     }
3953
3954     case Intrinsic::arm_neon_vst4lane: {
3955       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3956                                            ARM::VST4LNd16Pseudo,
3957                                            ARM::VST4LNd32Pseudo };
3958       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3959                                            ARM::VST4LNq32Pseudo };
3960       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3961       return;
3962     }
3963     }
3964     break;
3965   }
3966
3967   case ISD::INTRINSIC_WO_CHAIN: {
3968     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3969     switch (IntNo) {
3970     default:
3971       break;
3972
3973     case Intrinsic::arm_neon_vtbl2:
3974       SelectVTBL(N, false, 2, ARM::VTBL2);
3975       return;
3976     case Intrinsic::arm_neon_vtbl3:
3977       SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3978       return;
3979     case Intrinsic::arm_neon_vtbl4:
3980       SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3981       return;
3982
3983     case Intrinsic::arm_neon_vtbx2:
3984       SelectVTBL(N, true, 2, ARM::VTBX2);
3985       return;
3986     case Intrinsic::arm_neon_vtbx3:
3987       SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3988       return;
3989     case Intrinsic::arm_neon_vtbx4:
3990       SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3991       return;
3992     }
3993     break;
3994   }
3995
3996   case ARMISD::VTBL1: {
3997     SDLoc dl(N);
3998     EVT VT = N->getValueType(0);
3999     SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
4000                      getAL(CurDAG, dl),                 // Predicate
4001                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
4002     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
4003     return;
4004   }
4005   case ARMISD::VTBL2: {
4006     SDLoc dl(N);
4007     EVT VT = N->getValueType(0);
4008
4009     // Form a REG_SEQUENCE to force register allocation.
4010     SDValue V0 = N->getOperand(0);
4011     SDValue V1 = N->getOperand(1);
4012     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
4013
4014     SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
4015                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
4016     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
4017     return;
4018   }
4019
4020   case ISD::CONCAT_VECTORS:
4021     SelectConcatVector(N);
4022     return;
4023
4024   case ISD::ATOMIC_CMP_SWAP:
4025     SelectCMP_SWAP(N);
4026     return;
4027   }
4028
4029   SelectCode(N);
4030 }
4031
4032 // Inspect a register string of the form
4033 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4034 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4035 // and obtain the integer operands from them, adding these operands to the
4036 // provided vector.
4037 static void getIntOperandsFromRegisterString(StringRef RegString,
4038                                              SelectionDAG *CurDAG,
4039                                              const SDLoc &DL,
4040                                              std::vector<SDValue> &Ops) {
4041   SmallVector<StringRef, 5> Fields;
4042   RegString.split(Fields, ':');
4043
4044   if (Fields.size() > 1) {
4045     bool AllIntFields = true;
4046
4047     for (StringRef Field : Fields) {
4048       // Need to trim out leading 'cp' characters and get the integer field.
4049       unsigned IntField;
4050       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4051       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4052     }
4053
4054     assert(AllIntFields &&
4055             "Unexpected non-integer value in special register string.");
4056   }
4057 }
4058
4059 // Maps a Banked Register string to its mask value. The mask value returned is
4060 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4061 // mask operand, which expresses which register is to be used, e.g. r8, and in
4062 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4063 // was invalid.
4064 static inline int getBankedRegisterMask(StringRef RegString) {
4065   return StringSwitch<int>(RegString.lower())
4066           .Case("r8_usr", 0x00)
4067           .Case("r9_usr", 0x01)
4068           .Case("r10_usr", 0x02)
4069           .Case("r11_usr", 0x03)
4070           .Case("r12_usr", 0x04)
4071           .Case("sp_usr", 0x05)
4072           .Case("lr_usr", 0x06)
4073           .Case("r8_fiq", 0x08)
4074           .Case("r9_fiq", 0x09)
4075           .Case("r10_fiq", 0x0a)
4076           .Case("r11_fiq", 0x0b)
4077           .Case("r12_fiq", 0x0c)
4078           .Case("sp_fiq", 0x0d)
4079           .Case("lr_fiq", 0x0e)
4080           .Case("lr_irq", 0x10)
4081           .Case("sp_irq", 0x11)
4082           .Case("lr_svc", 0x12)
4083           .Case("sp_svc", 0x13)
4084           .Case("lr_abt", 0x14)
4085           .Case("sp_abt", 0x15)
4086           .Case("lr_und", 0x16)
4087           .Case("sp_und", 0x17)
4088           .Case("lr_mon", 0x1c)
4089           .Case("sp_mon", 0x1d)
4090           .Case("elr_hyp", 0x1e)
4091           .Case("sp_hyp", 0x1f)
4092           .Case("spsr_fiq", 0x2e)
4093           .Case("spsr_irq", 0x30)
4094           .Case("spsr_svc", 0x32)
4095           .Case("spsr_abt", 0x34)
4096           .Case("spsr_und", 0x36)
4097           .Case("spsr_mon", 0x3c)
4098           .Case("spsr_hyp", 0x3e)
4099           .Default(-1);
4100 }
4101
4102 // Maps a MClass special register string to its value for use in the
4103 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
4104 // Returns -1 to signify that the string was invalid.
4105 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
4106   return StringSwitch<int>(RegString.lower())
4107           .Case("apsr", 0x0)
4108           .Case("iapsr", 0x1)
4109           .Case("eapsr", 0x2)
4110           .Case("xpsr", 0x3)
4111           .Case("ipsr", 0x5)
4112           .Case("epsr", 0x6)
4113           .Case("iepsr", 0x7)
4114           .Case("msp", 0x8)
4115           .Case("psp", 0x9)
4116           .Case("primask", 0x10)
4117           .Case("basepri", 0x11)
4118           .Case("basepri_max", 0x12)
4119           .Case("faultmask", 0x13)
4120           .Case("control", 0x14)
4121           .Case("msplim", 0x0a)
4122           .Case("psplim", 0x0b)
4123           .Case("sp", 0x18)
4124           .Default(-1);
4125 }
4126
4127 // The flags here are common to those allowed for apsr in the A class cores and
4128 // those allowed for the special registers in the M class cores. Returns a
4129 // value representing which flags were present, -1 if invalid.
4130 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
4131   if (Flags.empty())
4132     return 0x2 | (int)hasDSP;
4133
4134   return StringSwitch<int>(Flags)
4135           .Case("g", 0x1)
4136           .Case("nzcvq", 0x2)
4137           .Case("nzcvqg", 0x3)
4138           .Default(-1);
4139 }
4140
4141 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
4142                                  const ARMSubtarget *Subtarget) {
4143   // Ensure that the register (without flags) was a valid M Class special
4144   // register.
4145   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
4146   if (SYSmvalue == -1)
4147     return -1;
4148
4149   // basepri, basepri_max and faultmask are only valid for V7m.
4150   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
4151     return -1;
4152
4153   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
4154     Flags = "";
4155     SYSmvalue |= 0x80;
4156   }
4157
4158   if (!Subtarget->has8MSecExt() &&
4159       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
4160     return -1;
4161
4162   if (!Subtarget->hasV8MMainlineOps() &&
4163       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
4164        SYSmvalue == 0x93))
4165     return -1;
4166
4167   // If it was a read then we won't be expecting flags and so at this point
4168   // we can return the mask.
4169   if (IsRead) {
4170     if (Flags.empty())
4171       return SYSmvalue;
4172     else
4173       return -1;
4174   }
4175
4176   // We know we are now handling a write so need to get the mask for the flags.
4177   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
4178
4179   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
4180   // shouldn't have flags present.
4181   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
4182     return -1;
4183
4184   // The _g and _nzcvqg versions are only valid if the DSP extension is
4185   // available.
4186   if (!Subtarget->hasDSP() && (Mask & 0x1))
4187     return -1;
4188
4189   // The register was valid so need to put the mask in the correct place
4190   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
4191   // construct the operand for the instruction node.
4192   if (SYSmvalue < 0x4)
4193     return SYSmvalue | Mask << 10;
4194
4195   return SYSmvalue;
4196 }
4197
4198 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4199   // The mask operand contains the special register (R Bit) in bit 4, whether
4200   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4201   // bits 3-0 contains the fields to be accessed in the special register, set by
4202   // the flags provided with the register.
4203   int Mask = 0;
4204   if (Reg == "apsr") {
4205     // The flags permitted for apsr are the same flags that are allowed in
4206     // M class registers. We get the flag value and then shift the flags into
4207     // the correct place to combine with the mask.
4208     Mask = getMClassFlagsMask(Flags, true);
4209     if (Mask == -1)
4210       return -1;
4211     return Mask << 2;
4212   }
4213
4214   if (Reg != "cpsr" && Reg != "spsr") {
4215     return -1;
4216   }
4217
4218   // This is the same as if the flags were "fc"
4219   if (Flags.empty() || Flags == "all")
4220     return Mask | 0x9;
4221
4222   // Inspect the supplied flags string and set the bits in the mask for
4223   // the relevant and valid flags allowed for cpsr and spsr.
4224   for (char Flag : Flags) {
4225     int FlagVal;
4226     switch (Flag) {
4227       case 'c':
4228         FlagVal = 0x1;
4229         break;
4230       case 'x':
4231         FlagVal = 0x2;
4232         break;
4233       case 's':
4234         FlagVal = 0x4;
4235         break;
4236       case 'f':
4237         FlagVal = 0x8;
4238         break;
4239       default:
4240         FlagVal = 0;
4241     }
4242
4243     // This avoids allowing strings where the same flag bit appears twice.
4244     if (!FlagVal || (Mask & FlagVal))
4245       return -1;
4246     Mask |= FlagVal;
4247   }
4248
4249   // If the register is spsr then we need to set the R bit.
4250   if (Reg == "spsr")
4251     Mask |= 0x10;
4252
4253   return Mask;
4254 }
4255
4256 // Lower the read_register intrinsic to ARM specific DAG nodes
4257 // using the supplied metadata string to select the instruction node to use
4258 // and the registers/masks to construct as operands for the node.
4259 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4260   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4261   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4262   bool IsThumb2 = Subtarget->isThumb2();
4263   SDLoc DL(N);
4264
4265   std::vector<SDValue> Ops;
4266   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4267
4268   if (!Ops.empty()) {
4269     // If the special register string was constructed of fields (as defined
4270     // in the ACLE) then need to lower to MRC node (32 bit) or
4271     // MRRC node(64 bit), we can make the distinction based on the number of
4272     // operands we have.
4273     unsigned Opcode;
4274     SmallVector<EVT, 3> ResTypes;
4275     if (Ops.size() == 5){
4276       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4277       ResTypes.append({ MVT::i32, MVT::Other });
4278     } else {
4279       assert(Ops.size() == 3 &&
4280               "Invalid number of fields in special register string.");
4281       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4282       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4283     }
4284
4285     Ops.push_back(getAL(CurDAG, DL));
4286     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4287     Ops.push_back(N->getOperand(0));
4288     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4289     return true;
4290   }
4291
4292   std::string SpecialReg = RegString->getString().lower();
4293
4294   int BankedReg = getBankedRegisterMask(SpecialReg);
4295   if (BankedReg != -1) {
4296     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4297             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4298             N->getOperand(0) };
4299     ReplaceNode(
4300         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4301                                   DL, MVT::i32, MVT::Other, Ops));
4302     return true;
4303   }
4304
4305   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4306   // corresponding to the register that is being read from. So we switch on the
4307   // string to find which opcode we need to use.
4308   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4309                     .Case("fpscr", ARM::VMRS)
4310                     .Case("fpexc", ARM::VMRS_FPEXC)
4311                     .Case("fpsid", ARM::VMRS_FPSID)
4312                     .Case("mvfr0", ARM::VMRS_MVFR0)
4313                     .Case("mvfr1", ARM::VMRS_MVFR1)
4314                     .Case("mvfr2", ARM::VMRS_MVFR2)
4315                     .Case("fpinst", ARM::VMRS_FPINST)
4316                     .Case("fpinst2", ARM::VMRS_FPINST2)
4317                     .Default(0);
4318
4319   // If an opcode was found then we can lower the read to a VFP instruction.
4320   if (Opcode) {
4321     if (!Subtarget->hasVFP2())
4322       return false;
4323     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4324       return false;
4325
4326     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4327             N->getOperand(0) };
4328     ReplaceNode(N,
4329                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4330     return true;
4331   }
4332
4333   // If the target is M Class then need to validate that the register string
4334   // is an acceptable value, so check that a mask can be constructed from the
4335   // string.
4336   if (Subtarget->isMClass()) {
4337     StringRef Flags = "", Reg = SpecialReg;
4338     if (Reg.endswith("_ns")) {
4339       Flags = "ns";
4340       Reg = Reg.drop_back(3);
4341     }
4342
4343     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4344     if (SYSmValue == -1)
4345       return false;
4346
4347     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4348                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4349                       N->getOperand(0) };
4350     ReplaceNode(
4351         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4352     return true;
4353   }
4354
4355   // Here we know the target is not M Class so we need to check if it is one
4356   // of the remaining possible values which are apsr, cpsr or spsr.
4357   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4358     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4359             N->getOperand(0) };
4360     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4361                                           DL, MVT::i32, MVT::Other, Ops));
4362     return true;
4363   }
4364
4365   if (SpecialReg == "spsr") {
4366     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4367             N->getOperand(0) };
4368     ReplaceNode(
4369         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4370                                   MVT::i32, MVT::Other, Ops));
4371     return true;
4372   }
4373
4374   return false;
4375 }
4376
4377 // Lower the write_register intrinsic to ARM specific DAG nodes
4378 // using the supplied metadata string to select the instruction node to use
4379 // and the registers/masks to use in the nodes
4380 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4381   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4382   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4383   bool IsThumb2 = Subtarget->isThumb2();
4384   SDLoc DL(N);
4385
4386   std::vector<SDValue> Ops;
4387   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4388
4389   if (!Ops.empty()) {
4390     // If the special register string was constructed of fields (as defined
4391     // in the ACLE) then need to lower to MCR node (32 bit) or
4392     // MCRR node(64 bit), we can make the distinction based on the number of
4393     // operands we have.
4394     unsigned Opcode;
4395     if (Ops.size() == 5) {
4396       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4397       Ops.insert(Ops.begin()+2, N->getOperand(2));
4398     } else {
4399       assert(Ops.size() == 3 &&
4400               "Invalid number of fields in special register string.");
4401       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4402       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4403       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4404     }
4405
4406     Ops.push_back(getAL(CurDAG, DL));
4407     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4408     Ops.push_back(N->getOperand(0));
4409
4410     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4411     return true;
4412   }
4413
4414   std::string SpecialReg = RegString->getString().lower();
4415   int BankedReg = getBankedRegisterMask(SpecialReg);
4416   if (BankedReg != -1) {
4417     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4418             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4419             N->getOperand(0) };
4420     ReplaceNode(
4421         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4422                                   DL, MVT::Other, Ops));
4423     return true;
4424   }
4425
4426   // The VFP registers are written to by creating SelectionDAG nodes with
4427   // opcodes corresponding to the register that is being written. So we switch
4428   // on the string to find which opcode we need to use.
4429   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4430                     .Case("fpscr", ARM::VMSR)
4431                     .Case("fpexc", ARM::VMSR_FPEXC)
4432                     .Case("fpsid", ARM::VMSR_FPSID)
4433                     .Case("fpinst", ARM::VMSR_FPINST)
4434                     .Case("fpinst2", ARM::VMSR_FPINST2)
4435                     .Default(0);
4436
4437   if (Opcode) {
4438     if (!Subtarget->hasVFP2())
4439       return false;
4440     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4441             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4442     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4443     return true;
4444   }
4445
4446   std::pair<StringRef, StringRef> Fields;
4447   Fields = StringRef(SpecialReg).rsplit('_');
4448   std::string Reg = Fields.first.str();
4449   StringRef Flags = Fields.second;
4450
4451   // If the target was M Class then need to validate the special register value
4452   // and retrieve the mask for use in the instruction node.
4453   if (Subtarget->isMClass()) {
4454     // basepri_max gets split so need to correct Reg and Flags.
4455     if (SpecialReg == "basepri_max") {
4456       Reg = SpecialReg;
4457       Flags = "";
4458     }
4459     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4460     if (SYSmValue == -1)
4461       return false;
4462
4463     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4464                       N->getOperand(2), getAL(CurDAG, DL),
4465                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4466     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4467     return true;
4468   }
4469
4470   // We then check to see if a valid mask can be constructed for one of the
4471   // register string values permitted for the A and R class cores. These values
4472   // are apsr, spsr and cpsr; these are also valid on older cores.
4473   int Mask = getARClassRegisterMask(Reg, Flags);
4474   if (Mask != -1) {
4475     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4476             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4477             N->getOperand(0) };
4478     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4479                                           DL, MVT::Other, Ops));
4480     return true;
4481   }
4482
4483   return false;
4484 }
4485
4486 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4487   std::vector<SDValue> AsmNodeOperands;
4488   unsigned Flag, Kind;
4489   bool Changed = false;
4490   unsigned NumOps = N->getNumOperands();
4491
4492   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4493   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4494   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4495   // respectively. Since there is no constraint to explicitly specify a
4496   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4497   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4498   // them into a GPRPair.
4499
4500   SDLoc dl(N);
4501   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4502                                    : SDValue(nullptr,0);
4503
4504   SmallVector<bool, 8> OpChanged;
4505   // Glue node will be appended late.
4506   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4507     SDValue op = N->getOperand(i);
4508     AsmNodeOperands.push_back(op);
4509
4510     if (i < InlineAsm::Op_FirstOperand)
4511       continue;
4512
4513     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4514       Flag = C->getZExtValue();
4515       Kind = InlineAsm::getKind(Flag);
4516     }
4517     else
4518       continue;
4519
4520     // Immediate operands to inline asm in the SelectionDAG are modeled with
4521     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4522     // the second is a constant with the value of the immediate. If we get here
4523     // and we have a Kind_Imm, skip the next operand, and continue.
4524     if (Kind == InlineAsm::Kind_Imm) {
4525       SDValue op = N->getOperand(++i);
4526       AsmNodeOperands.push_back(op);
4527       continue;
4528     }
4529
4530     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4531     if (NumRegs)
4532       OpChanged.push_back(false);
4533
4534     unsigned DefIdx = 0;
4535     bool IsTiedToChangedOp = false;
4536     // If it's a use that is tied with a previous def, it has no
4537     // reg class constraint.
4538     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4539       IsTiedToChangedOp = OpChanged[DefIdx];
4540
4541     // Memory operands to inline asm in the SelectionDAG are modeled with two
4542     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4543     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4544     // it doesn't get misinterpreted), and continue. We do this here because
4545     // it's important to update the OpChanged array correctly before moving on.
4546     if (Kind == InlineAsm::Kind_Mem) {
4547       SDValue op = N->getOperand(++i);
4548       AsmNodeOperands.push_back(op);
4549       continue;
4550     }
4551
4552     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4553         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4554       continue;
4555
4556     unsigned RC;
4557     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4558     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4559         || NumRegs != 2)
4560       continue;
4561
4562     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4563     SDValue V0 = N->getOperand(i+1);
4564     SDValue V1 = N->getOperand(i+2);
4565     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4566     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4567     SDValue PairedReg;
4568     MachineRegisterInfo &MRI = MF->getRegInfo();
4569
4570     if (Kind == InlineAsm::Kind_RegDef ||
4571         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4572       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4573       // the original GPRs.
4574
4575       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4576       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4577       SDValue Chain = SDValue(N,0);
4578
4579       SDNode *GU = N->getGluedUser();
4580       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4581                                                Chain.getValue(1));
4582
4583       // Extract values from a GPRPair reg and copy to the original GPR reg.
4584       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4585                                                     RegCopy);
4586       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4587                                                     RegCopy);
4588       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4589                                         RegCopy.getValue(1));
4590       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4591
4592       // Update the original glue user.
4593       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4594       Ops.push_back(T1.getValue(1));
4595       CurDAG->UpdateNodeOperands(GU, Ops);
4596     }
4597     else {
4598       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4599       // GPRPair and then pass the GPRPair to the inline asm.
4600       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4601
4602       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4603       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4604                                           Chain.getValue(1));
4605       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4606                                           T0.getValue(1));
4607       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4608
4609       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4610       // i32 VRs of inline asm with it.
4611       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4612       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4613       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4614
4615       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4616       Glue = Chain.getValue(1);
4617     }
4618
4619     Changed = true;
4620
4621     if(PairedReg.getNode()) {
4622       OpChanged[OpChanged.size() -1 ] = true;
4623       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4624       if (IsTiedToChangedOp)
4625         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4626       else
4627         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4628       // Replace the current flag.
4629       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4630           Flag, dl, MVT::i32);
4631       // Add the new register node and skip the original two GPRs.
4632       AsmNodeOperands.push_back(PairedReg);
4633       // Skip the next two GPRs.
4634       i += 2;
4635     }
4636   }
4637
4638   if (Glue.getNode())
4639     AsmNodeOperands.push_back(Glue);
4640   if (!Changed)
4641     return false;
4642
4643   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4644       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4645   New->setNodeId(-1);
4646   ReplaceNode(N, New.getNode());
4647   return true;
4648 }
4649
4650
4651 bool ARMDAGToDAGISel::
4652 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4653                              std::vector<SDValue> &OutOps) {
4654   switch(ConstraintID) {
4655   default:
4656     llvm_unreachable("Unexpected asm memory constraint");
4657   case InlineAsm::Constraint_i:
4658     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4659     //        be an immediate and not a memory constraint.
4660     LLVM_FALLTHROUGH;
4661   case InlineAsm::Constraint_m:
4662   case InlineAsm::Constraint_o:
4663   case InlineAsm::Constraint_Q:
4664   case InlineAsm::Constraint_Um:
4665   case InlineAsm::Constraint_Un:
4666   case InlineAsm::Constraint_Uq:
4667   case InlineAsm::Constraint_Us:
4668   case InlineAsm::Constraint_Ut:
4669   case InlineAsm::Constraint_Uv:
4670   case InlineAsm::Constraint_Uy:
4671     // Require the address to be in a register.  That is safe for all ARM
4672     // variants and it is hard to do anything much smarter without knowing
4673     // how the operand is used.
4674     OutOps.push_back(Op);
4675     return false;
4676   }
4677   return true;
4678 }
4679
4680 /// createARMISelDag - This pass converts a legalized DAG into a
4681 /// ARM-specific DAG, ready for instruction scheduling.
4682 ///
4683 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4684                                      CodeGenOpt::Level OptLevel) {
4685   return new ARMDAGToDAGISel(TM, OptLevel);
4686 }