contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "Utils/ARMBaseInfo.h"
  19 #include "llvm/ADT/StringSwitch.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/CodeGen/SelectionDAGISel.h"
  26 #include "llvm/CodeGen/TargetLowering.h"
  27 #include "llvm/IR/CallingConv.h"
  28 #include "llvm/IR/Constants.h"
  29 #include "llvm/IR/DerivedTypes.h"
  30 #include "llvm/IR/Function.h"
  31 #include "llvm/IR/Intrinsics.h"
  32 #include "llvm/IR/LLVMContext.h"
  33 #include "llvm/Support/CommandLine.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/ErrorHandling.h"
  36 #include "llvm/Target/TargetOptions.h"
  37
  38 using namespace llvm;
  39
  40 #define DEBUG_TYPE "arm-isel"
  41
  42 static cl::opt<bool>
  43 DisableShifterOp("disable-shifter-op", cl::Hidden,
  44   cl::desc("Disable isel of shifter-op"),
  45   cl::init(false));
  46
  47 //===--------------------------------------------------------------------===//
  48 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  49 /// instructions for SelectionDAG operations.
  50 ///
  51 namespace {
  52
  53 class ARMDAGToDAGISel : public SelectionDAGISel {
  54   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  55   /// make the right decision when generating code for different targets.
  56   const ARMSubtarget *Subtarget;
  57
  58 public:
  59   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  60       : SelectionDAGISel(tm, OptLevel) {}
  61
  62   bool runOnMachineFunction(MachineFunction &MF) override {
  63     // Reset the subtarget each time through.
  64     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  65     SelectionDAGISel::runOnMachineFunction(MF);
  66     return true;
  67   }
  68
  69   StringRef getPassName() const override { return "ARM Instruction Selection"; }
  70
  71   void PreprocessISelDAG() override;
  72
  73   /// getI32Imm - Return a target constant of type i32 with the specified
  74   /// value.
  75   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  76     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  77   }
  78
  79   void Select(SDNode *N) override;
  80
  81   bool hasNoVMLxHazardUse(SDNode *N) const;
  82   bool isShifterOpProfitable(const SDValue &Shift,
  83                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  84   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  85                                SDValue &B, SDValue &C,
  86                                bool CheckProfitability = true);
  87   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  88                                SDValue &B, bool CheckProfitability = true);
  89   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  90                                     SDValue &B, SDValue &C) {
  91     // Don't apply the profitability check
  92     return SelectRegShifterOperand(N, A, B, C, false);
  93   }
  94   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
  95                                     SDValue &B) {
  96     // Don't apply the profitability check
  97     return SelectImmShifterOperand(N, A, B, false);
  98   }
  99
 100   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 101   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 102
 103   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 104     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 105     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 106     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 107     return true;
 108   }
 109
 110   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 111                              SDValue &Offset, SDValue &Opc);
 112   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 113                              SDValue &Offset, SDValue &Opc);
 114   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 115                              SDValue &Offset, SDValue &Opc);
 116   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 117   bool SelectAddrMode3(SDValue N, SDValue &Base,
 118                        SDValue &Offset, SDValue &Opc);
 119   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 120                              SDValue &Offset, SDValue &Opc);
 121   bool SelectAddrMode5(SDValue N, SDValue &Base,
 122                        SDValue &Offset);
 123   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 124   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 125
 126   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 127
 128   // Thumb Addressing Modes:
 129   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 130   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 131                                 SDValue &OffImm);
 132   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 133                                  SDValue &OffImm);
 134   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 135                                  SDValue &OffImm);
 136   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 137                                  SDValue &OffImm);
 138   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 139
 140   // Thumb 2 Addressing Modes:
 141   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 142   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 143                             SDValue &OffImm);
 144   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 145                                  SDValue &OffImm);
 146   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 147                              SDValue &OffReg, SDValue &ShImm);
 148   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 149
 150   inline bool is_so_imm(unsigned Imm) const {
 151     return ARM_AM::getSOImmVal(Imm) != -1;
 152   }
 153
 154   inline bool is_so_imm_not(unsigned Imm) const {
 155     return ARM_AM::getSOImmVal(~Imm) != -1;
 156   }
 157
 158   inline bool is_t2_so_imm(unsigned Imm) const {
 159     return ARM_AM::getT2SOImmVal(Imm) != -1;
 160   }
 161
 162   inline bool is_t2_so_imm_not(unsigned Imm) const {
 163     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 164   }
 165
 166   // Include the pieces autogenerated from the target description.
 167 #include "ARMGenDAGISel.inc"
 168
 169 private:
 170   void transferMemOperands(SDNode *Src, SDNode *Dst);
 171
 172   /// Indexed (pre/post inc/dec) load matching code for ARM.
 173   bool tryARMIndexedLoad(SDNode *N);
 174   bool tryT1IndexedLoad(SDNode *N);
 175   bool tryT2IndexedLoad(SDNode *N);
 176
 177   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 178   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 179   /// loads of D registers and even subregs and odd subregs of Q registers.
 180   /// For NumVecs <= 2, QOpcodes1 is not used.
 181   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 182                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 183                  const uint16_t *QOpcodes1);
 184
 185   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 186   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 187   /// stores of D registers and even subregs and odd subregs of Q registers.
 188   /// For NumVecs <= 2, QOpcodes1 is not used.
 189   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 190                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
 191                  const uint16_t *QOpcodes1);
 192
 193   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 194   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 195   /// load/store of D registers and Q registers.
 196   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 197                        unsigned NumVecs, const uint16_t *DOpcodes,
 198                        const uint16_t *QOpcodes);
 199
 200   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 201   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
 202   /// for loading D registers.  (Q registers are not supported.)
 203   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 204                     const uint16_t *DOpcodes,
 205                     const uint16_t *QOpcodes = nullptr);
 206
 207   /// Try to select SBFX/UBFX instructions for ARM.
 208   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 209
 210   // Select special operations if node forms integer ABS pattern
 211   bool tryABSOp(SDNode *N);
 212
 213   bool tryReadRegister(SDNode *N);
 214   bool tryWriteRegister(SDNode *N);
 215
 216   bool tryInlineAsm(SDNode *N);
 217
 218   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 219
 220   void SelectCMP_SWAP(SDNode *N);
 221
 222   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 223   /// inline asm expressions.
 224   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 225                                     std::vector<SDValue> &OutOps) override;
 226
 227   // Form pairs of consecutive R, S, D, or Q registers.
 228   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 229   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 230   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 231   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 232
 233   // Form sequences of 4 consecutive S, D, or Q registers.
 234   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 235   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 236   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 237
 238   // Get the alignment operand for a NEON VLD or VST instruction.
 239   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
 240                         bool is64BitVector);
 241
 242   /// Returns the number of instructions required to materialize the given
 243   /// constant in a register, or 3 if a literal pool load is needed.
 244   unsigned ConstantMaterializationCost(unsigned Val) const;
 245
 246   /// Checks if N is a multiplication by a constant where we can extract out a
 247   /// power of two from the constant so that it can be used in a shift, but only
 248   /// if it simplifies the materialization of the constant. Returns true if it
 249   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 250   /// out and to NewMulConst the new constant to be multiplied by.
 251   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 252                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 253
 254   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 255   /// selected when N would have been selected.
 256   void replaceDAGValue(const SDValue &N, SDValue M);
 257 };
 258 }
 259
 260 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 261 /// operand. If so Imm will receive the 32-bit value.
 262 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 263   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 264     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 265     return true;
 266   }
 267   return false;
 268 }
 269
 270 // isInt32Immediate - This method tests to see if a constant operand.
 271 // If so Imm will receive the 32 bit value.
 272 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 273   return isInt32Immediate(N.getNode(), Imm);
 274 }
 275
 276 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 277 // opcode and that it has a immediate integer right operand.
 278 // If so Imm will receive the 32 bit value.
 279 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 280   return N->getOpcode() == Opc &&
 281          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 282 }
 283
 284 /// \brief Check whether a particular node is a constant value representable as
 285 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 286 ///
 287 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 288 static bool isScaledConstantInRange(SDValue Node, int Scale,
 289                                     int RangeMin, int RangeMax,
 290                                     int &ScaledConstant) {
 291   assert(Scale > 0 && "Invalid scale!");
 292
 293   // Check that this is a constant.
 294   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 295   if (!C)
 296     return false;
 297
 298   ScaledConstant = (int) C->getZExtValue();
 299   if ((ScaledConstant % Scale) != 0)
 300     return false;
 301
 302   ScaledConstant /= Scale;
 303   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 304 }
 305
 306 void ARMDAGToDAGISel::PreprocessISelDAG() {
 307   if (!Subtarget->hasV6T2Ops())
 308     return;
 309
 310   bool isThumb2 = Subtarget->isThumb();
 311   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 312        E = CurDAG->allnodes_end(); I != E; ) {
 313     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 314
 315     if (N->getOpcode() != ISD::ADD)
 316       continue;
 317
 318     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 319     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 320     // trailing zeros, e.g. 1020.
 321     // Transform the expression to
 322     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 323     // of trailing zeros of c2. The left shift would be folded as an shifter
 324     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 325     // node (UBFX).
 326
 327     SDValue N0 = N->getOperand(0);
 328     SDValue N1 = N->getOperand(1);
 329     unsigned And_imm = 0;
 330     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 331       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 332         std::swap(N0, N1);
 333     }
 334     if (!And_imm)
 335       continue;
 336
 337     // Check if the AND mask is an immediate of the form: 000.....1111111100
 338     unsigned TZ = countTrailingZeros(And_imm);
 339     if (TZ != 1 && TZ != 2)
 340       // Be conservative here. Shifter operands aren't always free. e.g. On
 341       // Swift, left shifter operand of 1 / 2 for free but others are not.
 342       // e.g.
 343       //  ubfx   r3, r1, #16, #8
 344       //  ldr.w  r3, [r0, r3, lsl #2]
 345       // vs.
 346       //  mov.w  r9, #1020
 347       //  and.w  r2, r9, r1, lsr #14
 348       //  ldr    r2, [r0, r2]
 349       continue;
 350     And_imm >>= TZ;
 351     if (And_imm & (And_imm + 1))
 352       continue;
 353
 354     // Look for (and (srl X, c1), c2).
 355     SDValue Srl = N1.getOperand(0);
 356     unsigned Srl_imm = 0;
 357     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 358         (Srl_imm <= 2))
 359       continue;
 360
 361     // Make sure first operand is not a shifter operand which would prevent
 362     // folding of the left shift.
 363     SDValue CPTmp0;
 364     SDValue CPTmp1;
 365     SDValue CPTmp2;
 366     if (isThumb2) {
 367       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 368         continue;
 369     } else {
 370       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 371           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 372         continue;
 373     }
 374
 375     // Now make the transformation.
 376     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 377                           Srl.getOperand(0),
 378                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 379                                               MVT::i32));
 380     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 381                          Srl,
 382                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 383     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 384                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 385     CurDAG->UpdateNodeOperands(N, N0, N1);
 386   }
 387 }
 388
 389 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 390 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 391 /// least on current ARM implementations) which should be avoidded.
 392 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 393   if (OptLevel == CodeGenOpt::None)
 394     return true;
 395
 396   if (!Subtarget->hasVMLxHazards())
 397     return true;
 398
 399   if (!N->hasOneUse())
 400     return false;
 401
 402   SDNode *Use = *N->use_begin();
 403   if (Use->getOpcode() == ISD::CopyToReg)
 404     return true;
 405   if (Use->isMachineOpcode()) {
 406     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 407         CurDAG->getSubtarget().getInstrInfo());
 408
 409     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 410     if (MCID.mayStore())
 411       return true;
 412     unsigned Opcode = MCID.getOpcode();
 413     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 414       return true;
 415     // vmlx feeding into another vmlx. We actually want to unfold
 416     // the use later in the MLxExpansion pass. e.g.
 417     // vmla
 418     // vmla (stall 8 cycles)
 419     //
 420     // vmul (5 cycles)
 421     // vadd (5 cycles)
 422     // vmla
 423     // This adds up to about 18 - 19 cycles.
 424     //
 425     // vmla
 426     // vmul (stall 4 cycles)
 427     // vadd adds up to about 14 cycles.
 428     return TII->isFpMLxInstruction(Opcode);
 429   }
 430
 431   return false;
 432 }
 433
 434 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 435                                             ARM_AM::ShiftOpc ShOpcVal,
 436                                             unsigned ShAmt) {
 437   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 438     return true;
 439   if (Shift.hasOneUse())
 440     return true;
 441   // R << 2 is free.
 442   return ShOpcVal == ARM_AM::lsl &&
 443          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 444 }
 445
 446 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 447   if (Subtarget->isThumb()) {
 448     if (Val <= 255) return 1;                               // MOV
 449     if (Subtarget->hasV6T2Ops() &&
 450         (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
 451       return 1; // MOVW
 452     if (Val <= 510) return 2;                               // MOV + ADDi8
 453     if (~Val <= 255) return 2;                              // MOV + MVN
 454     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 455   } else {
 456     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 457     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 458     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 459     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 460   }
 461   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 462   return 3; // Literal pool load
 463 }
 464
 465 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 466                                              unsigned MaxShift,
 467                                              unsigned &PowerOfTwo,
 468                                              SDValue &NewMulConst) const {
 469   assert(N.getOpcode() == ISD::MUL);
 470   assert(MaxShift > 0);
 471
 472   // If the multiply is used in more than one place then changing the constant
 473   // will make other uses incorrect, so don't.
 474   if (!N.hasOneUse()) return false;
 475   // Check if the multiply is by a constant
 476   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 477   if (!MulConst) return false;
 478   // If the constant is used in more than one place then modifying it will mean
 479   // we need to materialize two constants instead of one, which is a bad idea.
 480   if (!MulConst->hasOneUse()) return false;
 481   unsigned MulConstVal = MulConst->getZExtValue();
 482   if (MulConstVal == 0) return false;
 483
 484   // Find the largest power of 2 that MulConstVal is a multiple of
 485   PowerOfTwo = MaxShift;
 486   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 487     --PowerOfTwo;
 488     if (PowerOfTwo == 0) return false;
 489   }
 490
 491   // Only optimise if the new cost is better
 492   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 493   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 494   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 495   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 496   return NewCost < OldCost;
 497 }
 498
 499 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 500   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 501   CurDAG->ReplaceAllUsesWith(N, M);
 502 }
 503
 504 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 505                                               SDValue &BaseReg,
 506                                               SDValue &Opc,
 507                                               bool CheckProfitability) {
 508   if (DisableShifterOp)
 509     return false;
 510
 511   // If N is a multiply-by-constant and it's profitable to extract a shift and
 512   // use it in a shifted operand do so.
 513   if (N.getOpcode() == ISD::MUL) {
 514     unsigned PowerOfTwo = 0;
 515     SDValue NewMulConst;
 516     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 517       HandleSDNode Handle(N);
 518       SDLoc Loc(N);
 519       replaceDAGValue(N.getOperand(1), NewMulConst);
 520       BaseReg = Handle.getValue();
 521       Opc = CurDAG->getTargetConstant(
 522           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
 523       return true;
 524     }
 525   }
 526
 527   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 528
 529   // Don't match base register only case. That is matched to a separate
 530   // lower complexity pattern with explicit register operand.
 531   if (ShOpcVal == ARM_AM::no_shift) return false;
 532
 533   BaseReg = N.getOperand(0);
 534   unsigned ShImmVal = 0;
 535   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 536   if (!RHS) return false;
 537   ShImmVal = RHS->getZExtValue() & 31;
 538   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 539                                   SDLoc(N), MVT::i32);
 540   return true;
 541 }
 542
 543 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 544                                               SDValue &BaseReg,
 545                                               SDValue &ShReg,
 546                                               SDValue &Opc,
 547                                               bool CheckProfitability) {
 548   if (DisableShifterOp)
 549     return false;
 550
 551   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 552
 553   // Don't match base register only case. That is matched to a separate
 554   // lower complexity pattern with explicit register operand.
 555   if (ShOpcVal == ARM_AM::no_shift) return false;
 556
 557   BaseReg = N.getOperand(0);
 558   unsigned ShImmVal = 0;
 559   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 560   if (RHS) return false;
 561
 562   ShReg = N.getOperand(1);
 563   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 564     return false;
 565   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 566                                   SDLoc(N), MVT::i32);
 567   return true;
 568 }
 569
 570
 571 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 572                                           SDValue &Base,
 573                                           SDValue &OffImm) {
 574   // Match simple R + imm12 operands.
 575
 576   // Base only.
 577   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 578       !CurDAG->isBaseWithConstantOffset(N)) {
 579     if (N.getOpcode() == ISD::FrameIndex) {
 580       // Match frame index.
 581       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 582       Base = CurDAG->getTargetFrameIndex(
 583           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 584       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 585       return true;
 586     }
 587
 588     if (N.getOpcode() == ARMISD::Wrapper &&
 589         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 590         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 591         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 592       Base = N.getOperand(0);
 593     } else
 594       Base = N;
 595     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 596     return true;
 597   }
 598
 599   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 600     int RHSC = (int)RHS->getSExtValue();
 601     if (N.getOpcode() == ISD::SUB)
 602       RHSC = -RHSC;
 603
 604     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 605       Base   = N.getOperand(0);
 606       if (Base.getOpcode() == ISD::FrameIndex) {
 607         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 608         Base = CurDAG->getTargetFrameIndex(
 609             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 610       }
 611       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 612       return true;
 613     }
 614   }
 615
 616   // Base only.
 617   Base = N;
 618   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 619   return true;
 620 }
 621
 622
 623
 624 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 625                                       SDValue &Opc) {
 626   if (N.getOpcode() == ISD::MUL &&
 627       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 628     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 629       // X * [3,5,9] -> X + X * [2,4,8] etc.
 630       int RHSC = (int)RHS->getZExtValue();
 631       if (RHSC & 1) {
 632         RHSC = RHSC & ~1;
 633         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 634         if (RHSC < 0) {
 635           AddSub = ARM_AM::sub;
 636           RHSC = - RHSC;
 637         }
 638         if (isPowerOf2_32(RHSC)) {
 639           unsigned ShAmt = Log2_32(RHSC);
 640           Base = Offset = N.getOperand(0);
 641           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 642                                                             ARM_AM::lsl),
 643                                           SDLoc(N), MVT::i32);
 644           return true;
 645         }
 646       }
 647     }
 648   }
 649
 650   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 651       // ISD::OR that is equivalent to an ISD::ADD.
 652       !CurDAG->isBaseWithConstantOffset(N))
 653     return false;
 654
 655   // Leave simple R +/- imm12 operands for LDRi12
 656   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 657     int RHSC;
 658     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 659                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 660       return false;
 661   }
 662
 663   // Otherwise this is R +/- [possibly shifted] R.
 664   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 665   ARM_AM::ShiftOpc ShOpcVal =
 666     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 667   unsigned ShAmt = 0;
 668
 669   Base   = N.getOperand(0);
 670   Offset = N.getOperand(1);
 671
 672   if (ShOpcVal != ARM_AM::no_shift) {
 673     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 674     // it.
 675     if (ConstantSDNode *Sh =
 676            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 677       ShAmt = Sh->getZExtValue();
 678       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 679         Offset = N.getOperand(1).getOperand(0);
 680       else {
 681         ShAmt = 0;
 682         ShOpcVal = ARM_AM::no_shift;
 683       }
 684     } else {
 685       ShOpcVal = ARM_AM::no_shift;
 686     }
 687   }
 688
 689   // Try matching (R shl C) + (R).
 690   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 691       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 692         N.getOperand(0).hasOneUse())) {
 693     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 694     if (ShOpcVal != ARM_AM::no_shift) {
 695       // Check to see if the RHS of the shift is a constant, if not, we can't
 696       // fold it.
 697       if (ConstantSDNode *Sh =
 698           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 699         ShAmt = Sh->getZExtValue();
 700         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 701           Offset = N.getOperand(0).getOperand(0);
 702           Base = N.getOperand(1);
 703         } else {
 704           ShAmt = 0;
 705           ShOpcVal = ARM_AM::no_shift;
 706         }
 707       } else {
 708         ShOpcVal = ARM_AM::no_shift;
 709       }
 710     }
 711   }
 712
 713   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 714   // and use it in a shifted operand do so.
 715   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 716     unsigned PowerOfTwo = 0;
 717     SDValue NewMulConst;
 718     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 719       HandleSDNode Handle(Offset);
 720       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 721       Offset = Handle.getValue();
 722       ShAmt = PowerOfTwo;
 723       ShOpcVal = ARM_AM::lsl;
 724     }
 725   }
 726
 727   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 728                                   SDLoc(N), MVT::i32);
 729   return true;
 730 }
 731
 732 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 733                                             SDValue &Offset, SDValue &Opc) {
 734   unsigned Opcode = Op->getOpcode();
 735   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 736     ? cast<LoadSDNode>(Op)->getAddressingMode()
 737     : cast<StoreSDNode>(Op)->getAddressingMode();
 738   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 739     ? ARM_AM::add : ARM_AM::sub;
 740   int Val;
 741   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 742     return false;
 743
 744   Offset = N;
 745   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 746   unsigned ShAmt = 0;
 747   if (ShOpcVal != ARM_AM::no_shift) {
 748     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 749     // it.
 750     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 751       ShAmt = Sh->getZExtValue();
 752       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 753         Offset = N.getOperand(0);
 754       else {
 755         ShAmt = 0;
 756         ShOpcVal = ARM_AM::no_shift;
 757       }
 758     } else {
 759       ShOpcVal = ARM_AM::no_shift;
 760     }
 761   }
 762
 763   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 764                                   SDLoc(N), MVT::i32);
 765   return true;
 766 }
 767
 768 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 769                                             SDValue &Offset, SDValue &Opc) {
 770   unsigned Opcode = Op->getOpcode();
 771   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 772     ? cast<LoadSDNode>(Op)->getAddressingMode()
 773     : cast<StoreSDNode>(Op)->getAddressingMode();
 774   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 775     ? ARM_AM::add : ARM_AM::sub;
 776   int Val;
 777   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 778     if (AddSub == ARM_AM::sub) Val *= -1;
 779     Offset = CurDAG->getRegister(0, MVT::i32);
 780     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 781     return true;
 782   }
 783
 784   return false;
 785 }
 786
 787
 788 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 789                                             SDValue &Offset, SDValue &Opc) {
 790   unsigned Opcode = Op->getOpcode();
 791   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 792     ? cast<LoadSDNode>(Op)->getAddressingMode()
 793     : cast<StoreSDNode>(Op)->getAddressingMode();
 794   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 795     ? ARM_AM::add : ARM_AM::sub;
 796   int Val;
 797   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 798     Offset = CurDAG->getRegister(0, MVT::i32);
 799     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 800                                                       ARM_AM::no_shift),
 801                                     SDLoc(Op), MVT::i32);
 802     return true;
 803   }
 804
 805   return false;
 806 }
 807
 808 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 809   Base = N;
 810   return true;
 811 }
 812
 813 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 814                                       SDValue &Base, SDValue &Offset,
 815                                       SDValue &Opc) {
 816   if (N.getOpcode() == ISD::SUB) {
 817     // X - C  is canonicalize to X + -C, no need to handle it here.
 818     Base = N.getOperand(0);
 819     Offset = N.getOperand(1);
 820     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 821                                     MVT::i32);
 822     return true;
 823   }
 824
 825   if (!CurDAG->isBaseWithConstantOffset(N)) {
 826     Base = N;
 827     if (N.getOpcode() == ISD::FrameIndex) {
 828       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 829       Base = CurDAG->getTargetFrameIndex(
 830           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 831     }
 832     Offset = CurDAG->getRegister(0, MVT::i32);
 833     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 834                                     MVT::i32);
 835     return true;
 836   }
 837
 838   // If the RHS is +/- imm8, fold into addr mode.
 839   int RHSC;
 840   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 841                               -256 + 1, 256, RHSC)) { // 8 bits.
 842     Base = N.getOperand(0);
 843     if (Base.getOpcode() == ISD::FrameIndex) {
 844       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 845       Base = CurDAG->getTargetFrameIndex(
 846           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 847     }
 848     Offset = CurDAG->getRegister(0, MVT::i32);
 849
 850     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 851     if (RHSC < 0) {
 852       AddSub = ARM_AM::sub;
 853       RHSC = -RHSC;
 854     }
 855     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
 856                                     MVT::i32);
 857     return true;
 858   }
 859
 860   Base = N.getOperand(0);
 861   Offset = N.getOperand(1);
 862   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
 863                                   MVT::i32);
 864   return true;
 865 }
 866
 867 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 868                                             SDValue &Offset, SDValue &Opc) {
 869   unsigned Opcode = Op->getOpcode();
 870   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 871     ? cast<LoadSDNode>(Op)->getAddressingMode()
 872     : cast<StoreSDNode>(Op)->getAddressingMode();
 873   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 874     ? ARM_AM::add : ARM_AM::sub;
 875   int Val;
 876   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 877     Offset = CurDAG->getRegister(0, MVT::i32);
 878     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
 879                                     MVT::i32);
 880     return true;
 881   }
 882
 883   Offset = N;
 884   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
 885                                   MVT::i32);
 886   return true;
 887 }
 888
 889 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 890                                       SDValue &Base, SDValue &Offset) {
 891   if (!CurDAG->isBaseWithConstantOffset(N)) {
 892     Base = N;
 893     if (N.getOpcode() == ISD::FrameIndex) {
 894       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 895       Base = CurDAG->getTargetFrameIndex(
 896           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 897     } else if (N.getOpcode() == ARMISD::Wrapper &&
 898                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 899                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
 900                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 901       Base = N.getOperand(0);
 902     }
 903     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 904                                        SDLoc(N), MVT::i32);
 905     return true;
 906   }
 907
 908   // If the RHS is +/- imm8, fold into addr mode.
 909   int RHSC;
 910   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
 911                               -256 + 1, 256, RHSC)) {
 912     Base = N.getOperand(0);
 913     if (Base.getOpcode() == ISD::FrameIndex) {
 914       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 915       Base = CurDAG->getTargetFrameIndex(
 916           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 917     }
 918
 919     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 920     if (RHSC < 0) {
 921       AddSub = ARM_AM::sub;
 922       RHSC = -RHSC;
 923     }
 924     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 925                                        SDLoc(N), MVT::i32);
 926     return true;
 927   }
 928
 929   Base = N;
 930   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 931                                      SDLoc(N), MVT::i32);
 932   return true;
 933 }
 934
 935 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 936                                       SDValue &Align) {
 937   Addr = N;
 938
 939   unsigned Alignment = 0;
 940
 941   MemSDNode *MemN = cast<MemSDNode>(Parent);
 942
 943   if (isa<LSBaseSDNode>(MemN) ||
 944       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
 945         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
 946        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
 947     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 948     // The maximum alignment is equal to the memory size being referenced.
 949     unsigned MMOAlign = MemN->getAlignment();
 950     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
 951     if (MMOAlign >= MemSize && MemSize > 1)
 952       Alignment = MemSize;
 953   } else {
 954     // All other uses of addrmode6 are for intrinsics.  For now just record
 955     // the raw alignment value; it will be refined later based on the legal
 956     // alignment operands for the intrinsic.
 957     Alignment = MemN->getAlignment();
 958   }
 959
 960   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
 961   return true;
 962 }
 963
 964 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
 965                                             SDValue &Offset) {
 966   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
 967   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
 968   if (AM != ISD::POST_INC)
 969     return false;
 970   Offset = N;
 971   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
 972     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
 973       Offset = CurDAG->getRegister(0, MVT::i32);
 974   }
 975   return true;
 976 }
 977
 978 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
 979                                        SDValue &Offset, SDValue &Label) {
 980   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
 981     Offset = N.getOperand(0);
 982     SDValue N1 = N.getOperand(1);
 983     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
 984                                       SDLoc(N), MVT::i32);
 985     return true;
 986   }
 987
 988   return false;
 989 }
 990
 991
 992 //===----------------------------------------------------------------------===//
 993 //                         Thumb Addressing Modes
 994 //===----------------------------------------------------------------------===//
 995
 996 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
 997                                             SDValue &Base, SDValue &Offset){
 998   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
 999     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1000     if (!NC || !NC->isNullValue())
1001       return false;
1002
1003     Base = Offset = N;
1004     return true;
1005   }
1006
1007   Base = N.getOperand(0);
1008   Offset = N.getOperand(1);
1009   return true;
1010 }
1011
1012 bool
1013 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1014                                           SDValue &Base, SDValue &OffImm) {
1015   if (!CurDAG->isBaseWithConstantOffset(N)) {
1016     if (N.getOpcode() == ISD::ADD) {
1017       return false; // We want to select register offset instead
1018     } else if (N.getOpcode() == ARMISD::Wrapper &&
1019         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1020         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1021         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1022         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1023       Base = N.getOperand(0);
1024     } else {
1025       Base = N;
1026     }
1027
1028     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1029     return true;
1030   }
1031
1032   // If the RHS is + imm5 * scale, fold into addr mode.
1033   int RHSC;
1034   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1035     Base = N.getOperand(0);
1036     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1037     return true;
1038   }
1039
1040   // Offset is too large, so use register offset instead.
1041   return false;
1042 }
1043
1044 bool
1045 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1046                                            SDValue &OffImm) {
1047   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1048 }
1049
1050 bool
1051 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1052                                            SDValue &OffImm) {
1053   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1054 }
1055
1056 bool
1057 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1058                                            SDValue &OffImm) {
1059   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1060 }
1061
1062 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1063                                             SDValue &Base, SDValue &OffImm) {
1064   if (N.getOpcode() == ISD::FrameIndex) {
1065     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1066     // Only multiples of 4 are allowed for the offset, so the frame object
1067     // alignment must be at least 4.
1068     MachineFrameInfo &MFI = MF->getFrameInfo();
1069     if (MFI.getObjectAlignment(FI) < 4)
1070       MFI.setObjectAlignment(FI, 4);
1071     Base = CurDAG->getTargetFrameIndex(
1072         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1073     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1074     return true;
1075   }
1076
1077   if (!CurDAG->isBaseWithConstantOffset(N))
1078     return false;
1079
1080   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1081   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1082       (LHSR && LHSR->getReg() == ARM::SP)) {
1083     // If the RHS is + imm8 * scale, fold into addr mode.
1084     int RHSC;
1085     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1086       Base = N.getOperand(0);
1087       if (Base.getOpcode() == ISD::FrameIndex) {
1088         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1089         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1090         // indexed by the LHS must be 4-byte aligned.
1091         MachineFrameInfo &MFI = MF->getFrameInfo();
1092         if (MFI.getObjectAlignment(FI) < 4)
1093           MFI.setObjectAlignment(FI, 4);
1094         Base = CurDAG->getTargetFrameIndex(
1095             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1096       }
1097       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1098       return true;
1099     }
1100   }
1101
1102   return false;
1103 }
1104
1105
1106 //===----------------------------------------------------------------------===//
1107 //                        Thumb 2 Addressing Modes
1108 //===----------------------------------------------------------------------===//
1109
1110
1111 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1112                                             SDValue &Base, SDValue &OffImm) {
1113   // Match simple R + imm12 operands.
1114
1115   // Base only.
1116   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1117       !CurDAG->isBaseWithConstantOffset(N)) {
1118     if (N.getOpcode() == ISD::FrameIndex) {
1119       // Match frame index.
1120       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1121       Base = CurDAG->getTargetFrameIndex(
1122           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1123       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1124       return true;
1125     }
1126
1127     if (N.getOpcode() == ARMISD::Wrapper &&
1128         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1129         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1130         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1131       Base = N.getOperand(0);
1132       if (Base.getOpcode() == ISD::TargetConstantPool)
1133         return false;  // We want to select t2LDRpci instead.
1134     } else
1135       Base = N;
1136     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1137     return true;
1138   }
1139
1140   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1141     if (SelectT2AddrModeImm8(N, Base, OffImm))
1142       // Let t2LDRi8 handle (R - imm8).
1143       return false;
1144
1145     int RHSC = (int)RHS->getZExtValue();
1146     if (N.getOpcode() == ISD::SUB)
1147       RHSC = -RHSC;
1148
1149     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1150       Base   = N.getOperand(0);
1151       if (Base.getOpcode() == ISD::FrameIndex) {
1152         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1153         Base = CurDAG->getTargetFrameIndex(
1154             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1155       }
1156       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1157       return true;
1158     }
1159   }
1160
1161   // Base only.
1162   Base = N;
1163   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1164   return true;
1165 }
1166
1167 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1168                                            SDValue &Base, SDValue &OffImm) {
1169   // Match simple R - imm8 operands.
1170   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1171       !CurDAG->isBaseWithConstantOffset(N))
1172     return false;
1173
1174   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1175     int RHSC = (int)RHS->getSExtValue();
1176     if (N.getOpcode() == ISD::SUB)
1177       RHSC = -RHSC;
1178
1179     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1180       Base = N.getOperand(0);
1181       if (Base.getOpcode() == ISD::FrameIndex) {
1182         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1183         Base = CurDAG->getTargetFrameIndex(
1184             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1185       }
1186       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1187       return true;
1188     }
1189   }
1190
1191   return false;
1192 }
1193
1194 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1195                                                  SDValue &OffImm){
1196   unsigned Opcode = Op->getOpcode();
1197   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1198     ? cast<LoadSDNode>(Op)->getAddressingMode()
1199     : cast<StoreSDNode>(Op)->getAddressingMode();
1200   int RHSC;
1201   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1202     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1203       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1204       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1205     return true;
1206   }
1207
1208   return false;
1209 }
1210
1211 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1212                                             SDValue &Base,
1213                                             SDValue &OffReg, SDValue &ShImm) {
1214   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1215   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1216     return false;
1217
1218   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1219   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1220     int RHSC = (int)RHS->getZExtValue();
1221     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1222       return false;
1223     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1224       return false;
1225   }
1226
1227   // Look for (R + R) or (R + (R << [1,2,3])).
1228   unsigned ShAmt = 0;
1229   Base   = N.getOperand(0);
1230   OffReg = N.getOperand(1);
1231
1232   // Swap if it is ((R << c) + R).
1233   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1234   if (ShOpcVal != ARM_AM::lsl) {
1235     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1236     if (ShOpcVal == ARM_AM::lsl)
1237       std::swap(Base, OffReg);
1238   }
1239
1240   if (ShOpcVal == ARM_AM::lsl) {
1241     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1242     // it.
1243     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1244       ShAmt = Sh->getZExtValue();
1245       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1246         OffReg = OffReg.getOperand(0);
1247       else {
1248         ShAmt = 0;
1249       }
1250     }
1251   }
1252
1253   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1254   // and use it in a shifted operand do so.
1255   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1256     unsigned PowerOfTwo = 0;
1257     SDValue NewMulConst;
1258     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1259       HandleSDNode Handle(OffReg);
1260       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1261       OffReg = Handle.getValue();
1262       ShAmt = PowerOfTwo;
1263     }
1264   }
1265
1266   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1267
1268   return true;
1269 }
1270
1271 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1272                                                 SDValue &OffImm) {
1273   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1274   // instructions.
1275   Base = N;
1276   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1277
1278   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1279     return true;
1280
1281   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1282   if (!RHS)
1283     return true;
1284
1285   uint32_t RHSC = (int)RHS->getZExtValue();
1286   if (RHSC > 1020 || RHSC % 4 != 0)
1287     return true;
1288
1289   Base = N.getOperand(0);
1290   if (Base.getOpcode() == ISD::FrameIndex) {
1291     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1292     Base = CurDAG->getTargetFrameIndex(
1293         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1294   }
1295
1296   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1297   return true;
1298 }
1299
1300 //===--------------------------------------------------------------------===//
1301
1302 /// getAL - Returns a ARMCC::AL immediate node.
1303 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1304   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1305 }
1306
1307 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1308   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1309   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1310   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1311 }
1312
1313 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1314   LoadSDNode *LD = cast<LoadSDNode>(N);
1315   ISD::MemIndexedMode AM = LD->getAddressingMode();
1316   if (AM == ISD::UNINDEXED)
1317     return false;
1318
1319   EVT LoadedVT = LD->getMemoryVT();
1320   SDValue Offset, AMOpc;
1321   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1322   unsigned Opcode = 0;
1323   bool Match = false;
1324   if (LoadedVT == MVT::i32 && isPre &&
1325       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1326     Opcode = ARM::LDR_PRE_IMM;
1327     Match = true;
1328   } else if (LoadedVT == MVT::i32 && !isPre &&
1329       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1330     Opcode = ARM::LDR_POST_IMM;
1331     Match = true;
1332   } else if (LoadedVT == MVT::i32 &&
1333       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1334     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1335     Match = true;
1336
1337   } else if (LoadedVT == MVT::i16 &&
1338              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1339     Match = true;
1340     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1341       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1342       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1343   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1344     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1345       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1346         Match = true;
1347         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1348       }
1349     } else {
1350       if (isPre &&
1351           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1352         Match = true;
1353         Opcode = ARM::LDRB_PRE_IMM;
1354       } else if (!isPre &&
1355                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1356         Match = true;
1357         Opcode = ARM::LDRB_POST_IMM;
1358       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1359         Match = true;
1360         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1361       }
1362     }
1363   }
1364
1365   if (Match) {
1366     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1367       SDValue Chain = LD->getChain();
1368       SDValue Base = LD->getBasePtr();
1369       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1370                        CurDAG->getRegister(0, MVT::i32), Chain };
1371       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1372                                            MVT::Other, Ops);
1373       transferMemOperands(N, New);
1374       ReplaceNode(N, New);
1375       return true;
1376     } else {
1377       SDValue Chain = LD->getChain();
1378       SDValue Base = LD->getBasePtr();
1379       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1380                        CurDAG->getRegister(0, MVT::i32), Chain };
1381       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1382                                            MVT::Other, Ops);
1383       transferMemOperands(N, New);
1384       ReplaceNode(N, New);
1385       return true;
1386     }
1387   }
1388
1389   return false;
1390 }
1391
1392 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1393   LoadSDNode *LD = cast<LoadSDNode>(N);
1394   EVT LoadedVT = LD->getMemoryVT();
1395   ISD::MemIndexedMode AM = LD->getAddressingMode();
1396   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1397       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1398     return false;
1399
1400   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1401   if (!COffs || COffs->getZExtValue() != 4)
1402     return false;
1403
1404   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1405   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1406   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1407   // ISel.
1408   SDValue Chain = LD->getChain();
1409   SDValue Base = LD->getBasePtr();
1410   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1411                    CurDAG->getRegister(0, MVT::i32), Chain };
1412   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1413                                        MVT::i32, MVT::Other, Ops);
1414   transferMemOperands(N, New);
1415   ReplaceNode(N, New);
1416   return true;
1417 }
1418
1419 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1420   LoadSDNode *LD = cast<LoadSDNode>(N);
1421   ISD::MemIndexedMode AM = LD->getAddressingMode();
1422   if (AM == ISD::UNINDEXED)
1423     return false;
1424
1425   EVT LoadedVT = LD->getMemoryVT();
1426   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1427   SDValue Offset;
1428   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1429   unsigned Opcode = 0;
1430   bool Match = false;
1431   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1432     switch (LoadedVT.getSimpleVT().SimpleTy) {
1433     case MVT::i32:
1434       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1435       break;
1436     case MVT::i16:
1437       if (isSExtLd)
1438         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1439       else
1440         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1441       break;
1442     case MVT::i8:
1443     case MVT::i1:
1444       if (isSExtLd)
1445         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1446       else
1447         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1448       break;
1449     default:
1450       return false;
1451     }
1452     Match = true;
1453   }
1454
1455   if (Match) {
1456     SDValue Chain = LD->getChain();
1457     SDValue Base = LD->getBasePtr();
1458     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1459                      CurDAG->getRegister(0, MVT::i32), Chain };
1460     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1461                                          MVT::Other, Ops);
1462     transferMemOperands(N, New);
1463     ReplaceNode(N, New);
1464     return true;
1465   }
1466
1467   return false;
1468 }
1469
1470 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1471 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1472   SDLoc dl(V0.getNode());
1473   SDValue RegClass =
1474     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1475   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1476   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1477   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1478   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1479 }
1480
1481 /// \brief Form a D register from a pair of S registers.
1482 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1483   SDLoc dl(V0.getNode());
1484   SDValue RegClass =
1485     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1486   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1487   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1488   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1489   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1490 }
1491
1492 /// \brief Form a quad register from a pair of D registers.
1493 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1494   SDLoc dl(V0.getNode());
1495   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1496                                                MVT::i32);
1497   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1498   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1499   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1500   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1501 }
1502
1503 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1504 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1505   SDLoc dl(V0.getNode());
1506   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1507                                                MVT::i32);
1508   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1509   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1510   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1511   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1512 }
1513
1514 /// \brief Form 4 consecutive S registers.
     ///
     /// Emits a REG_SEQUENCE machine node in the QPR_VFP2 register class with
     /// \p V0..\p V3 placed in sub-registers ssub_0..ssub_3 respectively. The
     /// node's result type is \p VT and its debug location comes from \p V0.
1515 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1516                                    SDValue V2, SDValue V3) {
1517   SDLoc dl(V0.getNode());
1518   SDValue RegClass =
1519     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1520   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1521   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1522   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1523   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
       // REG_SEQUENCE operands: register class first, then (value, subreg) pairs.
1524   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1525                                     V2, SubReg2, V3, SubReg3 };
1526   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1527 }
1528
1529 /// \brief Form 4 consecutive D registers.
     ///
     /// Emits a REG_SEQUENCE machine node in the QQPR register class with
     /// \p V0..\p V3 placed in sub-registers dsub_0..dsub_3 respectively. The
     /// node's result type is \p VT and its debug location comes from \p V0.
1530 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1531                                    SDValue V2, SDValue V3) {
1532   SDLoc dl(V0.getNode());
1533   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1534                                                MVT::i32);
1535   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1536   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1537   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1538   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
       // REG_SEQUENCE operands: register class first, then (value, subreg) pairs.
1539   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1540                                     V2, SubReg2, V3, SubReg3 };
1541   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1542 }
1543
1544 /// \brief Form 4 consecutive Q registers.
     ///
     /// Emits a REG_SEQUENCE machine node in the QQQQPR register class with
     /// \p V0..\p V3 placed in sub-registers qsub_0..qsub_3 respectively. The
     /// node's result type is \p VT and its debug location comes from \p V0.
1545 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1546                                    SDValue V2, SDValue V3) {
1547   SDLoc dl(V0.getNode());
1548   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1549                                                MVT::i32);
1550   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1551   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1552   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1553   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
       // REG_SEQUENCE operands: register class first, then (value, subreg) pairs.
1554   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1555                                     V2, SubReg2, V3, SubReg3 };
1556   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1557 }
1558
1559 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1560 /// of a NEON VLD or VST instruction.  The supported values depend on the
1561 /// number of registers being loaded.
     ///
     /// \p Align must be a ConstantSDNode (it is read through cast<>). Its value
     /// is reduced to 32 (only when four registers are accessed), 16 (two or
     /// four registers) or 8; anything smaller becomes 0. The result is returned
     /// as an i32 target constant created at \p dl.
     /// NOTE(review): 0 presumably means "no alignment specified" for the
     /// instruction operand -- confirm against the instruction definitions.
1562 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1563                                        unsigned NumVecs, bool is64BitVector) {
1564   unsigned NumRegs = NumVecs;
       // For one or two non-64-bit vectors, each vector counts as two registers.
1565   if (!is64BitVector && NumVecs < 3)
1566     NumRegs *= 2;
1567
1568   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
       // Pick the largest permitted alignment that does not exceed the request.
1569   if (Alignment >= 32 && NumRegs == 4)
1570     Alignment = 32;
1571   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1572     Alignment = 16;
1573   else if (Alignment >= 8)
1574     Alignment = 8;
1575   else
1576     Alignment = 0;
1577
1578   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1579 }
1580
1581 static bool isVLDfixed(unsigned Opc)
1582 {
       // Returns true if Opc is one of the "_fixed" write-back VLD opcodes listed
       // below (VLD1, VLD1DUP, VLD2 and VLD2DUP variants), and false for any
       // other opcode.
       // NOTE(review): VLD1d64Twb_fixed is mapped by getVLDSTRegisterUpdateOpcode
       // but is not listed here, so that function's assert would reject it --
       // confirm whether the omission is intentional.
1583   switch (Opc) {
1584   default: return false;
1585   case ARM::VLD1d8wb_fixed : return true;
1586   case ARM::VLD1d16wb_fixed : return true;
1587   case ARM::VLD1d64Qwb_fixed : return true;
1588   case ARM::VLD1d32wb_fixed : return true;
1589   case ARM::VLD1d64wb_fixed : return true;
1590   case ARM::VLD1d64TPseudoWB_fixed : return true;
1591   case ARM::VLD1d64QPseudoWB_fixed : return true;
1592   case ARM::VLD1q8wb_fixed : return true;
1593   case ARM::VLD1q16wb_fixed : return true;
1594   case ARM::VLD1q32wb_fixed : return true;
1595   case ARM::VLD1q64wb_fixed : return true;
1596   case ARM::VLD1DUPd8wb_fixed : return true;
1597   case ARM::VLD1DUPd16wb_fixed : return true;
1598   case ARM::VLD1DUPd32wb_fixed : return true;
1599   case ARM::VLD1DUPq8wb_fixed : return true;
1600   case ARM::VLD1DUPq16wb_fixed : return true;
1601   case ARM::VLD1DUPq32wb_fixed : return true;
1602   case ARM::VLD2d8wb_fixed : return true;
1603   case ARM::VLD2d16wb_fixed : return true;
1604   case ARM::VLD2d32wb_fixed : return true;
1605   case ARM::VLD2q8PseudoWB_fixed : return true;
1606   case ARM::VLD2q16PseudoWB_fixed : return true;
1607   case ARM::VLD2q32PseudoWB_fixed : return true;
1608   case ARM::VLD2DUPd8wb_fixed : return true;
1609   case ARM::VLD2DUPd16wb_fixed : return true;
1610   case ARM::VLD2DUPd32wb_fixed : return true;
1611   }
1612 }
1613
1614 static bool isVSTfixed(unsigned Opc)
1615 {
       // Returns true if Opc is one of the "_fixed" write-back VST opcodes listed
       // below (VST1 and VST2 variants), and false for any other opcode.
1616   switch (Opc) {
1617   default: return false;
1618   case ARM::VST1d8wb_fixed : return true;
1619   case ARM::VST1d16wb_fixed : return true;
1620   case ARM::VST1d32wb_fixed : return true;
1621   case ARM::VST1d64wb_fixed : return true;
1622   case ARM::VST1q8wb_fixed : return true;
1623   case ARM::VST1q16wb_fixed : return true;
1624   case ARM::VST1q32wb_fixed : return true;
1625   case ARM::VST1q64wb_fixed : return true;
1626   case ARM::VST1d64TPseudoWB_fixed : return true;
1627   case ARM::VST1d64QPseudoWB_fixed : return true;
1628   case ARM::VST2d8wb_fixed : return true;
1629   case ARM::VST2d16wb_fixed : return true;
1630   case ARM::VST2d32wb_fixed : return true;
1631   case ARM::VST2q8PseudoWB_fixed : return true;
1632   case ARM::VST2q16PseudoWB_fixed : return true;
1633   case ARM::VST2q32PseudoWB_fixed : return true;
1634   }
1635 }
1636
1637 // Get the register stride update opcode of a VLD/VST instruction that
1638 // is otherwise equivalent to the given fixed stride updating instruction.
     //
     // Opc must satisfy isVLDfixed() or isVSTfixed(); this is checked with an
     // assert. Each handled "_fixed" opcode maps to the "_register" opcode of
     // the same name; any other opcode is returned unchanged.
     // NOTE(review): the VLD1d64Twb_fixed case below is not accepted by
     // isVLDfixed(), so it cannot be reached when asserts are enabled --
     // confirm which of the two lists is authoritative.
1639 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1640   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1641     && "Incorrect fixed stride updating instruction.");
1642   switch (Opc) {
1643   default: break;
       // VLD1 and VLD1DUP.
1644   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1645   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1646   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1647   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1648   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1649   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1650   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1651   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1652   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1653   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1654   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1655   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1656   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1657   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1658   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1659   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1660   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1661   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1662
       // VST1.
1663   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1664   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1665   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1666   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1667   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1668   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1669   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1670   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1671   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1672   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1673
       // VLD2.
1674   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1675   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1676   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1677   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1678   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1679   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1680
       // VST2.
1681   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1682   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1683   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1684   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1685   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1686   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1687
       // VLD2DUP.
1688   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1689   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1690   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1691   }
1692   return Opc; // If not one we handle, return it unchanged.
1693 }
1694
1695 /// Returns true if the given increment is a Constant known to be equal to the
1696 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1697 /// be used.
     ///
     /// The access size is computed as the size of \p VecTy in bytes multiplied
     /// by \p NumVecs. Returns false when \p Inc is not a ConstantSDNode.
1698 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1699   auto C = dyn_cast<ConstantSDNode>(Inc);
1700   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1701 }
1702
1703 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1704                                 const uint16_t *DOpcodes,
1705                                 const uint16_t *QOpcodes0,
1706                                 const uint16_t *QOpcodes1) {
       // Select a machine node for a NEON load of NumVecs (1-4) vectors and
       // rewire the uses of N to it. isUpdating selects the form that also
       // produces an i32 write-back result. DOpcodes is indexed for 64-bit
       // vector types and QOpcodes0 for the other handled types; QOpcodes1 is
       // used only on the two-instruction path below. Returns without touching
       // N if SelectAddrMode6 fails.
1707   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1708   SDLoc dl(N);
1709
1710   SDValue MemAddr, Align;
       // The address is operand 1 of an updating node and operand 2 otherwise.
1711   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1712   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1713     return;
1714
1715   SDValue Chain = N->getOperand(0);
1716   EVT VT = N->getValueType(0);
1717   bool is64BitVector = VT.is64BitVector();
1718   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1719
       // Map the vector type to an index into the opcode tables: 0, 1, 2 and 3
       // for 8-, 16-, 32- and 64-bit elements respectively.
1720   unsigned OpcodeIndex;
1721   switch (VT.getSimpleVT().SimpleTy) {
1722   default: llvm_unreachable("unhandled vld type");
1723     // Double-register operations:
1724   case MVT::v8i8:  OpcodeIndex = 0; break;
1725   case MVT::v4i16: OpcodeIndex = 1; break;
1726   case MVT::v2f32:
1727   case MVT::v2i32: OpcodeIndex = 2; break;
1728   case MVT::v1i64: OpcodeIndex = 3; break;
1729     // Quad-register operations:
1730   case MVT::v16i8: OpcodeIndex = 0; break;
1731   case MVT::v8i16: OpcodeIndex = 1; break;
1732   case MVT::v4f32:
1733   case MVT::v4i32: OpcodeIndex = 2; break;
1734   case MVT::v2f64:
1735   case MVT::v2i64: OpcodeIndex = 3;
1736     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1737     break;
1738   }
1739
       // A single vector keeps its own type. Multiple vectors are produced as
       // one vector of i64 elements: NumVecs elements (3 is rounded up to 4),
       // doubled when the vectors are not 64-bit.
1740   EVT ResTy;
1741   if (NumVecs == 1)
1742     ResTy = VT;
1743   else {
1744     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1745     if (!is64BitVector)
1746       ResTyElts *= 2;
1747     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1748   }
       // Result list: the loaded value, an i32 write-back result (updating form
       // only), and the chain.
1749   std::vector<EVT> ResTys;
1750   ResTys.push_back(ResTy);
1751   if (isUpdating)
1752     ResTys.push_back(MVT::i32);
1753   ResTys.push_back(MVT::Other);
1754
1755   SDValue Pred = getAL(CurDAG, dl);
1756   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1757   SDNode *VLd;
1758   SmallVector<SDValue, 7> Ops;
1759
1760   // Double registers and VLD1/VLD2 quad registers are directly supported.
1761   if (is64BitVector || NumVecs <= 2) {
1762     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1763                     QOpcodes0[OpcodeIndex]);
1764     Ops.push_back(MemAddr);
1765     Ops.push_back(Align);
1766     if (isUpdating) {
           // The increment operand directly follows the address operand.
1767       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1768       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1769       // case entirely when the rest are updated to that form, too.
1770       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
           // For VLD1/VLD2, switch to the register-update opcode when the
           // increment is not exactly the access size.
1771       if ((NumVecs <= 2) && !IsImmUpdate)
1772         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1773       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1774       // check for that explicitly too. Horribly hacky, but temporary.
           // Push an increment operand: Inc for a register update, or Reg0 for
           // an immediate update on VLD3/4 opcodes that are not "_fixed".
1775       if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
1776         Ops.push_back(IsImmUpdate ? Reg0 : Inc);
1777     }
1778     Ops.push_back(Pred);
1779     Ops.push_back(Reg0);
1780     Ops.push_back(Chain);
1781     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1782
1783   } else {
1784     // Otherwise, quad registers are loaded with two separate instructions,
1785     // where one loads the even registers and the other loads the odd registers.
1786     EVT AddrTy = MemAddr.getValueType();
1787
1788     // Load the even subregs.  This is always an updating load, so that it
1789     // provides the address to the second load for the odd subregs.
1790     SDValue ImplDef =
1791       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1792     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1793     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1794                                           ResTy, AddrTy, MVT::Other, OpsA);
         // Results of VLdA: 0 = loaded value, 1 = address result, 2 = chain.
1795     Chain = SDValue(VLdA, 2);
1796
1797     // Load the odd subregs.
1798     Ops.push_back(SDValue(VLdA, 1));
1799     Ops.push_back(Align);
1800     if (isUpdating) {
1801       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1802       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1803              "only constant post-increment update allowed for VLD3/4");
1804       (void)Inc;
1805       Ops.push_back(Reg0);
1806     }
         // The first load's value is passed as an input to the second load.
1807     Ops.push_back(SDValue(VLdA, 0));
1808     Ops.push_back(Pred);
1809     Ops.push_back(Reg0);
1810     Ops.push_back(Chain);
1811     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1812   }
1813
1814   // Transfer memoperands.
1815   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1816   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1817   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1818
       // A single-vector load needs no sub-register extraction.
1819   if (NumVecs == 1) {
1820     ReplaceNode(N, VLd);
1821     return;
1822   }
1823
1824   // Extract out the subregisters.
1825   SuperReg = SDValue(VLd, 0);
1826   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1827                     ARM::qsub_3 == ARM::qsub_0 + 3,
1828                 "Unexpected subreg numbering");
1829   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1830   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1831     ReplaceUses(SDValue(N, Vec),
1832                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
       // Remap the results of N that follow the vectors onto VLd's results 1
       // and (for the updating form) 2.
1833   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1834   if (isUpdating)
1835     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1836   CurDAG->RemoveDeadNode(N);
1837 }
1838
1839 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1840                                 const uint16_t *DOpcodes,
1841                                 const uint16_t *QOpcodes0,
1842                                 const uint16_t *QOpcodes1) {
       // Select a machine node for a NEON store of NumVecs (1-4) vectors and
       // replace N with it. isUpdating selects the form that also produces an
       // i32 write-back result. DOpcodes is indexed for 64-bit vector types and
       // QOpcodes0 for the other handled types; QOpcodes1 is used only on the
       // two-instruction path at the end. Returns without touching N if
       // SelectAddrMode6 fails.
1843   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1844   SDLoc dl(N);
1845
1846   SDValue MemAddr, Align;
       // The address is operand 1 of an updating node and operand 2 otherwise;
       // the first vector to store is operand 3 in both cases.
1847   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1848   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1849   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1850     return;
1851
1852   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1853   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1854
1855   SDValue Chain = N->getOperand(0);
1856   EVT VT = N->getOperand(Vec0Idx).getValueType();
1857   bool is64BitVector = VT.is64BitVector();
1858   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1859
       // Map the vector type to an index into the opcode tables: 0, 1, 2 and 3
       // for 8-, 16-, 32- and 64-bit elements respectively.
1860   unsigned OpcodeIndex;
1861   switch (VT.getSimpleVT().SimpleTy) {
1862   default: llvm_unreachable("unhandled vst type");
1863     // Double-register operations:
1864   case MVT::v8i8:  OpcodeIndex = 0; break;
1865   case MVT::v4i16: OpcodeIndex = 1; break;
1866   case MVT::v2f32:
1867   case MVT::v2i32: OpcodeIndex = 2; break;
1868   case MVT::v1i64: OpcodeIndex = 3; break;
1869     // Quad-register operations:
1870   case MVT::v16i8: OpcodeIndex = 0; break;
1871   case MVT::v8i16: OpcodeIndex = 1; break;
1872   case MVT::v4f32:
1873   case MVT::v4i32: OpcodeIndex = 2; break;
1874   case MVT::v2f64:
1875   case MVT::v2i64: OpcodeIndex = 3;
1876     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1877     break;
1878   }
1879
       // Result list: an i32 write-back result (updating form only) and the
       // chain.
1880   std::vector<EVT> ResTys;
1881   if (isUpdating)
1882     ResTys.push_back(MVT::i32);
1883   ResTys.push_back(MVT::Other);
1884
1885   SDValue Pred = getAL(CurDAG, dl);
1886   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1887   SmallVector<SDValue, 7> Ops;
1888
1889   // Double registers and VST1/VST2 quad registers are directly supported.
1890   if (is64BitVector || NumVecs <= 2) {
         // Build the single source operand: the vector itself for VST1, or a
         // REG_SEQUENCE combining the vectors otherwise.
1891     SDValue SrcReg;
1892     if (NumVecs == 1) {
1893       SrcReg = N->getOperand(Vec0Idx);
1894     } else if (is64BitVector) {
1895       // Form a REG_SEQUENCE to force register allocation.
1896       SDValue V0 = N->getOperand(Vec0Idx + 0);
1897       SDValue V1 = N->getOperand(Vec0Idx + 1);
1898       if (NumVecs == 2)
1899         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1900       else {
1901         SDValue V2 = N->getOperand(Vec0Idx + 2);
1902         // If it's a vst3, form a quad D-register and leave the last part as
1903         // an undef.
1904         SDValue V3 = (NumVecs == 3)
1905           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
1906           : N->getOperand(Vec0Idx + 3);
1907         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1908       }
1909     } else {
1910       // Form a QQ register.
1911       SDValue Q0 = N->getOperand(Vec0Idx);
1912       SDValue Q1 = N->getOperand(Vec0Idx + 1);
1913       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1914     }
1915
1916     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1917                     QOpcodes0[OpcodeIndex]);
1918     Ops.push_back(MemAddr);
1919     Ops.push_back(Align);
1920     if (isUpdating) {
           // The increment operand directly follows the address operand.
1921       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1922       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
1923       // case entirely when the rest are updated to that form, too.
1924       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
           // For VST1/VST2, switch to the register-update opcode when the
           // increment is not exactly the access size.
1925       if (NumVecs <= 2 && !IsImmUpdate)
1926         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1927       // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
1928       // check for that explicitly too. Horribly hacky, but temporary.
           // Push an increment operand: Inc for a register update, or Reg0 for
           // an immediate update on VST3/4 opcodes that are not "_fixed".
1929       if  (!IsImmUpdate)
1930         Ops.push_back(Inc);
1931       else if (NumVecs > 2 && !isVSTfixed(Opc))
1932         Ops.push_back(Reg0);
1933     }
1934     Ops.push_back(SrcReg);
1935     Ops.push_back(Pred);
1936     Ops.push_back(Reg0);
1937     Ops.push_back(Chain);
1938     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1939
1940     // Transfer memoperands.
1941     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
1942
1943     ReplaceNode(N, VSt);
1944     return;
1945   }
1946
1947   // Otherwise, quad registers are stored with two separate instructions,
1948   // where one stores the even registers and the other stores the odd registers.
1949
1950   // Form the QQQQ REG_SEQUENCE.
       // For a three-vector store the fourth slot is filled with IMPLICIT_DEF.
1951   SDValue V0 = N->getOperand(Vec0Idx + 0);
1952   SDValue V1 = N->getOperand(Vec0Idx + 1);
1953   SDValue V2 = N->getOperand(Vec0Idx + 2);
1954   SDValue V3 = (NumVecs == 3)
1955     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1956     : N->getOperand(Vec0Idx + 3);
1957   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
1958
1959   // Store the even D registers.  This is always an updating store, so that it
1960   // provides the address to the second store for the odd subregs.
1961   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
1962   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1963                                         MemAddr.getValueType(),
1964                                         MVT::Other, OpsA);
       // Both stores are given the original node's memory operand.
1965   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
       // Results of VStA: 0 = address result, 1 = chain.
1966   Chain = SDValue(VStA, 1);
1967
1968   // Store the odd D registers.
1969   Ops.push_back(SDValue(VStA, 0));
1970   Ops.push_back(Align);
1971   if (isUpdating) {
1972     SDValue Inc = N->getOperand(AddrOpIdx + 1);
1973     assert(isa<ConstantSDNode>(Inc.getNode()) &&
1974            "only constant post-increment update allowed for VST3/4");
1975     (void)Inc;
1976     Ops.push_back(Reg0);
1977   }
1978   Ops.push_back(RegSeq);
1979   Ops.push_back(Pred);
1980   Ops.push_back(Reg0);
1981   Ops.push_back(Chain);
1982   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
1983                                         Ops);
1984   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
1985   ReplaceNode(N, VStB);
1986 }
1987
1988 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
1989                                       unsigned NumVecs,
1990                                       const uint16_t *DOpcodes,
1991                                       const uint16_t *QOpcodes) {
       // Select a machine node for a NEON single-lane load (IsLoad) or store of
       // NumVecs (2-4) vectors. isUpdating selects the form that also produces
       // an i32 write-back result. DOpcodes is indexed for 64-bit vector types
       // and QOpcodes for the other handled types. Returns without touching N
       // if SelectAddrMode6 fails.
1992   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
1993   SDLoc dl(N);
1994
1995   SDValue MemAddr, Align;
       // The address is operand 1 of an updating node and operand 2 otherwise;
       // the first vector operand is operand 3 in both cases.
1996   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1997   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1998   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1999     return;
2000
2001   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2002   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2003
2004   SDValue Chain = N->getOperand(0);
       // The lane number is the constant operand right after the vectors.
2005   unsigned Lane =
2006     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2007   EVT VT = N->getOperand(Vec0Idx).getValueType();
2008   bool is64BitVector = VT.is64BitVector();
2009
       // Compute the alignment operand. It stays 0 for three-vector accesses.
2010   unsigned Alignment = 0;
2011   if (NumVecs != 3) {
2012     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
         // NumBytes is one element from each of the NumVecs vectors.
2013     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
         // Never claim more alignment than the number of bytes accessed.
2014     if (Alignment > NumBytes)
2015       Alignment = NumBytes;
         // Drop alignments that are below both 8 and the access size.
2016     if (Alignment < 8 && Alignment < NumBytes)
2017       Alignment = 0;
2018     // Alignment must be a power of two; make sure of that.
         // (x & -x keeps only the lowest set bit.)
2019     Alignment = (Alignment & -Alignment);
2020     if (Alignment == 1)
2021       Alignment = 0;
2022   }
2023   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2024
       // Map the vector type to an index into the opcode tables. The two tables
       // are indexed differently: DOpcodes starts at 8-bit elements, QOpcodes
       // at 16-bit elements.
2025   unsigned OpcodeIndex;
2026   switch (VT.getSimpleVT().SimpleTy) {
2027   default: llvm_unreachable("unhandled vld/vst lane type");
2028     // Double-register operations:
2029   case MVT::v8i8:  OpcodeIndex = 0; break;
2030   case MVT::v4i16: OpcodeIndex = 1; break;
2031   case MVT::v2f32:
2032   case MVT::v2i32: OpcodeIndex = 2; break;
2033     // Quad-register operations:
2034   case MVT::v8i16: OpcodeIndex = 0; break;
2035   case MVT::v4f32:
2036   case MVT::v4i32: OpcodeIndex = 1; break;
2037   }
2038
       // Result list: for loads, one vector of i64 elements (NumVecs elements,
       // 3 rounded up to 4, doubled when the vectors are not 64-bit); then an
       // i32 write-back result (updating form only) and the chain.
2039   std::vector<EVT> ResTys;
2040   if (IsLoad) {
2041     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2042     if (!is64BitVector)
2043       ResTyElts *= 2;
2044     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2045                                       MVT::i64, ResTyElts));
2046   }
2047   if (isUpdating)
2048     ResTys.push_back(MVT::i32);
2049   ResTys.push_back(MVT::Other);
2050
2051   SDValue Pred = getAL(CurDAG, dl);
2052   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2053
2054   SmallVector<SDValue, 8> Ops;
2055   Ops.push_back(MemAddr);
2056   Ops.push_back(Align);
2057   if (isUpdating) {
2058     SDValue Inc = N->getOperand(AddrOpIdx + 1);
         // The immediate form applies when Inc equals the element size times
         // NumVecs; it is encoded by passing Reg0 as the increment operand.
2059     bool IsImmUpdate =
2060         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2061     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2062   }
2063
       // Combine the input vectors into one super-register with REG_SEQUENCE.
       // For three vectors the fourth slot is filled with IMPLICIT_DEF.
2064   SDValue SuperReg;
2065   SDValue V0 = N->getOperand(Vec0Idx + 0);
2066   SDValue V1 = N->getOperand(Vec0Idx + 1);
2067   if (NumVecs == 2) {
2068     if (is64BitVector)
2069       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2070     else
2071       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2072   } else {
2073     SDValue V2 = N->getOperand(Vec0Idx + 2);
2074     SDValue V3 = (NumVecs == 3)
2075       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2076       : N->getOperand(Vec0Idx + 3);
2077     if (is64BitVector)
2078       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2079     else
2080       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2081   }
2082   Ops.push_back(SuperReg);
2083   Ops.push_back(getI32Imm(Lane, dl));
2084   Ops.push_back(Pred);
2085   Ops.push_back(Reg0);
2086   Ops.push_back(Chain);
2087
2088   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2089                                   QOpcodes[OpcodeIndex]);
2090   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2091   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
       // A store has no vector results to extract.
2092   if (!IsLoad) {
2093     ReplaceNode(N, VLdLn);
2094     return;
2095   }
2096
2097   // Extract the subregisters.
2098   SuperReg = SDValue(VLdLn, 0);
2099   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2100                     ARM::qsub_3 == ARM::qsub_0 + 3,
2101                 "Unexpected subreg numbering");
2102   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2103   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2104     ReplaceUses(SDValue(N, Vec),
2105                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
       // Remap the results of N that follow the vectors onto VLdLn's results 1
       // and (for the updating form) 2.
2106   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2107   if (isUpdating)
2108     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2109   CurDAG->RemoveDeadNode(N);
2110 }
2111
2112 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2113                                    const uint16_t *DOpcodes,
2114                                    const uint16_t *QOpcodes) {
       // Select a machine node for a NEON VLD-dup of NumVecs (1-4) vectors and
       // rewire the uses of N to it. isUpdating selects the form that also
       // produces an i32 write-back result. DOpcodes and QOpcodes are indexed
       // 0, 1, 2 for 8-, 16- and 32-bit elements. Returns without touching N if
       // SelectAddrMode6 fails.
2115   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2116   SDLoc dl(N);
2117
2118   SDValue MemAddr, Align;
       // The address is always operand 1 here.
2119   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2120     return;
2121
2122   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2123   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2124
2125   SDValue Chain = N->getOperand(0);
2126   EVT VT = N->getValueType(0);
2127
       // Compute the alignment operand. It stays 0 for three-vector accesses.
2128   unsigned Alignment = 0;
2129   if (NumVecs != 3) {
2130     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
         // NumBytes is one element for each of the NumVecs vectors.
2131     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
         // Never claim more alignment than the number of bytes accessed.
2132     if (Alignment > NumBytes)
2133       Alignment = NumBytes;
         // Drop alignments that are below both 8 and the access size.
2134     if (Alignment < 8 && Alignment < NumBytes)
2135       Alignment = 0;
2136     // Alignment must be a power of two; make sure of that.
         // (x & -x keeps only the lowest set bit.)
2137     Alignment = (Alignment & -Alignment);
2138     if (Alignment == 1)
2139       Alignment = 0;
2140   }
2141   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2142
       // Pick the opcode directly from the result vector type.
2143   unsigned Opc;
2144   switch (VT.getSimpleVT().SimpleTy) {
2145   default: llvm_unreachable("unhandled vld-dup type");
2146   case MVT::v8i8:  Opc = DOpcodes[0]; break;
2147   case MVT::v16i8: Opc = QOpcodes[0]; break;
2148   case MVT::v4i16: Opc = DOpcodes[1]; break;
2149   case MVT::v8i16: Opc = QOpcodes[1]; break;
2150   case MVT::v2f32:
2151   case MVT::v2i32: Opc = DOpcodes[2]; break;
2152   case MVT::v4f32:
2153   case MVT::v4i32: Opc = QOpcodes[2]; break;
2154   }
2155
2156   SDValue Pred = getAL(CurDAG, dl);
2157   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2158   SmallVector<SDValue, 6> Ops;
2159   Ops.push_back(MemAddr);
2160   Ops.push_back(Align);
2161   if (isUpdating) {
2162     // fixed-stride update instructions don't have an explicit writeback
2163     // operand. It's implicit in the opcode itself.
2164     SDValue Inc = N->getOperand(2);
         // The immediate form applies when Inc equals the element size times
         // NumVecs.
2165     bool IsImmUpdate =
2166         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
         // For one or two vectors, switch to the register-update opcode when
         // the increment is not the exact access size.
2167     if (NumVecs <= 2 && !IsImmUpdate)
2168       Opc = getVLDSTRegisterUpdateOpcode(Opc);
2169     if (!IsImmUpdate)
2170       Ops.push_back(Inc);
2171     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2172     else if (NumVecs > 2)
2173       Ops.push_back(Reg0);
2174   }
2175   Ops.push_back(Pred);
2176   Ops.push_back(Reg0);
2177   Ops.push_back(Chain);
2178
       // Result list: one vector of NumVecs i64 elements (3 rounded up to 4),
       // an i32 write-back result (updating form only), and the chain.
2179   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2180   std::vector<EVT> ResTys;
2181   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2182   if (isUpdating)
2183     ResTys.push_back(MVT::i32);
2184   ResTys.push_back(MVT::Other);
2185   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2186   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2187
2188   // Extract the subregisters.
2189   if (NumVecs == 1) {
2190     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2191   } else {
2192     SDValue SuperReg = SDValue(VLdDup, 0);
2193     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
         // Extraction here always uses consecutive dsub indices.
2194     unsigned SubIdx = ARM::dsub_0;
2195     for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2196       ReplaceUses(SDValue(N, Vec),
2197                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2198   }
       // Remap the results of N that follow the vectors onto VLdDup's results 1
       // and (for the updating form) 2.
2199   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2200   if (isUpdating)
2201     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2202   CurDAG->RemoveDeadNode(N);
2203 }
2204
2205 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2206   if (!Subtarget->hasV6T2Ops())
2207     return false;
2208
2209   unsigned Opc = isSigned
2210     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2211     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2212   SDLoc dl(N);
2213
2214   // For unsigned extracts, check for a shift right and mask
2215   unsigned And_imm = 0;
2216   if (N->getOpcode() == ISD::AND) {
2217     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2218
2219       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2220       if (And_imm & (And_imm + 1))
2221         return false;
2222
2223       unsigned Srl_imm = 0;
2224       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2225                                 Srl_imm)) {
2226         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2227
2228         // Note: The width operand is encoded as width-1.
2229         unsigned Width = countTrailingOnes(And_imm) - 1;
2230         unsigned LSB = Srl_imm;
2231
2232         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2233
2234         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2235           // It's cheaper to use a right shift to extract the top bits.
2236           if (Subtarget->isThumb()) {
2237             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2238             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2239                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2240                               getAL(CurDAG, dl), Reg0, Reg0 };
2241             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2242             return true;
2243           }
2244
2245           // ARM models shift instructions as MOVsi with shifter operand.
2246           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2247           SDValue ShOpc =
2248             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2249                                       MVT::i32);
2250           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2251                             getAL(CurDAG, dl), Reg0, Reg0 };
2252           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2253           return true;
2254         }
2255
2256         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2257                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2258                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2259                           getAL(CurDAG, dl), Reg0 };
2260         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2261         return true;
2262       }
2263     }
2264     return false;
2265   }
2266
2267   // Otherwise, we're looking for a shift of a shift
2268   unsigned Shl_imm = 0;
2269   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2270     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2271     unsigned Srl_imm = 0;
2272     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2273       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2274       // Note: The width operand is encoded as width-1.
2275       unsigned Width = 32 - Srl_imm - 1;
2276       int LSB = Srl_imm - Shl_imm;
2277       if (LSB < 0)
2278         return false;
2279       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2280       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2281                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2282                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2283                         getAL(CurDAG, dl), Reg0 };
2284       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2285       return true;
2286     }
2287   }
2288
2289   // Or we are looking for a shift of an and, with a mask operand
2290   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2291       isShiftedMask_32(And_imm)) {
2292     unsigned Srl_imm = 0;
2293     unsigned LSB = countTrailingZeros(And_imm);
2294     // Shift must be the same as the ands lsb
2295     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2296       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2297       unsigned MSB = 31 - countLeadingZeros(And_imm);
2298       // Note: The width operand is encoded as width-1.
2299       unsigned Width = MSB - LSB;
2300       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2301       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2302                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2303                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2304                         getAL(CurDAG, dl), Reg0 };
2305       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2306       return true;
2307     }
2308   }
2309
2310   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2311     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2312     unsigned LSB = 0;
2313     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2314         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2315       return false;
2316
2317     if (LSB + Width > 32)
2318       return false;
2319
2320     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2321     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2322                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2323                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2324                       getAL(CurDAG, dl), Reg0 };
2325     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2326     return true;
2327   }
2328
2329   return false;
2330 }
2331
2332 /// Target-specific DAG combining for ISD::XOR.
2333 /// Target-independent combining lowers SELECT_CC nodes of the form
2334 /// select_cc setg[ge] X,  0,  X, -X
2335 /// select_cc setgt    X, -1,  X, -X
2336 /// select_cc setl[te] X,  0, -X,  X
2337 /// select_cc setlt    X,  1, -X,  X
2338 /// which represent Integer ABS into:
2339 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2340 /// ARM instruction selection detects the latter and matches it to
2341 /// ARM::ABS or ARM::t2ABS machine node.
2342 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2343   SDValue XORSrc0 = N->getOperand(0);
2344   SDValue XORSrc1 = N->getOperand(1);
2345   EVT VT = N->getValueType(0);
2346
2347   if (Subtarget->isThumb1Only())
2348     return false;
2349
2350   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2351     return false;
2352
2353   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2354   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2355   SDValue SRASrc0 = XORSrc1.getOperand(0);
2356   SDValue SRASrc1 = XORSrc1.getOperand(1);
2357   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2358   EVT XType = SRASrc0.getValueType();
2359   unsigned Size = XType.getSizeInBits() - 1;
2360
2361   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2362       XType.isInteger() && SRAConstant != nullptr &&
2363       Size == SRAConstant->getZExtValue()) {
2364     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2365     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2366     return true;
2367   }
2368
2369   return false;
2370 }
2371
2372 /// We've got special pseudo-instructions for these
2373 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2374   unsigned Opcode;
2375   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2376   if (MemTy == MVT::i8)
2377     Opcode = ARM::CMP_SWAP_8;
2378   else if (MemTy == MVT::i16)
2379     Opcode = ARM::CMP_SWAP_16;
2380   else if (MemTy == MVT::i32)
2381     Opcode = ARM::CMP_SWAP_32;
2382   else
2383     llvm_unreachable("Unknown AtomicCmpSwap type");
2384
2385   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2386                    N->getOperand(0)};
2387   SDNode *CmpSwap = CurDAG->getMachineNode(
2388       Opcode, SDLoc(N),
2389       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2390
2391   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2392   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2393   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2394
2395   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2396   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2397   CurDAG->RemoveDeadNode(N);
2398 }
2399
2400 static Optional<std::pair<unsigned, unsigned>>
2401 getContiguousRangeOfSetBits(const APInt &A) {
2402   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2403   unsigned LastOne = A.countTrailingZeros();
2404   if (A.countPopulation() != (FirstOne - LastOne + 1))
2405     return Optional<std::pair<unsigned,unsigned>>();
2406   return std::make_pair(FirstOne, LastOne);
2407 }
2408
2409 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2410   assert(N->getOpcode() == ARMISD::CMPZ);
2411   SwitchEQNEToPLMI = false;
2412
2413   if (!Subtarget->isThumb())
2414     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2415     // LSR don't exist as standalone instructions - they need the barrel shifter.
2416     return;
2417
2418   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2419   SDValue And = N->getOperand(0);
2420   if (!And->hasOneUse())
2421     return;
2422
2423   SDValue Zero = N->getOperand(1);
2424   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2425       And->getOpcode() != ISD::AND)
2426     return;
2427   SDValue X = And.getOperand(0);
2428   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2429
2430   if (!C || !X->hasOneUse())
2431     return;
2432   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2433   if (!Range)
2434     return;
2435
2436   // There are several ways to lower this:
2437   SDNode *NewN;
2438   SDLoc dl(N);
2439
2440   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2441     if (Subtarget->isThumb2()) {
2442       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2443       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2444                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2445                         CurDAG->getRegister(0, MVT::i32) };
2446       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2447     } else {
2448       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2449                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2450                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2451       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2452     }
2453   };
2454
2455   if (Range->second == 0) {
2456     //  1. Mask includes the LSB -> Simply shift the top N bits off
2457     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2458     ReplaceNode(And.getNode(), NewN);
2459   } else if (Range->first == 31) {
2460     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2461     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2462     ReplaceNode(And.getNode(), NewN);
2463   } else if (Range->first == Range->second) {
2464     //  3. Only one bit is set. We can shift this into the sign bit and use a
2465     //     PL/MI comparison.
2466     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2467     ReplaceNode(And.getNode(), NewN);
2468
2469     SwitchEQNEToPLMI = true;
2470   } else if (!Subtarget->hasV6T2Ops()) {
2471     //  4. Do a double shift to clear bottom and top bits, but only in
2472     //     thumb-1 mode as in thumb-2 we can use UBFX.
2473     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2474     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2475                      Range->second + (31 - Range->first));
2476     ReplaceNode(And.getNode(), NewN);
2477   }
2478
2479 }
2480
2481 void ARMDAGToDAGISel::Select(SDNode *N) {
2482   SDLoc dl(N);
2483
2484   if (N->isMachineOpcode()) {
2485     N->setNodeId(-1);
2486     return;   // Already selected.
2487   }
2488
2489   switch (N->getOpcode()) {
2490   default: break;
2491   case ISD::WRITE_REGISTER:
2492     if (tryWriteRegister(N))
2493       return;
2494     break;
2495   case ISD::READ_REGISTER:
2496     if (tryReadRegister(N))
2497       return;
2498     break;
2499   case ISD::INLINEASM:
2500     if (tryInlineAsm(N))
2501       return;
2502     break;
2503   case ISD::XOR:
2504     // Select special operations if XOR node forms integer ABS pattern
2505     if (tryABSOp(N))
2506       return;
2507     // Other cases are autogenerated.
2508     break;
2509   case ISD::Constant: {
2510     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2511     // If we can't materialize the constant we need to use a literal pool
2512     if (ConstantMaterializationCost(Val) > 2) {
2513       SDValue CPIdx = CurDAG->getTargetConstantPool(
2514           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2515           TLI->getPointerTy(CurDAG->getDataLayout()));
2516
2517       SDNode *ResNode;
2518       if (Subtarget->isThumb()) {
2519         SDValue Ops[] = {
2520           CPIdx,
2521           getAL(CurDAG, dl),
2522           CurDAG->getRegister(0, MVT::i32),
2523           CurDAG->getEntryNode()
2524         };
2525         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2526                                          Ops);
2527       } else {
2528         SDValue Ops[] = {
2529           CPIdx,
2530           CurDAG->getTargetConstant(0, dl, MVT::i32),
2531           getAL(CurDAG, dl),
2532           CurDAG->getRegister(0, MVT::i32),
2533           CurDAG->getEntryNode()
2534         };
2535         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2536                                          Ops);
2537       }
2538       // Annotate the Node with memory operand information so that MachineInstr
2539       // queries work properly. This e.g. gives the register allocation the
2540       // required information for rematerialization.
2541       MachineFunction& MF = CurDAG->getMachineFunction();
2542       MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
2543       MemOp[0] = MF.getMachineMemOperand(
2544           MachinePointerInfo::getConstantPool(MF),
2545           MachineMemOperand::MOLoad, 4, 4);
2546
2547       cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
2548
2549       ReplaceNode(N, ResNode);
2550       return;
2551     }
2552
2553     // Other cases are autogenerated.
2554     break;
2555   }
2556   case ISD::FrameIndex: {
2557     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2558     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2559     SDValue TFI = CurDAG->getTargetFrameIndex(
2560         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2561     if (Subtarget->isThumb1Only()) {
2562       // Set the alignment of the frame object to 4, to avoid having to generate
2563       // more than one ADD
2564       MachineFrameInfo &MFI = MF->getFrameInfo();
2565       if (MFI.getObjectAlignment(FI) < 4)
2566         MFI.setObjectAlignment(FI, 4);
2567       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2568                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2569       return;
2570     } else {
2571       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2572                       ARM::t2ADDri : ARM::ADDri);
2573       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2574                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2575                         CurDAG->getRegister(0, MVT::i32) };
2576       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2577       return;
2578     }
2579   }
2580   case ISD::SRL:
2581     if (tryV6T2BitfieldExtractOp(N, false))
2582       return;
2583     break;
2584   case ISD::SIGN_EXTEND_INREG:
2585   case ISD::SRA:
2586     if (tryV6T2BitfieldExtractOp(N, true))
2587       return;
2588     break;
2589   case ISD::MUL:
2590     if (Subtarget->isThumb1Only())
2591       break;
2592     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2593       unsigned RHSV = C->getZExtValue();
2594       if (!RHSV) break;
2595       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2596         unsigned ShImm = Log2_32(RHSV-1);
2597         if (ShImm >= 32)
2598           break;
2599         SDValue V = N->getOperand(0);
2600         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2601         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2602         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2603         if (Subtarget->isThumb()) {
2604           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2605           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2606           return;
2607         } else {
2608           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2609                             Reg0 };
2610           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2611           return;
2612         }
2613       }
2614       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2615         unsigned ShImm = Log2_32(RHSV+1);
2616         if (ShImm >= 32)
2617           break;
2618         SDValue V = N->getOperand(0);
2619         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2620         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2621         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2622         if (Subtarget->isThumb()) {
2623           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2624           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2625           return;
2626         } else {
2627           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2628                             Reg0 };
2629           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2630           return;
2631         }
2632       }
2633     }
2634     break;
2635   case ISD::AND: {
2636     // Check for unsigned bitfield extract
2637     if (tryV6T2BitfieldExtractOp(N, false))
2638       return;
2639
2640     // If an immediate is used in an AND node, it is possible that the immediate
2641     // can be more optimally materialized when negated. If this is the case we
2642     // can negate the immediate and use a BIC instead.
2643     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2644     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2645       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2646
2647       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2648       // immediate can be negated and fit in the immediate operand of
2649       // a t2BIC, don't do any manual transform here as this can be
2650       // handled by the generic ISel machinery.
2651       bool PreferImmediateEncoding =
2652         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2653       if (!PreferImmediateEncoding &&
2654           ConstantMaterializationCost(Imm) >
2655               ConstantMaterializationCost(~Imm)) {
2656         // The current immediate costs more to materialize than a negated
2657         // immediate, so negate the immediate and use a BIC.
2658         SDValue NewImm =
2659           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2660         // If the new constant didn't exist before, reposition it in the topological
2661         // ordering so it is just before N. Otherwise, don't touch its location.
2662         if (NewImm->getNodeId() == -1)
2663           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2664
2665         if (!Subtarget->hasThumb2()) {
2666           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2667                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2668                            CurDAG->getRegister(0, MVT::i32)};
2669           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2670           return;
2671         } else {
2672           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2673                            CurDAG->getRegister(0, MVT::i32),
2674                            CurDAG->getRegister(0, MVT::i32)};
2675           ReplaceNode(N,
2676                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2677           return;
2678         }
2679       }
2680     }
2681
2682     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2683     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2684     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2685     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2686     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2687     EVT VT = N->getValueType(0);
2688     if (VT != MVT::i32)
2689       break;
2690     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2691       ? ARM::t2MOVTi16
2692       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2693     if (!Opc)
2694       break;
2695     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2696     N1C = dyn_cast<ConstantSDNode>(N1);
2697     if (!N1C)
2698       break;
2699     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2700       SDValue N2 = N0.getOperand(1);
2701       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2702       if (!N2C)
2703         break;
2704       unsigned N1CVal = N1C->getZExtValue();
2705       unsigned N2CVal = N2C->getZExtValue();
2706       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2707           (N1CVal & 0xffffU) == 0xffffU &&
2708           (N2CVal & 0xffffU) == 0x0U) {
2709         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2710                                                   dl, MVT::i32);
2711         SDValue Ops[] = { N0.getOperand(0), Imm16,
2712                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2713         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2714         return;
2715       }
2716     }
2717
2718     break;
2719   }
2720   case ARMISD::UMAAL: {
2721     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2722     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2723                       N->getOperand(2), N->getOperand(3),
2724                       getAL(CurDAG, dl),
2725                       CurDAG->getRegister(0, MVT::i32) };
2726     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2727     return;
2728   }
2729   case ARMISD::UMLAL:{
2730     if (Subtarget->isThumb()) {
2731       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2732                         N->getOperand(3), getAL(CurDAG, dl),
2733                         CurDAG->getRegister(0, MVT::i32)};
2734       ReplaceNode(
2735           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2736       return;
2737     }else{
2738       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2739                         N->getOperand(3), getAL(CurDAG, dl),
2740                         CurDAG->getRegister(0, MVT::i32),
2741                         CurDAG->getRegister(0, MVT::i32) };
2742       ReplaceNode(N, CurDAG->getMachineNode(
2743                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2744                          MVT::i32, MVT::i32, Ops));
2745       return;
2746     }
2747   }
2748   case ARMISD::SMLAL:{
2749     if (Subtarget->isThumb()) {
2750       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2751                         N->getOperand(3), getAL(CurDAG, dl),
2752                         CurDAG->getRegister(0, MVT::i32)};
2753       ReplaceNode(
2754           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2755       return;
2756     }else{
2757       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2758                         N->getOperand(3), getAL(CurDAG, dl),
2759                         CurDAG->getRegister(0, MVT::i32),
2760                         CurDAG->getRegister(0, MVT::i32) };
2761       ReplaceNode(N, CurDAG->getMachineNode(
2762                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2763                          MVT::i32, MVT::i32, Ops));
2764       return;
2765     }
2766   }
2767   case ARMISD::SUBE: {
2768     if (!Subtarget->hasV6Ops())
2769       break;
2770     // Look for a pattern to match SMMLS
2771     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2772     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2773         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2774         !SDValue(N, 1).use_empty())
2775       break;
2776
2777     if (Subtarget->isThumb())
2778       assert(Subtarget->hasThumb2() &&
2779              "This pattern should not be generated for Thumb");
2780
2781     SDValue SmulLoHi = N->getOperand(1);
2782     SDValue Subc = N->getOperand(2);
2783     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2784
2785     if (!Zero || Zero->getZExtValue() != 0 ||
2786         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2787         N->getOperand(1) != SmulLoHi.getValue(1) ||
2788         N->getOperand(2) != Subc.getValue(1))
2789       break;
2790
2791     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2792     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2793                       N->getOperand(0), getAL(CurDAG, dl),
2794                       CurDAG->getRegister(0, MVT::i32) };
2795     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2796     return;
2797   }
2798   case ISD::LOAD: {
2799     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2800       if (tryT2IndexedLoad(N))
2801         return;
2802     } else if (Subtarget->isThumb()) {
2803       if (tryT1IndexedLoad(N))
2804         return;
2805     } else if (tryARMIndexedLoad(N))
2806       return;
2807     // Other cases are autogenerated.
2808     break;
2809   }
2810   case ARMISD::BRCOND: {
2811     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2812     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2813     // Pattern complexity = 6  cost = 1  size = 0
2814
2815     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2816     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2817     // Pattern complexity = 6  cost = 1  size = 0
2818
2819     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2820     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2821     // Pattern complexity = 6  cost = 1  size = 0
2822
2823     unsigned Opc = Subtarget->isThumb() ?
2824       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2825     SDValue Chain = N->getOperand(0);
2826     SDValue N1 = N->getOperand(1);
2827     SDValue N2 = N->getOperand(2);
2828     SDValue N3 = N->getOperand(3);
2829     SDValue InFlag = N->getOperand(4);
2830     assert(N1.getOpcode() == ISD::BasicBlock);
2831     assert(N2.getOpcode() == ISD::Constant);
2832     assert(N3.getOpcode() == ISD::Register);
2833
2834     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2835
2836     if (InFlag.getOpcode() == ARMISD::CMPZ) {
2837       bool SwitchEQNEToPLMI;
2838       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2839       InFlag = N->getOperand(4);
2840
2841       if (SwitchEQNEToPLMI) {
2842         switch ((ARMCC::CondCodes)CC) {
2843         default: llvm_unreachable("CMPZ must be either NE or EQ!");
2844         case ARMCC::NE:
2845           CC = (unsigned)ARMCC::MI;
2846           break;
2847         case ARMCC::EQ:
2848           CC = (unsigned)ARMCC::PL;
2849           break;
2850         }
2851       }
2852     }
2853
2854     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
2855     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2856     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2857                                              MVT::Glue, Ops);
2858     Chain = SDValue(ResNode, 0);
2859     if (N->getNumValues() == 2) {
2860       InFlag = SDValue(ResNode, 1);
2861       ReplaceUses(SDValue(N, 1), InFlag);
2862     }
2863     ReplaceUses(SDValue(N, 0),
2864                 SDValue(Chain.getNode(), Chain.getResNo()));
2865     CurDAG->RemoveDeadNode(N);
2866     return;
2867   }
2868
2869   case ARMISD::CMPZ: {
2870     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
2871     //   This allows us to avoid materializing the expensive negative constant.
2872     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
2873     //   for its glue output.
2874     SDValue X = N->getOperand(0);
2875     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
2876     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
2877       int64_t Addend = -C->getSExtValue();
2878
2879       SDNode *Add = nullptr;
2880       // ADDS can be better than CMN if the immediate fits in a
2881       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
2882       // Outside that range we can just use a CMN which is 32-bit but has a
2883       // 12-bit immediate range.
2884       if (Addend < 1<<8) {
2885         if (Subtarget->isThumb2()) {
2886           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
2887                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2888                             CurDAG->getRegister(0, MVT::i32) };
2889           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
2890         } else {
2891           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
2892           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
2893                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
2894                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2895           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2896         }
2897       }
2898       if (Add) {
2899         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
2900         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
2901       }
2902     }
2903     // Other cases are autogenerated.
2904     break;
2905   }
2906
2907   case ARMISD::CMOV: {
2908     SDValue InFlag = N->getOperand(4);
2909
2910     if (InFlag.getOpcode() == ARMISD::CMPZ) {
2911       bool SwitchEQNEToPLMI;
2912       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2913
2914       if (SwitchEQNEToPLMI) {
2915         SDValue ARMcc = N->getOperand(2);
2916         ARMCC::CondCodes CC =
2917           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
2918
2919         switch (CC) {
2920         default: llvm_unreachable("CMPZ must be either NE or EQ!");
2921         case ARMCC::NE:
2922           CC = ARMCC::MI;
2923           break;
2924         case ARMCC::EQ:
2925           CC = ARMCC::PL;
2926           break;
2927         }
2928         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
2929         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
2930                          N->getOperand(3), N->getOperand(4)};
2931         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
2932       }
2933
2934     }
2935     // Other cases are autogenerated.
2936     break;
2937   }
2938
2939   case ARMISD::VZIP: {
2940     unsigned Opc = 0;
2941     EVT VT = N->getValueType(0);
2942     switch (VT.getSimpleVT().SimpleTy) {
2943     default: return;
2944     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2945     case MVT::v4i16: Opc = ARM::VZIPd16; break;
2946     case MVT::v2f32:
2947     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2948     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2949     case MVT::v16i8: Opc = ARM::VZIPq8; break;
2950     case MVT::v8i16: Opc = ARM::VZIPq16; break;
2951     case MVT::v4f32:
2952     case MVT::v4i32: Opc = ARM::VZIPq32; break;
2953     }
2954     SDValue Pred = getAL(CurDAG, dl);
2955     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2956     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2957     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
2958     return;
2959   }
2960   case ARMISD::VUZP: {
2961     unsigned Opc = 0;
2962     EVT VT = N->getValueType(0);
2963     switch (VT.getSimpleVT().SimpleTy) {
2964     default: return;
2965     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2966     case MVT::v4i16: Opc = ARM::VUZPd16; break;
2967     case MVT::v2f32:
2968     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2969     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2970     case MVT::v16i8: Opc = ARM::VUZPq8; break;
2971     case MVT::v8i16: Opc = ARM::VUZPq16; break;
2972     case MVT::v4f32:
2973     case MVT::v4i32: Opc = ARM::VUZPq32; break;
2974     }
2975     SDValue Pred = getAL(CurDAG, dl);
2976     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2977     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2978     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
2979     return;
2980   }
2981   case ARMISD::VTRN: {
2982     unsigned Opc = 0;
2983     EVT VT = N->getValueType(0);
2984     switch (VT.getSimpleVT().SimpleTy) {
2985     default: return;
2986     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2987     case MVT::v4i16: Opc = ARM::VTRNd16; break;
2988     case MVT::v2f32:
2989     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2990     case MVT::v16i8: Opc = ARM::VTRNq8; break;
2991     case MVT::v8i16: Opc = ARM::VTRNq16; break;
2992     case MVT::v4f32:
2993     case MVT::v4i32: Opc = ARM::VTRNq32; break;
2994     }
2995     SDValue Pred = getAL(CurDAG, dl);
2996     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2997     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2998     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
2999     return;
3000   }
3001   case ARMISD::BUILD_VECTOR: {
3002     EVT VecVT = N->getValueType(0);
3003     EVT EltVT = VecVT.getVectorElementType();
3004     unsigned NumElts = VecVT.getVectorNumElements();
3005     if (EltVT == MVT::f64) {
3006       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3007       ReplaceNode(
3008           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3009       return;
3010     }
3011     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3012     if (NumElts == 2) {
3013       ReplaceNode(
3014           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3015       return;
3016     }
3017     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3018     ReplaceNode(N,
3019                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3020                                     N->getOperand(2), N->getOperand(3)));
3021     return;
3022   }
3023
3024   case ARMISD::VLD1DUP: {
3025     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3026                                          ARM::VLD1DUPd32 };
3027     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3028                                          ARM::VLD1DUPq32 };
3029     SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3030     return;
3031   }
3032
3033   case ARMISD::VLD2DUP: {
3034     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3035                                         ARM::VLD2DUPd32 };
3036     SelectVLDDup(N, false, 2, Opcodes);
3037     return;
3038   }
3039
3040   case ARMISD::VLD3DUP: {
3041     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3042                                         ARM::VLD3DUPd16Pseudo,
3043                                         ARM::VLD3DUPd32Pseudo };
3044     SelectVLDDup(N, false, 3, Opcodes);
3045     return;
3046   }
3047
3048   case ARMISD::VLD4DUP: {
3049     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3050                                         ARM::VLD4DUPd16Pseudo,
3051                                         ARM::VLD4DUPd32Pseudo };
3052     SelectVLDDup(N, false, 4, Opcodes);
3053     return;
3054   }
3055
3056   case ARMISD::VLD1DUP_UPD: {
3057     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3058                                          ARM::VLD1DUPd16wb_fixed,
3059                                          ARM::VLD1DUPd32wb_fixed };
3060     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3061                                          ARM::VLD1DUPq16wb_fixed,
3062                                          ARM::VLD1DUPq32wb_fixed };
3063     SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3064     return;
3065   }
3066
3067   case ARMISD::VLD2DUP_UPD: {
3068     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3069                                         ARM::VLD2DUPd16wb_fixed,
3070                                         ARM::VLD2DUPd32wb_fixed };
3071     SelectVLDDup(N, true, 2, Opcodes);
3072     return;
3073   }
3074
3075   case ARMISD::VLD3DUP_UPD: {
3076     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3077                                         ARM::VLD3DUPd16Pseudo_UPD,
3078                                         ARM::VLD3DUPd32Pseudo_UPD };
3079     SelectVLDDup(N, true, 3, Opcodes);
3080     return;
3081   }
3082
3083   case ARMISD::VLD4DUP_UPD: {
3084     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3085                                         ARM::VLD4DUPd16Pseudo_UPD,
3086                                         ARM::VLD4DUPd32Pseudo_UPD };
3087     SelectVLDDup(N, true, 4, Opcodes);
3088     return;
3089   }
3090
3091   case ARMISD::VLD1_UPD: {
3092     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3093                                          ARM::VLD1d16wb_fixed,
3094                                          ARM::VLD1d32wb_fixed,
3095                                          ARM::VLD1d64wb_fixed };
3096     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3097                                          ARM::VLD1q16wb_fixed,
3098                                          ARM::VLD1q32wb_fixed,
3099                                          ARM::VLD1q64wb_fixed };
3100     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3101     return;
3102   }
3103
3104   case ARMISD::VLD2_UPD: {
3105     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3106                                          ARM::VLD2d16wb_fixed,
3107                                          ARM::VLD2d32wb_fixed,
3108                                          ARM::VLD1q64wb_fixed};
3109     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3110                                          ARM::VLD2q16PseudoWB_fixed,
3111                                          ARM::VLD2q32PseudoWB_fixed };
3112     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3113     return;
3114   }
3115
3116   case ARMISD::VLD3_UPD: {
3117     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3118                                          ARM::VLD3d16Pseudo_UPD,
3119                                          ARM::VLD3d32Pseudo_UPD,
3120                                          ARM::VLD1d64TPseudoWB_fixed};
3121     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3122                                           ARM::VLD3q16Pseudo_UPD,
3123                                           ARM::VLD3q32Pseudo_UPD };
3124     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3125                                           ARM::VLD3q16oddPseudo_UPD,
3126                                           ARM::VLD3q32oddPseudo_UPD };
3127     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3128     return;
3129   }
3130
3131   case ARMISD::VLD4_UPD: {
3132     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3133                                          ARM::VLD4d16Pseudo_UPD,
3134                                          ARM::VLD4d32Pseudo_UPD,
3135                                          ARM::VLD1d64QPseudoWB_fixed};
3136     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3137                                           ARM::VLD4q16Pseudo_UPD,
3138                                           ARM::VLD4q32Pseudo_UPD };
3139     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3140                                           ARM::VLD4q16oddPseudo_UPD,
3141                                           ARM::VLD4q32oddPseudo_UPD };
3142     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3143     return;
3144   }
3145
3146   case ARMISD::VLD2LN_UPD: {
3147     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3148                                          ARM::VLD2LNd16Pseudo_UPD,
3149                                          ARM::VLD2LNd32Pseudo_UPD };
3150     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3151                                          ARM::VLD2LNq32Pseudo_UPD };
3152     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3153     return;
3154   }
3155
3156   case ARMISD::VLD3LN_UPD: {
3157     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3158                                          ARM::VLD3LNd16Pseudo_UPD,
3159                                          ARM::VLD3LNd32Pseudo_UPD };
3160     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3161                                          ARM::VLD3LNq32Pseudo_UPD };
3162     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3163     return;
3164   }
3165
3166   case ARMISD::VLD4LN_UPD: {
3167     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3168                                          ARM::VLD4LNd16Pseudo_UPD,
3169                                          ARM::VLD4LNd32Pseudo_UPD };
3170     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3171                                          ARM::VLD4LNq32Pseudo_UPD };
3172     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3173     return;
3174   }
3175
3176   case ARMISD::VST1_UPD: {
3177     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3178                                          ARM::VST1d16wb_fixed,
3179                                          ARM::VST1d32wb_fixed,
3180                                          ARM::VST1d64wb_fixed };
3181     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3182                                          ARM::VST1q16wb_fixed,
3183                                          ARM::VST1q32wb_fixed,
3184                                          ARM::VST1q64wb_fixed };
3185     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3186     return;
3187   }
3188
3189   case ARMISD::VST2_UPD: {
3190     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3191                                          ARM::VST2d16wb_fixed,
3192                                          ARM::VST2d32wb_fixed,
3193                                          ARM::VST1q64wb_fixed};
3194     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3195                                          ARM::VST2q16PseudoWB_fixed,
3196                                          ARM::VST2q32PseudoWB_fixed };
3197     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3198     return;
3199   }
3200
3201   case ARMISD::VST3_UPD: {
3202     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3203                                          ARM::VST3d16Pseudo_UPD,
3204                                          ARM::VST3d32Pseudo_UPD,
3205                                          ARM::VST1d64TPseudoWB_fixed};
3206     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3207                                           ARM::VST3q16Pseudo_UPD,
3208                                           ARM::VST3q32Pseudo_UPD };
3209     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3210                                           ARM::VST3q16oddPseudo_UPD,
3211                                           ARM::VST3q32oddPseudo_UPD };
3212     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3213     return;
3214   }
3215
3216   case ARMISD::VST4_UPD: {
3217     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3218                                          ARM::VST4d16Pseudo_UPD,
3219                                          ARM::VST4d32Pseudo_UPD,
3220                                          ARM::VST1d64QPseudoWB_fixed};
3221     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3222                                           ARM::VST4q16Pseudo_UPD,
3223                                           ARM::VST4q32Pseudo_UPD };
3224     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3225                                           ARM::VST4q16oddPseudo_UPD,
3226                                           ARM::VST4q32oddPseudo_UPD };
3227     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3228     return;
3229   }
3230
3231   case ARMISD::VST2LN_UPD: {
3232     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3233                                          ARM::VST2LNd16Pseudo_UPD,
3234                                          ARM::VST2LNd32Pseudo_UPD };
3235     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3236                                          ARM::VST2LNq32Pseudo_UPD };
3237     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3238     return;
3239   }
3240
3241   case ARMISD::VST3LN_UPD: {
3242     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3243                                          ARM::VST3LNd16Pseudo_UPD,
3244                                          ARM::VST3LNd32Pseudo_UPD };
3245     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3246                                          ARM::VST3LNq32Pseudo_UPD };
3247     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3248     return;
3249   }
3250
3251   case ARMISD::VST4LN_UPD: {
3252     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3253                                          ARM::VST4LNd16Pseudo_UPD,
3254                                          ARM::VST4LNd32Pseudo_UPD };
3255     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3256                                          ARM::VST4LNq32Pseudo_UPD };
3257     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3258     return;
3259   }
3260
3261   case ISD::INTRINSIC_VOID:
3262   case ISD::INTRINSIC_W_CHAIN: {
3263     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3264     switch (IntNo) {
3265     default:
3266       break;
3267
3268     case Intrinsic::arm_mrrc:
3269     case Intrinsic::arm_mrrc2: {
3270       SDLoc dl(N);
3271       SDValue Chain = N->getOperand(0);
3272       unsigned Opc;
3273
3274       if (Subtarget->isThumb())
3275         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3276       else
3277         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3278
3279       SmallVector<SDValue, 5> Ops;
3280       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3281       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3282       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3283
3284       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3285       // instruction will always be '1111' but it is possible in assembly language to specify
3286       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3287       if (Opc != ARM::MRRC2) {
3288         Ops.push_back(getAL(CurDAG, dl));
3289         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3290       }
3291
3292       Ops.push_back(Chain);
3293
3294       // Writes to two registers.
3295       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3296
3297       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3298       return;
3299     }
3300     case Intrinsic::arm_ldaexd:
3301     case Intrinsic::arm_ldrexd: {
3302       SDLoc dl(N);
3303       SDValue Chain = N->getOperand(0);
3304       SDValue MemAddr = N->getOperand(2);
3305       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3306
3307       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3308       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3309                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3310
3311       // arm_ldrexd returns a i64 value in {i32, i32}
3312       std::vector<EVT> ResTys;
3313       if (isThumb) {
3314         ResTys.push_back(MVT::i32);
3315         ResTys.push_back(MVT::i32);
3316       } else
3317         ResTys.push_back(MVT::Untyped);
3318       ResTys.push_back(MVT::Other);
3319
3320       // Place arguments in the right order.
3321       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3322                        CurDAG->getRegister(0, MVT::i32), Chain};
3323       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3324       // Transfer memoperands.
3325       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3326       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3327       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3328
3329       // Remap uses.
3330       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3331       if (!SDValue(N, 0).use_empty()) {
3332         SDValue Result;
3333         if (isThumb)
3334           Result = SDValue(Ld, 0);
3335         else {
3336           SDValue SubRegIdx =
3337             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3338           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3339               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3340           Result = SDValue(ResNode,0);
3341         }
3342         ReplaceUses(SDValue(N, 0), Result);
3343       }
3344       if (!SDValue(N, 1).use_empty()) {
3345         SDValue Result;
3346         if (isThumb)
3347           Result = SDValue(Ld, 1);
3348         else {
3349           SDValue SubRegIdx =
3350             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3351           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3352               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3353           Result = SDValue(ResNode,0);
3354         }
3355         ReplaceUses(SDValue(N, 1), Result);
3356       }
3357       ReplaceUses(SDValue(N, 2), OutChain);
3358       CurDAG->RemoveDeadNode(N);
3359       return;
3360     }
3361     case Intrinsic::arm_stlexd:
3362     case Intrinsic::arm_strexd: {
3363       SDLoc dl(N);
3364       SDValue Chain = N->getOperand(0);
3365       SDValue Val0 = N->getOperand(2);
3366       SDValue Val1 = N->getOperand(3);
3367       SDValue MemAddr = N->getOperand(4);
3368
3369       // Store exclusive double return a i32 value which is the return status
3370       // of the issued store.
3371       const EVT ResTys[] = {MVT::i32, MVT::Other};
3372
3373       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3374       // Place arguments in the right order.
3375       SmallVector<SDValue, 7> Ops;
3376       if (isThumb) {
3377         Ops.push_back(Val0);
3378         Ops.push_back(Val1);
3379       } else
3380         // arm_strexd uses GPRPair.
3381         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3382       Ops.push_back(MemAddr);
3383       Ops.push_back(getAL(CurDAG, dl));
3384       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3385       Ops.push_back(Chain);
3386
3387       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3388       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3389                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3390
3391       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3392       // Transfer memoperands.
3393       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3394       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3395       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3396
3397       ReplaceNode(N, St);
3398       return;
3399     }
3400
3401     case Intrinsic::arm_neon_vld1: {
3402       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3403                                            ARM::VLD1d32, ARM::VLD1d64 };
3404       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3405                                            ARM::VLD1q32, ARM::VLD1q64};
3406       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3407       return;
3408     }
3409
3410     case Intrinsic::arm_neon_vld2: {
3411       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3412                                            ARM::VLD2d32, ARM::VLD1q64 };
3413       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3414                                            ARM::VLD2q32Pseudo };
3415       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3416       return;
3417     }
3418
3419     case Intrinsic::arm_neon_vld3: {
3420       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3421                                            ARM::VLD3d16Pseudo,
3422                                            ARM::VLD3d32Pseudo,
3423                                            ARM::VLD1d64TPseudo };
3424       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3425                                             ARM::VLD3q16Pseudo_UPD,
3426                                             ARM::VLD3q32Pseudo_UPD };
3427       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3428                                             ARM::VLD3q16oddPseudo,
3429                                             ARM::VLD3q32oddPseudo };
3430       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3431       return;
3432     }
3433
3434     case Intrinsic::arm_neon_vld4: {
3435       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3436                                            ARM::VLD4d16Pseudo,
3437                                            ARM::VLD4d32Pseudo,
3438                                            ARM::VLD1d64QPseudo };
3439       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3440                                             ARM::VLD4q16Pseudo_UPD,
3441                                             ARM::VLD4q32Pseudo_UPD };
3442       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3443                                             ARM::VLD4q16oddPseudo,
3444                                             ARM::VLD4q32oddPseudo };
3445       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3446       return;
3447     }
3448
3449     case Intrinsic::arm_neon_vld2lane: {
3450       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3451                                            ARM::VLD2LNd16Pseudo,
3452                                            ARM::VLD2LNd32Pseudo };
3453       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3454                                            ARM::VLD2LNq32Pseudo };
3455       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3456       return;
3457     }
3458
3459     case Intrinsic::arm_neon_vld3lane: {
3460       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3461                                            ARM::VLD3LNd16Pseudo,
3462                                            ARM::VLD3LNd32Pseudo };
3463       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3464                                            ARM::VLD3LNq32Pseudo };
3465       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3466       return;
3467     }
3468
3469     case Intrinsic::arm_neon_vld4lane: {
3470       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3471                                            ARM::VLD4LNd16Pseudo,
3472                                            ARM::VLD4LNd32Pseudo };
3473       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3474                                            ARM::VLD4LNq32Pseudo };
3475       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3476       return;
3477     }
3478
3479     case Intrinsic::arm_neon_vst1: {
3480       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3481                                            ARM::VST1d32, ARM::VST1d64 };
3482       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3483                                            ARM::VST1q32, ARM::VST1q64 };
3484       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3485       return;
3486     }
3487
3488     case Intrinsic::arm_neon_vst2: {
3489       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3490                                            ARM::VST2d32, ARM::VST1q64 };
3491       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3492                                            ARM::VST2q32Pseudo };
3493       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3494       return;
3495     }
3496
3497     case Intrinsic::arm_neon_vst3: {
3498       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3499                                            ARM::VST3d16Pseudo,
3500                                            ARM::VST3d32Pseudo,
3501                                            ARM::VST1d64TPseudo };
3502       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3503                                             ARM::VST3q16Pseudo_UPD,
3504                                             ARM::VST3q32Pseudo_UPD };
3505       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3506                                             ARM::VST3q16oddPseudo,
3507                                             ARM::VST3q32oddPseudo };
3508       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3509       return;
3510     }
3511
3512     case Intrinsic::arm_neon_vst4: {
3513       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3514                                            ARM::VST4d16Pseudo,
3515                                            ARM::VST4d32Pseudo,
3516                                            ARM::VST1d64QPseudo };
3517       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3518                                             ARM::VST4q16Pseudo_UPD,
3519                                             ARM::VST4q32Pseudo_UPD };
3520       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3521                                             ARM::VST4q16oddPseudo,
3522                                             ARM::VST4q32oddPseudo };
3523       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3524       return;
3525     }
3526
3527     case Intrinsic::arm_neon_vst2lane: {
3528       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3529                                            ARM::VST2LNd16Pseudo,
3530                                            ARM::VST2LNd32Pseudo };
3531       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3532                                            ARM::VST2LNq32Pseudo };
3533       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3534       return;
3535     }
3536
3537     case Intrinsic::arm_neon_vst3lane: {
3538       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3539                                            ARM::VST3LNd16Pseudo,
3540                                            ARM::VST3LNd32Pseudo };
3541       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3542                                            ARM::VST3LNq32Pseudo };
3543       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3544       return;
3545     }
3546
3547     case Intrinsic::arm_neon_vst4lane: {
3548       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3549                                            ARM::VST4LNd16Pseudo,
3550                                            ARM::VST4LNd32Pseudo };
3551       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3552                                            ARM::VST4LNq32Pseudo };
3553       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3554       return;
3555     }
3556     }
3557     break;
3558   }
3559
3560   case ISD::ATOMIC_CMP_SWAP:
3561     SelectCMP_SWAP(N);
3562     return;
3563   }
3564
3565   SelectCode(N);
3566 }
3567
3568 // Inspect a register string of the form
3569 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3570 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3571 // and obtain the integer operands from them, adding these operands to the
3572 // provided vector.
3573 static void getIntOperandsFromRegisterString(StringRef RegString,
3574                                              SelectionDAG *CurDAG,
3575                                              const SDLoc &DL,
3576                                              std::vector<SDValue> &Ops) {
3577   SmallVector<StringRef, 5> Fields;
3578   RegString.split(Fields, ':');
3579
3580   if (Fields.size() > 1) {
3581     bool AllIntFields = true;
3582
3583     for (StringRef Field : Fields) {
3584       // Need to trim out leading 'cp' characters and get the integer field.
3585       unsigned IntField;
3586       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3587       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3588     }
3589
3590     assert(AllIntFields &&
3591             "Unexpected non-integer value in special register string.");
3592   }
3593 }
3594
3595 // Maps a Banked Register string to its mask value. The mask value returned is
3596 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3597 // mask operand, which expresses which register is to be used, e.g. r8, and in
3598 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3599 // was invalid.
3600 static inline int getBankedRegisterMask(StringRef RegString) {
3601   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3602   if (!TheReg)
3603      return -1;
3604   return TheReg->Encoding;
3605 }
3606
3607 // The flags here are common to those allowed for apsr in the A class cores and
3608 // those allowed for the special registers in the M class cores. Returns a
3609 // value representing which flags were present, -1 if invalid.
3610 static inline int getMClassFlagsMask(StringRef Flags) {
3611   return StringSwitch<int>(Flags)
3612           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3613                          // correct when flags are not permitted
3614           .Case("g", 0x1)
3615           .Case("nzcvq", 0x2)
3616           .Case("nzcvqg", 0x3)
3617           .Default(-1);
3618 }
3619
3620 // Maps MClass special registers string to its value for use in the
3621 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3622 // Returns -1 to signify that the string was invalid.
3623 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3624   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3625   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3626   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3627     return -1;
3628   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
3629 }
3630
3631 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3632   // The mask operand contains the special register (R Bit) in bit 4, whether
3633   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3634   // bits 3-0 contains the fields to be accessed in the special register, set by
3635   // the flags provided with the register.
3636   int Mask = 0;
3637   if (Reg == "apsr") {
3638     // The flags permitted for apsr are the same flags that are allowed in
3639     // M class registers. We get the flag value and then shift the flags into
3640     // the correct place to combine with the mask.
3641     Mask = getMClassFlagsMask(Flags);
3642     if (Mask == -1)
3643       return -1;
3644     return Mask << 2;
3645   }
3646
3647   if (Reg != "cpsr" && Reg != "spsr") {
3648     return -1;
3649   }
3650
3651   // This is the same as if the flags were "fc"
3652   if (Flags.empty() || Flags == "all")
3653     return Mask | 0x9;
3654
3655   // Inspect the supplied flags string and set the bits in the mask for
3656   // the relevant and valid flags allowed for cpsr and spsr.
3657   for (char Flag : Flags) {
3658     int FlagVal;
3659     switch (Flag) {
3660       case 'c':
3661         FlagVal = 0x1;
3662         break;
3663       case 'x':
3664         FlagVal = 0x2;
3665         break;
3666       case 's':
3667         FlagVal = 0x4;
3668         break;
3669       case 'f':
3670         FlagVal = 0x8;
3671         break;
3672       default:
3673         FlagVal = 0;
3674     }
3675
3676     // This avoids allowing strings where the same flag bit appears twice.
3677     if (!FlagVal || (Mask & FlagVal))
3678       return -1;
3679     Mask |= FlagVal;
3680   }
3681
3682   // If the register is spsr then we need to set the R bit.
3683   if (Reg == "spsr")
3684     Mask |= 0x10;
3685
3686   return Mask;
3687 }
3688
3689 // Lower the read_register intrinsic to ARM specific DAG nodes
3690 // using the supplied metadata string to select the instruction node to use
3691 // and the registers/masks to construct as operands for the node.
3692 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3693   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3694   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3695   bool IsThumb2 = Subtarget->isThumb2();
3696   SDLoc DL(N);
3697
3698   std::vector<SDValue> Ops;
3699   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3700
3701   if (!Ops.empty()) {
3702     // If the special register string was constructed of fields (as defined
3703     // in the ACLE) then need to lower to MRC node (32 bit) or
3704     // MRRC node(64 bit), we can make the distinction based on the number of
3705     // operands we have.
3706     unsigned Opcode;
3707     SmallVector<EVT, 3> ResTypes;
3708     if (Ops.size() == 5){
3709       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3710       ResTypes.append({ MVT::i32, MVT::Other });
3711     } else {
3712       assert(Ops.size() == 3 &&
3713               "Invalid number of fields in special register string.");
3714       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3715       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3716     }
3717
3718     Ops.push_back(getAL(CurDAG, DL));
3719     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3720     Ops.push_back(N->getOperand(0));
3721     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3722     return true;
3723   }
3724
3725   std::string SpecialReg = RegString->getString().lower();
3726
3727   int BankedReg = getBankedRegisterMask(SpecialReg);
3728   if (BankedReg != -1) {
3729     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3730             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3731             N->getOperand(0) };
3732     ReplaceNode(
3733         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3734                                   DL, MVT::i32, MVT::Other, Ops));
3735     return true;
3736   }
3737
3738   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3739   // corresponding to the register that is being read from. So we switch on the
3740   // string to find which opcode we need to use.
3741   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3742                     .Case("fpscr", ARM::VMRS)
3743                     .Case("fpexc", ARM::VMRS_FPEXC)
3744                     .Case("fpsid", ARM::VMRS_FPSID)
3745                     .Case("mvfr0", ARM::VMRS_MVFR0)
3746                     .Case("mvfr1", ARM::VMRS_MVFR1)
3747                     .Case("mvfr2", ARM::VMRS_MVFR2)
3748                     .Case("fpinst", ARM::VMRS_FPINST)
3749                     .Case("fpinst2", ARM::VMRS_FPINST2)
3750                     .Default(0);
3751
3752   // If an opcode was found then we can lower the read to a VFP instruction.
3753   if (Opcode) {
3754     if (!Subtarget->hasVFP2())
3755       return false;
3756     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
3757       return false;
3758
3759     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3760             N->getOperand(0) };
3761     ReplaceNode(N,
3762                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
3763     return true;
3764   }
3765
3766   // If the target is M Class then need to validate that the register string
3767   // is an acceptable value, so check that a mask can be constructed from the
3768   // string.
3769   if (Subtarget->isMClass()) {
3770     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
3771     if (SYSmValue == -1)
3772       return false;
3773
3774     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3775                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3776                       N->getOperand(0) };
3777     ReplaceNode(
3778         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
3779     return true;
3780   }
3781
3782   // Here we know the target is not M Class so we need to check if it is one
3783   // of the remaining possible values which are apsr, cpsr or spsr.
3784   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
3785     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3786             N->getOperand(0) };
3787     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
3788                                           DL, MVT::i32, MVT::Other, Ops));
3789     return true;
3790   }
3791
3792   if (SpecialReg == "spsr") {
3793     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3794             N->getOperand(0) };
3795     ReplaceNode(
3796         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
3797                                   MVT::i32, MVT::Other, Ops));
3798     return true;
3799   }
3800
3801   return false;
3802 }
3803
3804 // Lower the write_register intrinsic to ARM specific DAG nodes
3805 // using the supplied metadata string to select the instruction node to use
3806 // and the registers/masks to use in the nodes
3807 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
3808   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3809   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3810   bool IsThumb2 = Subtarget->isThumb2();
3811   SDLoc DL(N);
3812
3813   std::vector<SDValue> Ops;
3814   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3815
3816   if (!Ops.empty()) {
3817     // If the special register string was constructed of fields (as defined
3818     // in the ACLE) then need to lower to MCR node (32 bit) or
3819     // MCRR node(64 bit), we can make the distinction based on the number of
3820     // operands we have.
3821     unsigned Opcode;
3822     if (Ops.size() == 5) {
3823       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
3824       Ops.insert(Ops.begin()+2, N->getOperand(2));
3825     } else {
3826       assert(Ops.size() == 3 &&
3827               "Invalid number of fields in special register string.");
3828       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
3829       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
3830       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
3831     }
3832
3833     Ops.push_back(getAL(CurDAG, DL));
3834     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3835     Ops.push_back(N->getOperand(0));
3836
3837     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
3838     return true;
3839   }
3840
3841   std::string SpecialReg = RegString->getString().lower();
3842   int BankedReg = getBankedRegisterMask(SpecialReg);
3843   if (BankedReg != -1) {
3844     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
3845             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3846             N->getOperand(0) };
3847     ReplaceNode(
3848         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
3849                                   DL, MVT::Other, Ops));
3850     return true;
3851   }
3852
3853   // The VFP registers are written to by creating SelectionDAG nodes with
3854   // opcodes corresponding to the register that is being written. So we switch
3855   // on the string to find which opcode we need to use.
3856   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3857                     .Case("fpscr", ARM::VMSR)
3858                     .Case("fpexc", ARM::VMSR_FPEXC)
3859                     .Case("fpsid", ARM::VMSR_FPSID)
3860                     .Case("fpinst", ARM::VMSR_FPINST)
3861                     .Case("fpinst2", ARM::VMSR_FPINST2)
3862                     .Default(0);
3863
3864   if (Opcode) {
3865     if (!Subtarget->hasVFP2())
3866       return false;
3867     Ops = { N->getOperand(2), getAL(CurDAG, DL),
3868             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3869     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
3870     return true;
3871   }
3872
3873   std::pair<StringRef, StringRef> Fields;
3874   Fields = StringRef(SpecialReg).rsplit('_');
3875   std::string Reg = Fields.first.str();
3876   StringRef Flags = Fields.second;
3877
3878   // If the target was M Class then need to validate the special register value
3879   // and retrieve the mask for use in the instruction node.
3880   if (Subtarget->isMClass()) {
3881     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
3882     if (SYSmValue == -1)
3883       return false;
3884
3885     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3886                       N->getOperand(2), getAL(CurDAG, DL),
3887                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3888     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
3889     return true;
3890   }
3891
3892   // We then check to see if a valid mask can be constructed for one of the
3893   // register string values permitted for the A and R class cores. These values
3894   // are apsr, spsr and cpsr; these are also valid on older cores.
3895   int Mask = getARClassRegisterMask(Reg, Flags);
3896   if (Mask != -1) {
3897     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
3898             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3899             N->getOperand(0) };
3900     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
3901                                           DL, MVT::Other, Ops));
3902     return true;
3903   }
3904
3905   return false;
3906 }
3907
3908 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
3909   std::vector<SDValue> AsmNodeOperands;
3910   unsigned Flag, Kind;
3911   bool Changed = false;
3912   unsigned NumOps = N->getNumOperands();
3913
3914   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
3915   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
3916   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
3917   // respectively. Since there is no constraint to explicitly specify a
3918   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
3919   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
3920   // them into a GPRPair.
3921
3922   SDLoc dl(N);
3923   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
3924                                    : SDValue(nullptr,0);
3925
3926   SmallVector<bool, 8> OpChanged;
3927   // Glue node will be appended late.
3928   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
3929     SDValue op = N->getOperand(i);
3930     AsmNodeOperands.push_back(op);
3931
3932     if (i < InlineAsm::Op_FirstOperand)
3933       continue;
3934
3935     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
3936       Flag = C->getZExtValue();
3937       Kind = InlineAsm::getKind(Flag);
3938     }
3939     else
3940       continue;
3941
3942     // Immediate operands to inline asm in the SelectionDAG are modeled with
3943     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
3944     // the second is a constant with the value of the immediate. If we get here
3945     // and we have a Kind_Imm, skip the next operand, and continue.
3946     if (Kind == InlineAsm::Kind_Imm) {
3947       SDValue op = N->getOperand(++i);
3948       AsmNodeOperands.push_back(op);
3949       continue;
3950     }
3951
3952     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
3953     if (NumRegs)
3954       OpChanged.push_back(false);
3955
3956     unsigned DefIdx = 0;
3957     bool IsTiedToChangedOp = false;
3958     // If it's a use that is tied with a previous def, it has no
3959     // reg class constraint.
3960     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
3961       IsTiedToChangedOp = OpChanged[DefIdx];
3962
3963     // Memory operands to inline asm in the SelectionDAG are modeled with two
3964     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
3965     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
3966     // it doesn't get misinterpreted), and continue. We do this here because
3967     // it's important to update the OpChanged array correctly before moving on.
3968     if (Kind == InlineAsm::Kind_Mem) {
3969       SDValue op = N->getOperand(++i);
3970       AsmNodeOperands.push_back(op);
3971       continue;
3972     }
3973
3974     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
3975         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
3976       continue;
3977
3978     unsigned RC;
3979     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
3980     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
3981         || NumRegs != 2)
3982       continue;
3983
3984     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
3985     SDValue V0 = N->getOperand(i+1);
3986     SDValue V1 = N->getOperand(i+2);
3987     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
3988     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
3989     SDValue PairedReg;
3990     MachineRegisterInfo &MRI = MF->getRegInfo();
3991
3992     if (Kind == InlineAsm::Kind_RegDef ||
3993         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
3994       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
3995       // the original GPRs.
3996
3997       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3998       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3999       SDValue Chain = SDValue(N,0);
4000
4001       SDNode *GU = N->getGluedUser();
4002       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4003                                                Chain.getValue(1));
4004
4005       // Extract values from a GPRPair reg and copy to the original GPR reg.
4006       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4007                                                     RegCopy);
4008       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4009                                                     RegCopy);
4010       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4011                                         RegCopy.getValue(1));
4012       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4013
4014       // Update the original glue user.
4015       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4016       Ops.push_back(T1.getValue(1));
4017       CurDAG->UpdateNodeOperands(GU, Ops);
4018     }
4019     else {
4020       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4021       // GPRPair and then pass the GPRPair to the inline asm.
4022       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4023
4024       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4025       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4026                                           Chain.getValue(1));
4027       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4028                                           T0.getValue(1));
4029       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4030
4031       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4032       // i32 VRs of inline asm with it.
4033       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4034       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4035       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4036
4037       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4038       Glue = Chain.getValue(1);
4039     }
4040
4041     Changed = true;
4042
4043     if(PairedReg.getNode()) {
4044       OpChanged[OpChanged.size() -1 ] = true;
4045       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4046       if (IsTiedToChangedOp)
4047         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4048       else
4049         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4050       // Replace the current flag.
4051       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4052           Flag, dl, MVT::i32);
4053       // Add the new register node and skip the original two GPRs.
4054       AsmNodeOperands.push_back(PairedReg);
4055       // Skip the next two GPRs.
4056       i += 2;
4057     }
4058   }
4059
4060   if (Glue.getNode())
4061     AsmNodeOperands.push_back(Glue);
4062   if (!Changed)
4063     return false;
4064
4065   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4066       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4067   New->setNodeId(-1);
4068   ReplaceNode(N, New.getNode());
4069   return true;
4070 }
4071
4072
4073 bool ARMDAGToDAGISel::
4074 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4075                              std::vector<SDValue> &OutOps) {
4076   switch(ConstraintID) {
4077   default:
4078     llvm_unreachable("Unexpected asm memory constraint");
4079   case InlineAsm::Constraint_i:
4080     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4081     //        be an immediate and not a memory constraint.
4082     LLVM_FALLTHROUGH;
4083   case InlineAsm::Constraint_m:
4084   case InlineAsm::Constraint_o:
4085   case InlineAsm::Constraint_Q:
4086   case InlineAsm::Constraint_Um:
4087   case InlineAsm::Constraint_Un:
4088   case InlineAsm::Constraint_Uq:
4089   case InlineAsm::Constraint_Us:
4090   case InlineAsm::Constraint_Ut:
4091   case InlineAsm::Constraint_Uv:
4092   case InlineAsm::Constraint_Uy:
4093     // Require the address to be in a register.  That is safe for all ARM
4094     // variants and it is hard to do anything much smarter without knowing
4095     // how the operand is used.
4096     OutOps.push_back(Op);
4097     return false;
4098   }
4099   return true;
4100 }
4101
4102 /// createARMISelDag - This pass converts a legalized DAG into a
4103 /// ARM-specific DAG, ready for instruction scheduling.
4104 ///
4105 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4106                                      CodeGenOpt::Level OptLevel) {
4107   return new ARMDAGToDAGISel(TM, OptLevel);
4108 }