1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM target.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Target/TargetLowering.h"
35 #include "llvm/Target/TargetOptions.h"
39 #define DEBUG_TYPE "arm-isel"
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
53 AM2_BASE, // Simple AM2 (+-imm12)
54 AM2_SHOP // Shifter-op AM2
57 class ARMDAGToDAGISel : public SelectionDAGISel {
// NOTE(review): this is a line-numbered listing; the jumps in the embedded
// numbers show that some original lines (closing braces of the inline
// helpers, some parameter lists, and the class's closing "};") are elided.
58 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
59 /// make the right decision when generating code for different targets.
60 const ARMSubtarget *Subtarget;
// Constructor: forwards the target machine and optimization level to the
// generic SelectionDAGISel base; Subtarget is (re)bound per function below.
63 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
64 : SelectionDAGISel(tm, OptLevel) {}
66 bool runOnMachineFunction(MachineFunction &MF) override {
67 // Reset the subtarget each time through.
68 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 SelectionDAGISel::runOnMachineFunction(MF);
// Human-readable pass name shown in -debug-pass / time-passes output.
73 StringRef getPassName() const override { return "ARM Instruction Selection"; }
75 void PreprocessISelDAG() override;
77 /// getI32Imm - Return a target constant of type i32 with the specified
79 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
80 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
// Main instruction-selection entry point for a single DAG node.
83 void Select(SDNode *N) override;
85 bool hasNoVMLxHazardUse(SDNode *N) const;
86 bool isShifterOpProfitable(const SDValue &Shift,
87 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
88 bool SelectRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C,
90 bool CheckProfitability = true);
91 bool SelectImmShifterOperand(SDValue N, SDValue &A,
92 SDValue &B, bool CheckProfitability = true);
// Wrappers used by shift-instruction patterns: same matching as the two
// selectors above but with the profitability heuristic disabled.
93 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, SDValue &C) {
95 // Don't apply the profitability check
96 return SelectRegShifterOperand(N, A, B, C, false);
98 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, false);
104 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
105 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
// AM2 selection is funneled through one worker; the two thin wrappers below
// accept only the base-immediate or only the shifter-operand form of AM2.
107 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
108 SDValue &Offset, SDValue &Opc);
109 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
111 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
114 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
116 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
119 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
121 SelectAddrMode2Worker(N, Base, Offset, Opc);
122 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
123 // This always matches one way or another.
// Lower a condition-code constant into a predicate operand plus the CPSR
// flags register for CMOV-style patterns.
127 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
128 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
129 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
130 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
134 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
137 SDValue &Offset, SDValue &Opc);
138 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
139 SDValue &Offset, SDValue &Opc);
140 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
141 bool SelectAddrMode3(SDValue N, SDValue &Base,
142 SDValue &Offset, SDValue &Opc);
143 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode5(SDValue N, SDValue &Base,
147 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
148 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
150 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
152 // Thumb Addressing Modes:
153 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
154 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
156 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
158 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
160 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
162 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
164 // Thumb 2 Addressing Modes:
165 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
166 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
168 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
170 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
171 SDValue &OffReg, SDValue &ShImm);
172 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
// Immediate predicates used by the auto-generated matcher: test whether a
// value (or its bitwise complement) is encodable as an ARM / Thumb-2
// shifter-operand immediate.
174 inline bool is_so_imm(unsigned Imm) const {
175 return ARM_AM::getSOImmVal(Imm) != -1;
178 inline bool is_so_imm_not(unsigned Imm) const {
179 return ARM_AM::getSOImmVal(~Imm) != -1;
182 inline bool is_t2_so_imm(unsigned Imm) const {
183 return ARM_AM::getT2SOImmVal(Imm) != -1;
186 inline bool is_t2_so_imm_not(unsigned Imm) const {
187 return ARM_AM::getT2SOImmVal(~Imm) != -1;
190 // Include the pieces autogenerated from the target description.
191 #include "ARMGenDAGISel.inc"
// Copy memory operands from one machine node to another.
194 void transferMemOperands(SDNode *Src, SDNode *Dst);
196 /// Indexed (pre/post inc/dec) load matching code for ARM.
197 bool tryARMIndexedLoad(SDNode *N);
198 bool tryT1IndexedLoad(SDNode *N);
199 bool tryT2IndexedLoad(SDNode *N);
201 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
202 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
203 /// loads of D registers and even subregs and odd subregs of Q registers.
204 /// For NumVecs <= 2, QOpcodes1 is not used.
205 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
206 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
207 const uint16_t *QOpcodes1);
209 /// SelectVST - Select NEON store intrinsics. NumVecs should
210 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
211 /// stores of D registers and even subregs and odd subregs of Q registers.
212 /// For NumVecs <= 2, QOpcodes1 is not used.
213 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
214 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
215 const uint16_t *QOpcodes1);
217 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
218 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
219 /// load/store of D registers and Q registers.
220 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
221 unsigned NumVecs, const uint16_t *DOpcodes,
222 const uint16_t *QOpcodes);
224 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
225 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
226 /// for loading D registers. (Q registers are not supported.)
227 void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
228 const uint16_t *DOpcodes,
229 const uint16_t *QOpcodes = nullptr);
231 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
232 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
233 /// generated to force the table registers to be consecutive.
234 void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
236 /// Try to select SBFX/UBFX instructions for ARM.
237 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
239 // Select special operations if node forms integer ABS pattern
240 bool tryABSOp(SDNode *N);
242 bool tryReadRegister(SDNode *N);
243 bool tryWriteRegister(SDNode *N);
245 bool tryInlineAsm(SDNode *N);
247 void SelectConcatVector(SDNode *N);
248 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
250 bool trySMLAWSMULW(SDNode *N);
252 void SelectCMP_SWAP(SDNode *N);
254 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
255 /// inline asm expressions.
256 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
257 std::vector<SDValue> &OutOps) override;
259 // Form pairs of consecutive R, S, D, or Q registers.
260 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
261 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
262 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
263 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
265 // Form sequences of 4 consecutive S, D, or Q registers.
266 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
267 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
268 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
270 // Get the alignment operand for a NEON VLD or VST instruction.
271 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
274 /// Returns the number of instructions required to materialize the given
275 /// constant in a register, or 3 if a literal pool load is needed.
276 unsigned ConstantMaterializationCost(unsigned Val) const;
278 /// Checks if N is a multiplication by a constant where we can extract out a
279 /// power of two from the constant so that it can be used in a shift, but only
280 /// if it simplifies the materialization of the constant. Returns true if it
281 /// is, and assigns to PowerOfTwo the power of two that should be extracted
282 /// out and to NewMulConst the new constant to be multiplied by.
283 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
284 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
286 /// Replace N with M in CurDAG, in a way that also ensures that M gets
287 /// selected when N would have been selected.
288 void replaceDAGValue(const SDValue &N, SDValue M);
292 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
293 /// operand. If so Imm will receive the 32-bit value.
294 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
295 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
296 Imm = cast<ConstantSDNode>(N)->getZExtValue();
// NOTE(review): original lines 297-301 (presumably the return true/false
// paths and closing braces) are elided from this listing.
302 // isInt32Immediate - This method tests to see if the operand is a 32-bit
303 // constant. If so Imm will receive the 32 bit value.
304 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
305 return isInt32Immediate(N.getNode(), Imm);
308 // isOpcWithIntImmediate - This method tests to see if the node is a specific
309 // opcode and that it has an immediate integer right operand.
310 // If so Imm will receive the 32 bit value.
311 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
312 return N->getOpcode() == Opc &&
313 isInt32Immediate(N->getOperand(1).getNode(), Imm);
316 /// \brief Check whether a particular node is a constant value representable as
317 /// (N * Scale) where N is in the half-open range [\p RangeMin, \p RangeMax).
319 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
320 static bool isScaledConstantInRange(SDValue Node, int Scale,
321 int RangeMin, int RangeMax,
322 int &ScaledConstant) {
323 assert(Scale > 0 && "Invalid scale!");
325 // Check that this is a constant.
326 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
// NOTE(review): original lines 327-329 (presumably the !C early-return and a
// comment) are elided from this listing.
330 ScaledConstant = (int) C->getZExtValue();
331 if ((ScaledConstant % Scale) != 0)
// Divide out the scale; the caller receives the pre-scaled value.
334 ScaledConstant /= Scale;
335 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
// PreprocessISelDAG - Rewrite (add X1, (and (srl X2, c1), c2)) so that the
// shift can be folded as an 'add' shifter operand and the and/srl pair can
// become a bitfield extract (requires v6T2's UBFX, hence the guard below).
338 void ARMDAGToDAGISel::PreprocessISelDAG() {
339 if (!Subtarget->hasV6T2Ops())
// NOTE(review): named isThumb2 but initialized from isThumb() — looks
// intentional in context (v6T2 Thumb implies Thumb-2) but confirm upstream.
342 bool isThumb2 = Subtarget->isThumb();
343 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
344 E = CurDAG->allnodes_end(); I != E; ) {
345 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
347 if (N->getOpcode() != ISD::ADD)
350 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
351 // leading zeros, followed by consecutive set bits, followed by 1 or 2
352 // trailing zeros, e.g. 1020.
353 // Transform the expression to
354 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
355 // of trailing zeros of c2. The left shift would be folded as an shifter
356 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
359 SDValue N0 = N->getOperand(0);
360 SDValue N1 = N->getOperand(1);
361 unsigned And_imm = 0;
// The AND may be on either side of the ADD; if it is on the left, swap the
// operands (the swap itself is in elided lines) so N1 is always the AND.
362 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
363 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
369 // Check if the AND mask is an immediate of the form: 000.....1111111100
370 unsigned TZ = countTrailingZeros(And_imm);
371 if (TZ != 1 && TZ != 2)
372 // Be conservative here. Shifter operands aren't always free. e.g. On
373 // Swift, left shifter operand of 1 / 2 for free but others are not.
375 // ubfx r3, r1, #16, #8
376 // ldr.w r3, [r0, r3, lsl #2]
379 // and.w r2, r9, r1, lsr #14
// Reject masks whose set bits are not consecutive (x & (x+1) != 0 iff the
// ones are not a single contiguous run from some bit down).
383 if (And_imm & (And_imm + 1))
386 // Look for (and (srl X, c1), c2).
387 SDValue Srl = N1.getOperand(0);
388 unsigned Srl_imm = 0;
389 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
393 // Make sure first operand is not a shifter operand which would prevent
394 // folding of the left shift.
399 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
402 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
403 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
407 // Now make the transformation.
408 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
410 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
412 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
414 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
415 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
416 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
417 CurDAG->UpdateNodeOperands(N, N0, N1);
421 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
422 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
423 /// least on current ARM implementations) which should be avoided.
424 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
425 if (OptLevel == CodeGenOpt::None)
// Only subtargets that actually exhibit the VMLx hazard need the check.
428 if (!Subtarget->hasVMLxHazards())
// Inspect the (single) user of this node to decide whether fusing into a
// VMLA/VMLS would create a hazard.
434 SDNode *Use = *N->use_begin();
435 if (Use->getOpcode() == ISD::CopyToReg)
437 if (Use->isMachineOpcode()) {
438 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
439 CurDAG->getSubtarget().getInstrInfo());
441 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
444 unsigned Opcode = MCID.getOpcode();
445 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
447 // vmlx feeding into another vmlx. We actually want to unfold
448 // the use later in the MLxExpansion pass. e.g.
450 // vmla (stall 8 cycles)
455 // This adds up to about 18 - 19 cycles.
458 // vmul (stall 4 cycles)
459 // vadd adds up to about 14 cycles.
460 return TII->isFpMLxInstruction(Opcode);
// isShifterOpProfitable - Decide whether folding the given shift into an
// instruction as a shifter operand is worthwhile. On most subtargets it
// always is; A9-like cores and Swift restrict it to cheap cases.
466 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
467 ARM_AM::ShiftOpc ShOpcVal,
469 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
// A shift with a single use is folded away entirely, so it is free.
471 if (Shift.hasOneUse())
// Otherwise only LSL #2 (and LSL #1 on Swift) is considered cheap enough.
474 return ShOpcVal == ARM_AM::lsl &&
475 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
// ConstantMaterializationCost - Number of instructions needed to put Val in a
// register (3 meaning a literal pool load). Thumb and ARM encodings are
// costed separately below.
478 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
479 if (Subtarget->isThumb()) {
480 if (Val <= 255) return 1; // MOV
481 if (Subtarget->hasV6T2Ops() &&
482 (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
484 if (Val <= 510) return 2; // MOV + ADDi8
485 if (~Val <= 255) return 2; // MOV + MVN
486 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
// ARM-mode costs (the Thumb block above is closed in elided lines).
488 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
489 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
490 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
491 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
493 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
494 return 3; // Literal pool load
// canExtractShiftFromMul - See the declaration comment: factor a power of two
// out of a multiply-by-constant so it can be folded as a shift, but only when
// the smaller constant is strictly cheaper to materialize.
497 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
499 unsigned &PowerOfTwo,
500 SDValue &NewMulConst) const {
501 assert(N.getOpcode() == ISD::MUL);
502 assert(MaxShift > 0);
504 // If the multiply is used in more than one place then changing the constant
505 // will make other uses incorrect, so don't.
506 if (!N.hasOneUse()) return false;
507 // Check if the multiply is by a constant
508 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
509 if (!MulConst) return false;
510 // If the constant is used in more than one place then modifying it will mean
511 // we need to materialize two constants instead of one, which is a bad idea.
512 if (!MulConst->hasOneUse()) return false;
513 unsigned MulConstVal = MulConst->getZExtValue();
514 if (MulConstVal == 0) return false;
516 // Find the largest power of 2 that MulConstVal is a multiple of
// Start at MaxShift and count down (the decrement is in an elided line)
// until 2^PowerOfTwo divides the constant.
517 PowerOfTwo = MaxShift;
518 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
520 if (PowerOfTwo == 0) return false;
523 // Only optimise if the new cost is better
524 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
525 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
526 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
527 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
528 return NewCost < OldCost;
// replaceDAGValue - Replace N with M everywhere in CurDAG. Repositioning M to
// N's slot in the node order ensures M is visited (selected) when N would
// have been.
531 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
532 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
533 CurDAG->ReplaceAllUsesWith(N, M);
// SelectImmShifterOperand - Match N as a register shifted by a constant
// amount (immediate shifter operand), producing the base register and the
// packed shift opcode/amount operand.
536 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
539 bool CheckProfitability) {
540 if (DisableShifterOp)
543 // If N is a multiply-by-constant and it's profitable to extract a shift and
544 // use it in a shifted operand do so.
545 if (N.getOpcode() == ISD::MUL) {
546 unsigned PowerOfTwo = 0;
548 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
// Handle keeps N alive while replaceDAGValue rewrites its constant operand.
549 HandleSDNode Handle(N);
550 replaceDAGValue(N.getOperand(1), NewMulConst);
551 BaseReg = Handle.getValue();
552 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
559 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
561 // Don't match base register only case. That is matched to a separate
562 // lower complexity pattern with explicit register operand.
563 if (ShOpcVal == ARM_AM::no_shift) return false;
565 BaseReg = N.getOperand(0);
566 unsigned ShImmVal = 0;
// Only constant shift amounts belong here; register amounts are handled by
// SelectRegShifterOperand.
567 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
568 if (!RHS) return false;
569 ShImmVal = RHS->getZExtValue() & 31;
570 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// SelectRegShifterOperand - Match N as a register shifted by another register
// (register shifter operand), producing base register, shift-amount register
// and the packed shift opcode operand.
575 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
579 bool CheckProfitability) {
580 if (DisableShifterOp)
583 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
585 // Don't match base register only case. That is matched to a separate
586 // lower complexity pattern with explicit register operand.
587 if (ShOpcVal == ARM_AM::no_shift) return false;
589 BaseReg = N.getOperand(0);
590 unsigned ShImmVal = 0;
// A constant shift amount is the immediate-shifter form, not this one.
591 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
592 if (RHS) return false;
594 ShReg = N.getOperand(1);
595 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
597 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// SelectAddrModeImm12 - Match [reg +/- imm12] addressing (LDRi12/STRi12
// form), also handling frame indices and ARMISD::Wrapper bases.
603 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
606 // Match simple R + imm12 operands.
609 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
610 !CurDAG->isBaseWithConstantOffset(N)) {
611 if (N.getOpcode() == ISD::FrameIndex) {
612 // Match frame index.
613 int FI = cast<FrameIndexSDNode>(N)->getIndex();
614 Base = CurDAG->getTargetFrameIndex(
615 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
616 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// Wrapped addresses are usable directly except for the TargetGlobal* /
// TargetExternalSymbol forms, which need their own lowering.
620 if (N.getOpcode() == ARMISD::Wrapper &&
621 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
622 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
623 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
624 Base = N.getOperand(0);
627 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
631 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
632 int RHSC = (int)RHS->getSExtValue();
// For SUB the constant is negated (in an elided line) before range check.
633 if (N.getOpcode() == ISD::SUB)
636 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
637 Base = N.getOperand(0);
638 if (Base.getOpcode() == ISD::FrameIndex) {
639 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
640 Base = CurDAG->getTargetFrameIndex(
641 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
643 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
// Fallback: use the whole expression as the base with a zero offset.
650 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// SelectLdStSOReg - Match [reg +/- reg, shift] addressing for loads/stores,
// leaving plain [reg + imm12] for the LDRi12 patterns.
656 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
658 if (N.getOpcode() == ISD::MUL &&
659 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
660 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
661 // X * [3,5,9] -> X + X * [2,4,8] etc.
662 int RHSC = (int)RHS->getZExtValue();
665 ARM_AM::AddrOpc AddSub = ARM_AM::add;
667 AddSub = ARM_AM::sub;
670 if (isPowerOf2_32(RHSC)) {
671 unsigned ShAmt = Log2_32(RHSC);
672 Base = Offset = N.getOperand(0);
673 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
682 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
683 // ISD::OR that is equivalent to an ISD::ADD.
684 !CurDAG->isBaseWithConstantOffset(N))
687 // Leave simple R +/- imm12 operands for LDRi12
688 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
690 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
691 -0x1000+1, 0x1000, RHSC)) // 12 bits.
695 // Otherwise this is R +/- [possibly shifted] R.
696 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
697 ARM_AM::ShiftOpc ShOpcVal =
698 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
701 Base = N.getOperand(0);
702 Offset = N.getOperand(1);
704 if (ShOpcVal != ARM_AM::no_shift) {
705 // Check to see if the RHS of the shift is a constant, if not, we can't fold
707 if (ConstantSDNode *Sh =
708 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
709 ShAmt = Sh->getZExtValue();
710 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
711 Offset = N.getOperand(1).getOperand(0);
// Not profitable / not constant: fall back to an unshifted offset register.
714 ShOpcVal = ARM_AM::no_shift;
717 ShOpcVal = ARM_AM::no_shift;
721 // Try matching (R shl C) + (R).
722 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
723 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
724 N.getOperand(0).hasOneUse())) {
725 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
726 if (ShOpcVal != ARM_AM::no_shift) {
727 // Check to see if the RHS of the shift is a constant, if not, we can't
729 if (ConstantSDNode *Sh =
730 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
731 ShAmt = Sh->getZExtValue();
732 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
// Shift found on the LHS: swap roles so the shifted value is the offset.
733 Offset = N.getOperand(0).getOperand(0);
734 Base = N.getOperand(1);
737 ShOpcVal = ARM_AM::no_shift;
740 ShOpcVal = ARM_AM::no_shift;
745 // If Offset is a multiply-by-constant and it's profitable to extract a shift
746 // and use it in a shifted operand do so.
747 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
748 unsigned PowerOfTwo = 0;
750 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
751 replaceDAGValue(Offset.getOperand(1), NewMulConst);
753 ShOpcVal = ARM_AM::lsl;
757 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// SelectAddrMode2Worker - Core AM2 matcher. Returns AM2_BASE for the simple
// [reg +/- imm12] form and AM2_SHOP for the [reg +/- reg, shift] form; the
// SelectAddrMode2Base/ShOp wrappers filter on that result.
765 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
769 if (N.getOpcode() == ISD::MUL &&
770 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
771 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
772 // X * [3,5,9] -> X + X * [2,4,8] etc.
773 int RHSC = (int)RHS->getZExtValue();
776 ARM_AM::AddrOpc AddSub = ARM_AM::add;
778 AddSub = ARM_AM::sub;
781 if (isPowerOf2_32(RHSC)) {
782 unsigned ShAmt = Log2_32(RHSC);
783 Base = Offset = N.getOperand(0);
784 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
793 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
794 // ISD::OR that is equivalent to an ADD.
795 !CurDAG->isBaseWithConstantOffset(N)) {
797 if (N.getOpcode() == ISD::FrameIndex) {
798 int FI = cast<FrameIndexSDNode>(N)->getIndex();
799 Base = CurDAG->getTargetFrameIndex(
800 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
801 } else if (N.getOpcode() == ARMISD::Wrapper &&
802 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
803 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
804 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
805 Base = N.getOperand(0);
// No usable offset: register 0 with a zero AM2 opcode encodes "no offset".
807 Offset = CurDAG->getRegister(0, MVT::i32);
808 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
814 // Match simple R +/- imm12 operands.
815 if (N.getOpcode() != ISD::SUB) {
817 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
818 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
819 Base = N.getOperand(0);
820 if (Base.getOpcode() == ISD::FrameIndex) {
821 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
822 Base = CurDAG->getTargetFrameIndex(
823 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
825 Offset = CurDAG->getRegister(0, MVT::i32);
827 ARM_AM::AddrOpc AddSub = ARM_AM::add;
829 AddSub = ARM_AM::sub;
832 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
// On A9-like/Swift cores, keep a multi-use R +/- (R << N) computation in a
// register instead of refolding the shift at every use.
839 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
840 // Compute R +/- (R << N) and reuse it.
842 Offset = CurDAG->getRegister(0, MVT::i32);
843 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
849 // Otherwise this is R +/- [possibly shifted] R.
850 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
851 ARM_AM::ShiftOpc ShOpcVal =
852 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
855 Base = N.getOperand(0);
856 Offset = N.getOperand(1);
858 if (ShOpcVal != ARM_AM::no_shift) {
859 // Check to see if the RHS of the shift is a constant, if not, we can't fold
861 if (ConstantSDNode *Sh =
862 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
863 ShAmt = Sh->getZExtValue();
864 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
865 Offset = N.getOperand(1).getOperand(0);
868 ShOpcVal = ARM_AM::no_shift;
871 ShOpcVal = ARM_AM::no_shift;
875 // Try matching (R shl C) + (R).
876 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
877 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
878 N.getOperand(0).hasOneUse())) {
879 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
880 if (ShOpcVal != ARM_AM::no_shift) {
881 // Check to see if the RHS of the shift is a constant, if not, we can't
883 if (ConstantSDNode *Sh =
884 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
885 ShAmt = Sh->getZExtValue();
886 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
// Shift found on the LHS: swap roles so the shifted value is the offset.
887 Offset = N.getOperand(0).getOperand(0);
888 Base = N.getOperand(1);
891 ShOpcVal = ARM_AM::no_shift;
894 ShOpcVal = ARM_AM::no_shift;
899 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// SelectAddrMode2OffsetReg - Match the offset operand of a pre/post-indexed
// AM2 load/store as a (possibly shifted) register; immediates are rejected
// (they go through SelectAddrMode2OffsetImm*).
904 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
905 SDValue &Offset, SDValue &Opc) {
906 unsigned Opcode = Op->getOpcode();
907 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
908 ? cast<LoadSDNode>(Op)->getAddressingMode()
909 : cast<StoreSDNode>(Op)->getAddressingMode();
// Increment modes add the offset; decrement modes subtract it.
910 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
911 ? ARM_AM::add : ARM_AM::sub;
// Constant offsets are not this form.
913 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
917 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
919 if (ShOpcVal != ARM_AM::no_shift) {
920 // Check to see if the RHS of the shift is a constant, if not, we can't fold
922 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
923 ShAmt = Sh->getZExtValue();
924 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
925 Offset = N.getOperand(0);
928 ShOpcVal = ARM_AM::no_shift;
931 ShOpcVal = ARM_AM::no_shift;
935 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// SelectAddrMode2OffsetImmPre - Match an imm12 offset for a pre-indexed AM2
// access; the offset is emitted as a plain signed constant (negated for the
// subtracting modes) rather than an AM2-packed opcode.
940 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
941 SDValue &Offset, SDValue &Opc) {
942 unsigned Opcode = Op->getOpcode();
943 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
944 ? cast<LoadSDNode>(Op)->getAddressingMode()
945 : cast<StoreSDNode>(Op)->getAddressingMode();
946 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
947 ? ARM_AM::add : ARM_AM::sub;
949 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
950 if (AddSub == ARM_AM::sub) Val *= -1;
951 Offset = CurDAG->getRegister(0, MVT::i32);
952 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
// SelectAddrMode2OffsetImm - Match an imm12 offset for a pre/post-indexed AM2
// access, packing the add/sub direction and value into an AM2 opcode operand.
960 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
961 SDValue &Offset, SDValue &Opc) {
962 unsigned Opcode = Op->getOpcode();
963 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
964 ? cast<LoadSDNode>(Op)->getAddressingMode()
965 : cast<StoreSDNode>(Op)->getAddressingMode();
966 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
967 ? ARM_AM::add : ARM_AM::sub;
969 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
970 Offset = CurDAG->getRegister(0, MVT::i32);
971 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
973 SDLoc(Op), MVT::i32);
// SelectAddrOffsetNone - Accept any address as a bare base register with no
// offset (body elided from this listing).
980 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
// SelectAddrMode3 - Match AM3 addressing ([reg +/- reg] or [reg +/- imm8])
// used by halfword / doubleword loads and stores.
985 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
986 SDValue &Base, SDValue &Offset,
988 if (N.getOpcode() == ISD::SUB) {
989 // X - C is canonicalized to X + -C, no need to handle it here.
990 Base = N.getOperand(0);
991 Offset = N.getOperand(1);
992 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
997 if (!CurDAG->isBaseWithConstantOffset(N)) {
999 if (N.getOpcode() == ISD::FrameIndex) {
1000 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1001 Base = CurDAG->getTargetFrameIndex(
1002 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
// Register 0 with a zero AM3 opcode encodes "no offset".
1004 Offset = CurDAG->getRegister(0, MVT::i32);
1005 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1010 // If the RHS is +/- imm8, fold into addr mode.
1012 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1013 -256 + 1, 256, RHSC)) { // 8 bits.
1014 Base = N.getOperand(0);
1015 if (Base.getOpcode() == ISD::FrameIndex) {
1016 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1017 Base = CurDAG->getTargetFrameIndex(
1018 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1020 Offset = CurDAG->getRegister(0, MVT::i32);
1022 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1024 AddSub = ARM_AM::sub;
1027 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
// Fallback: base-plus-register with no shift.
1032 Base = N.getOperand(0);
1033 Offset = N.getOperand(1);
1034 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
// SelectAddrMode3Offset - Match the offset operand of a pre/post-indexed AM3
// access as either an imm8 (packed into the AM3 opcode) or a plain register.
1039 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1040 SDValue &Offset, SDValue &Opc) {
1041 unsigned Opcode = Op->getOpcode();
1042 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1043 ? cast<LoadSDNode>(Op)->getAddressingMode()
1044 : cast<StoreSDNode>(Op)->getAddressingMode();
1045 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1046 ? ARM_AM::add : ARM_AM::sub;
1048 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
1049 Offset = CurDAG->getRegister(0, MVT::i32);
1050 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
// Not a small immediate: use N itself as a register offset (assignment of
// Offset = N is in elided lines).
1056 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1061 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1062 SDValue &Base, SDValue &Offset) {
1063 if (!CurDAG->isBaseWithConstantOffset(N)) {
1065 if (N.getOpcode() == ISD::FrameIndex) {
1066 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1067 Base = CurDAG->getTargetFrameIndex(
1068 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1069 } else if (N.getOpcode() == ARMISD::Wrapper &&
1070 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1071 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1072 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1073 Base = N.getOperand(0);
1075 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1076 SDLoc(N), MVT::i32);
1080 // If the RHS is +/- imm8, fold into addr mode.
1082 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1083 -256 + 1, 256, RHSC)) {
1084 Base = N.getOperand(0);
1085 if (Base.getOpcode() == ISD::FrameIndex) {
1086 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1087 Base = CurDAG->getTargetFrameIndex(
1088 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1091 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1093 AddSub = ARM_AM::sub;
1096 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1097 SDLoc(N), MVT::i32);
1102 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1103 SDLoc(N), MVT::i32);
1107 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1111 unsigned Alignment = 0;
1113 MemSDNode *MemN = cast<MemSDNode>(Parent);
1115 if (isa<LSBaseSDNode>(MemN) ||
1116 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1117 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1118 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1119 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1120 // The maximum alignment is equal to the memory size being referenced.
1121 unsigned MMOAlign = MemN->getAlignment();
1122 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1123 if (MMOAlign >= MemSize && MemSize > 1)
1124 Alignment = MemSize;
1126 // All other uses of addrmode6 are for intrinsics. For now just record
1127 // the raw alignment value; it will be refined later based on the legal
1128 // alignment operands for the intrinsic.
1129 Alignment = MemN->getAlignment();
1132 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1136 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1138 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1139 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1140 if (AM != ISD::POST_INC)
1143 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1144 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1145 Offset = CurDAG->getRegister(0, MVT::i32);
1150 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1151 SDValue &Offset, SDValue &Label) {
1152 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1153 Offset = N.getOperand(0);
1154 SDValue N1 = N.getOperand(1);
1155 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1156 SDLoc(N), MVT::i32);
1164 //===----------------------------------------------------------------------===//
1165 // Thumb Addressing Modes
1166 //===----------------------------------------------------------------------===//
1168 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1169 SDValue &Base, SDValue &Offset){
1170 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1171 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1172 if (!NC || !NC->isNullValue())
1179 Base = N.getOperand(0);
1180 Offset = N.getOperand(1);
1185 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1186 SDValue &Base, SDValue &OffImm) {
1187 if (!CurDAG->isBaseWithConstantOffset(N)) {
1188 if (N.getOpcode() == ISD::ADD) {
1189 return false; // We want to select register offset instead
1190 } else if (N.getOpcode() == ARMISD::Wrapper &&
1191 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1192 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1193 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1194 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1195 Base = N.getOperand(0);
1200 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1204 // If the RHS is + imm5 * scale, fold into addr mode.
1206 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1207 Base = N.getOperand(0);
1208 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1212 // Offset is too large, so use register offset instead.
1217 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1219 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1223 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1225 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1229 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1231 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1234 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1235 SDValue &Base, SDValue &OffImm) {
1236 if (N.getOpcode() == ISD::FrameIndex) {
1237 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1238 // Only multiples of 4 are allowed for the offset, so the frame object
1239 // alignment must be at least 4.
1240 MachineFrameInfo &MFI = MF->getFrameInfo();
1241 if (MFI.getObjectAlignment(FI) < 4)
1242 MFI.setObjectAlignment(FI, 4);
1243 Base = CurDAG->getTargetFrameIndex(
1244 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1245 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1249 if (!CurDAG->isBaseWithConstantOffset(N))
1252 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1253 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1254 (LHSR && LHSR->getReg() == ARM::SP)) {
1255 // If the RHS is + imm8 * scale, fold into addr mode.
1257 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1258 Base = N.getOperand(0);
1259 if (Base.getOpcode() == ISD::FrameIndex) {
1260 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1261 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1262 // indexed by the LHS must be 4-byte aligned.
1263 MachineFrameInfo &MFI = MF->getFrameInfo();
1264 if (MFI.getObjectAlignment(FI) < 4)
1265 MFI.setObjectAlignment(FI, 4);
1266 Base = CurDAG->getTargetFrameIndex(
1267 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1269 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1278 //===----------------------------------------------------------------------===//
1279 // Thumb 2 Addressing Modes
1280 //===----------------------------------------------------------------------===//
1283 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1284 SDValue &Base, SDValue &OffImm) {
1285 // Match simple R + imm12 operands.
1288 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1289 !CurDAG->isBaseWithConstantOffset(N)) {
1290 if (N.getOpcode() == ISD::FrameIndex) {
1291 // Match frame index.
1292 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1293 Base = CurDAG->getTargetFrameIndex(
1294 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1295 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1299 if (N.getOpcode() == ARMISD::Wrapper &&
1300 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1301 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1302 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1303 Base = N.getOperand(0);
1304 if (Base.getOpcode() == ISD::TargetConstantPool)
1305 return false; // We want to select t2LDRpci instead.
1308 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1312 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1313 if (SelectT2AddrModeImm8(N, Base, OffImm))
1314 // Let t2LDRi8 handle (R - imm8).
1317 int RHSC = (int)RHS->getZExtValue();
1318 if (N.getOpcode() == ISD::SUB)
1321 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1322 Base = N.getOperand(0);
1323 if (Base.getOpcode() == ISD::FrameIndex) {
1324 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1325 Base = CurDAG->getTargetFrameIndex(
1326 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1328 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1335 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1339 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1340 SDValue &Base, SDValue &OffImm) {
1341 // Match simple R - imm8 operands.
1342 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1343 !CurDAG->isBaseWithConstantOffset(N))
1346 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1347 int RHSC = (int)RHS->getSExtValue();
1348 if (N.getOpcode() == ISD::SUB)
1351 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1352 Base = N.getOperand(0);
1353 if (Base.getOpcode() == ISD::FrameIndex) {
1354 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1355 Base = CurDAG->getTargetFrameIndex(
1356 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1358 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1366 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1368 unsigned Opcode = Op->getOpcode();
1369 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1370 ? cast<LoadSDNode>(Op)->getAddressingMode()
1371 : cast<StoreSDNode>(Op)->getAddressingMode();
1373 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1374 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1375 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1376 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1383 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1385 SDValue &OffReg, SDValue &ShImm) {
1386 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1387 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1390 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1391 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1392 int RHSC = (int)RHS->getZExtValue();
1393 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1395 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1399 // Look for (R + R) or (R + (R << [1,2,3])).
1401 Base = N.getOperand(0);
1402 OffReg = N.getOperand(1);
1404 // Swap if it is ((R << c) + R).
1405 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1406 if (ShOpcVal != ARM_AM::lsl) {
1407 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1408 if (ShOpcVal == ARM_AM::lsl)
1409 std::swap(Base, OffReg);
1412 if (ShOpcVal == ARM_AM::lsl) {
1413 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1415 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1416 ShAmt = Sh->getZExtValue();
1417 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1418 OffReg = OffReg.getOperand(0);
1425 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1426 // and use it in a shifted operand do so.
1427 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1428 unsigned PowerOfTwo = 0;
1429 SDValue NewMulConst;
1430 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1431 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1436 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1441 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1443 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1446 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1448 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1451 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1455 uint32_t RHSC = (int)RHS->getZExtValue();
1456 if (RHSC > 1020 || RHSC % 4 != 0)
1459 Base = N.getOperand(0);
1460 if (Base.getOpcode() == ISD::FrameIndex) {
1461 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1462 Base = CurDAG->getTargetFrameIndex(
1463 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1466 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1470 //===--------------------------------------------------------------------===//
1472 /// getAL - Returns a ARMCC::AL immediate node.
1473 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1474 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1477 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1478 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1479 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1480 cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1483 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1484 LoadSDNode *LD = cast<LoadSDNode>(N);
1485 ISD::MemIndexedMode AM = LD->getAddressingMode();
1486 if (AM == ISD::UNINDEXED)
1489 EVT LoadedVT = LD->getMemoryVT();
1490 SDValue Offset, AMOpc;
1491 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1492 unsigned Opcode = 0;
1494 if (LoadedVT == MVT::i32 && isPre &&
1495 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1496 Opcode = ARM::LDR_PRE_IMM;
1498 } else if (LoadedVT == MVT::i32 && !isPre &&
1499 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1500 Opcode = ARM::LDR_POST_IMM;
1502 } else if (LoadedVT == MVT::i32 &&
1503 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1504 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1507 } else if (LoadedVT == MVT::i16 &&
1508 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1510 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1511 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1512 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1513 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1514 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1515 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1517 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1521 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1523 Opcode = ARM::LDRB_PRE_IMM;
1524 } else if (!isPre &&
1525 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1527 Opcode = ARM::LDRB_POST_IMM;
1528 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1530 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1536 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1537 SDValue Chain = LD->getChain();
1538 SDValue Base = LD->getBasePtr();
1539 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1540 CurDAG->getRegister(0, MVT::i32), Chain };
1541 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1543 transferMemOperands(N, New);
1544 ReplaceNode(N, New);
1547 SDValue Chain = LD->getChain();
1548 SDValue Base = LD->getBasePtr();
1549 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1550 CurDAG->getRegister(0, MVT::i32), Chain };
1551 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1553 transferMemOperands(N, New);
1554 ReplaceNode(N, New);
1562 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1563 LoadSDNode *LD = cast<LoadSDNode>(N);
1564 EVT LoadedVT = LD->getMemoryVT();
1565 ISD::MemIndexedMode AM = LD->getAddressingMode();
1566 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1567 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1570 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1571 if (!COffs || COffs->getZExtValue() != 4)
1574 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1575 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1576 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1578 SDValue Chain = LD->getChain();
1579 SDValue Base = LD->getBasePtr();
1580 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1581 CurDAG->getRegister(0, MVT::i32), Chain };
1582 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1583 MVT::i32, MVT::Other, Ops);
1584 transferMemOperands(N, New);
1585 ReplaceNode(N, New);
1589 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1590 LoadSDNode *LD = cast<LoadSDNode>(N);
1591 ISD::MemIndexedMode AM = LD->getAddressingMode();
1592 if (AM == ISD::UNINDEXED)
1595 EVT LoadedVT = LD->getMemoryVT();
1596 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1598 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1599 unsigned Opcode = 0;
1601 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1602 switch (LoadedVT.getSimpleVT().SimpleTy) {
1604 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1608 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1610 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1615 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1617 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1626 SDValue Chain = LD->getChain();
1627 SDValue Base = LD->getBasePtr();
1628 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1629 CurDAG->getRegister(0, MVT::i32), Chain };
1630 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1632 transferMemOperands(N, New);
1633 ReplaceNode(N, New);
1640 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1641 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1642 SDLoc dl(V0.getNode());
1644 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1645 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1646 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1647 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1648 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1651 /// \brief Form a D register from a pair of S registers.
1652 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1653 SDLoc dl(V0.getNode());
1655 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1656 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1657 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1658 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1659 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1662 /// \brief Form a quad register from a pair of D registers.
1663 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1664 SDLoc dl(V0.getNode());
1665 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1667 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1668 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1669 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1670 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1673 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1674 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1675 SDLoc dl(V0.getNode());
1676 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1678 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1679 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1680 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1681 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1684 /// \brief Form 4 consecutive S registers.
1685 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1686 SDValue V2, SDValue V3) {
1687 SDLoc dl(V0.getNode());
1689 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1690 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1691 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1692 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1693 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1694 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1695 V2, SubReg2, V3, SubReg3 };
1696 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1699 /// \brief Form 4 consecutive D registers.
1700 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1701 SDValue V2, SDValue V3) {
1702 SDLoc dl(V0.getNode());
1703 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1705 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1706 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1707 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1708 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1709 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1710 V2, SubReg2, V3, SubReg3 };
1711 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1714 /// \brief Form 4 consecutive Q registers.
1715 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1716 SDValue V2, SDValue V3) {
1717 SDLoc dl(V0.getNode());
1718 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1720 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1721 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1722 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1723 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1724 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1725 V2, SubReg2, V3, SubReg3 };
1726 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1729 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1730 /// of a NEON VLD or VST instruction. The supported values depend on the
1731 /// number of registers being loaded.
1732 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1733 unsigned NumVecs, bool is64BitVector) {
1734 unsigned NumRegs = NumVecs;
1735 if (!is64BitVector && NumVecs < 3)
1738 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1739 if (Alignment >= 32 && NumRegs == 4)
1741 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1743 else if (Alignment >= 8)
1748 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1751 static bool isVLDfixed(unsigned Opc)
1754 default: return false;
1755 case ARM::VLD1d8wb_fixed : return true;
1756 case ARM::VLD1d16wb_fixed : return true;
1757 case ARM::VLD1d64Qwb_fixed : return true;
1758 case ARM::VLD1d32wb_fixed : return true;
1759 case ARM::VLD1d64wb_fixed : return true;
1760 case ARM::VLD1d64TPseudoWB_fixed : return true;
1761 case ARM::VLD1d64QPseudoWB_fixed : return true;
1762 case ARM::VLD1q8wb_fixed : return true;
1763 case ARM::VLD1q16wb_fixed : return true;
1764 case ARM::VLD1q32wb_fixed : return true;
1765 case ARM::VLD1q64wb_fixed : return true;
1766 case ARM::VLD1DUPd8wb_fixed : return true;
1767 case ARM::VLD1DUPd16wb_fixed : return true;
1768 case ARM::VLD1DUPd32wb_fixed : return true;
1769 case ARM::VLD1DUPq8wb_fixed : return true;
1770 case ARM::VLD1DUPq16wb_fixed : return true;
1771 case ARM::VLD1DUPq32wb_fixed : return true;
1772 case ARM::VLD2d8wb_fixed : return true;
1773 case ARM::VLD2d16wb_fixed : return true;
1774 case ARM::VLD2d32wb_fixed : return true;
1775 case ARM::VLD2q8PseudoWB_fixed : return true;
1776 case ARM::VLD2q16PseudoWB_fixed : return true;
1777 case ARM::VLD2q32PseudoWB_fixed : return true;
1778 case ARM::VLD2DUPd8wb_fixed : return true;
1779 case ARM::VLD2DUPd16wb_fixed : return true;
1780 case ARM::VLD2DUPd32wb_fixed : return true;
1784 static bool isVSTfixed(unsigned Opc)
1787 default: return false;
1788 case ARM::VST1d8wb_fixed : return true;
1789 case ARM::VST1d16wb_fixed : return true;
1790 case ARM::VST1d32wb_fixed : return true;
1791 case ARM::VST1d64wb_fixed : return true;
1792 case ARM::VST1q8wb_fixed : return true;
1793 case ARM::VST1q16wb_fixed : return true;
1794 case ARM::VST1q32wb_fixed : return true;
1795 case ARM::VST1q64wb_fixed : return true;
1796 case ARM::VST1d64TPseudoWB_fixed : return true;
1797 case ARM::VST1d64QPseudoWB_fixed : return true;
1798 case ARM::VST2d8wb_fixed : return true;
1799 case ARM::VST2d16wb_fixed : return true;
1800 case ARM::VST2d32wb_fixed : return true;
1801 case ARM::VST2q8PseudoWB_fixed : return true;
1802 case ARM::VST2q16PseudoWB_fixed : return true;
1803 case ARM::VST2q32PseudoWB_fixed : return true;
1807 // Get the register stride update opcode of a VLD/VST instruction that
1808 // is otherwise equivalent to the given fixed stride updating instruction.
1809 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1810 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1811 && "Incorrect fixed stride updating instruction.");
1814 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1815 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1816 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1817 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1818 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1819 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1820 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1821 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1822 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1823 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1824 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1825 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1826 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1827 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1828 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1829 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1830 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1831 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1833 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1834 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1835 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1836 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1837 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1838 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1839 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1840 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1841 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1842 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1844 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1845 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1846 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1847 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1848 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1849 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1851 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1852 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1853 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1854 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1855 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1856 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1858 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1859 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1860 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1862 return Opc; // If not one we handle, return it unchanged.
1865 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1866 const uint16_t *DOpcodes,
1867 const uint16_t *QOpcodes0,
1868 const uint16_t *QOpcodes1) {
1869 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1872 SDValue MemAddr, Align;
1873 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1874 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1877 SDValue Chain = N->getOperand(0);
1878 EVT VT = N->getValueType(0);
1879 bool is64BitVector = VT.is64BitVector();
1880 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1882 unsigned OpcodeIndex;
1883 switch (VT.getSimpleVT().SimpleTy) {
1884 default: llvm_unreachable("unhandled vld type");
1885 // Double-register operations:
1886 case MVT::v8i8: OpcodeIndex = 0; break;
1887 case MVT::v4i16: OpcodeIndex = 1; break;
1889 case MVT::v2i32: OpcodeIndex = 2; break;
1890 case MVT::v1i64: OpcodeIndex = 3; break;
1891 // Quad-register operations:
1892 case MVT::v16i8: OpcodeIndex = 0; break;
1893 case MVT::v8i16: OpcodeIndex = 1; break;
1895 case MVT::v4i32: OpcodeIndex = 2; break;
1897 case MVT::v2i64: OpcodeIndex = 3;
1898 assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1906 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1909 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1911 std::vector<EVT> ResTys;
1912 ResTys.push_back(ResTy);
1914 ResTys.push_back(MVT::i32);
1915 ResTys.push_back(MVT::Other);
1917 SDValue Pred = getAL(CurDAG, dl);
1918 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1920 SmallVector<SDValue, 7> Ops;
1922 // Double registers and VLD1/VLD2 quad registers are directly supported.
1923 if (is64BitVector || NumVecs <= 2) {
1924 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1925 QOpcodes0[OpcodeIndex]);
1926 Ops.push_back(MemAddr);
1927 Ops.push_back(Align);
1929 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1930 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1931 // case entirely when the rest are updated to that form, too.
1932 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1933 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1934 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1935 // check for that explicitly too. Horribly hacky, but temporary.
1936 if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1937 !isa<ConstantSDNode>(Inc.getNode()))
1938 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1940 Ops.push_back(Pred);
1941 Ops.push_back(Reg0);
1942 Ops.push_back(Chain);
1943 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1946 // Otherwise, quad registers are loaded with two separate instructions,
1947 // where one loads the even registers and the other loads the odd registers.
1948 EVT AddrTy = MemAddr.getValueType();
1950 // Load the even subregs. This is always an updating load, so that it
1951 // provides the address to the second load for the odd subregs.
1953 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1954 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1955 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1956 ResTy, AddrTy, MVT::Other, OpsA);
1957 Chain = SDValue(VLdA, 2);
1959 // Load the odd subregs.
1960 Ops.push_back(SDValue(VLdA, 1));
1961 Ops.push_back(Align);
1963 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1964 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1965 "only constant post-increment update allowed for VLD3/4");
1967 Ops.push_back(Reg0);
1969 Ops.push_back(SDValue(VLdA, 0));
1970 Ops.push_back(Pred);
1971 Ops.push_back(Reg0);
1972 Ops.push_back(Chain);
1973 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1976 // Transfer memoperands.
1977 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1978 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1979 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1982 ReplaceNode(N, VLd);
1986 // Extract out the subregisters.
1987 SDValue SuperReg = SDValue(VLd, 0);
1988 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1989 ARM::qsub_3 == ARM::qsub_0 + 3,
1990 "Unexpected subreg numbering");
1991 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1992 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1993 ReplaceUses(SDValue(N, Vec),
1994 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1995 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1997 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1998 CurDAG->RemoveDeadNode(N);
// Select a NEON structured vector store (VST1..VST4), optionally with a
// post-increment address update. DOpcodes/QOpcodes0/QOpcodes1 are per-type
// opcode tables: D-register forms, quad forms handled by one instruction,
// and the second ("odd registers") half of a split quad VST3/VST4.
// NOTE(review): interior lines of this function are elided in this view;
// comments below describe only the visible code.
2001 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2002 const uint16_t *DOpcodes,
2003 const uint16_t *QOpcodes0,
2004 const uint16_t *QOpcodes1) {
2005 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2008 SDValue MemAddr, Align;
// Updating intrinsics carry the base address at operand 1; plain intrinsics
// at operand 2 (after chain and intrinsic id).
2009 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2010 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2011 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
// Capture the memory operand so it can be attached to whichever machine
// node(s) we create below.
2014 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2015 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2017 SDValue Chain = N->getOperand(0);
2018 EVT VT = N->getOperand(Vec0Idx).getValueType();
2019 bool is64BitVector = VT.is64BitVector();
2020 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Map the element type to an index into the opcode tables.
2022 unsigned OpcodeIndex;
2023 switch (VT.getSimpleVT().SimpleTy) {
2024 default: llvm_unreachable("unhandled vst type");
2025 // Double-register operations:
2026 case MVT::v8i8: OpcodeIndex = 0; break;
2027 case MVT::v4i16: OpcodeIndex = 1; break;
2029 case MVT::v2i32: OpcodeIndex = 2; break;
2030 case MVT::v1i64: OpcodeIndex = 3; break;
2031 // Quad-register operations:
2032 case MVT::v16i8: OpcodeIndex = 0; break;
2033 case MVT::v8i16: OpcodeIndex = 1; break;
2035 case MVT::v4i32: OpcodeIndex = 2; break;
2037 case MVT::v2i64: OpcodeIndex = 3;
2038 assert(NumVecs == 1 && "v2i64 type only supported for VST1");
// Result types: write-back GPR (updating form) plus the chain.
2042 std::vector<EVT> ResTys;
2044 ResTys.push_back(MVT::i32);
2045 ResTys.push_back(MVT::Other);
2047 SDValue Pred = getAL(CurDAG, dl);
2048 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2049 SmallVector<SDValue, 7> Ops;
2051 // Double registers and VST1/VST2 quad registers are directly supported.
2052 if (is64BitVector || NumVecs <= 2) {
2055 SrcReg = N->getOperand(Vec0Idx);
2056 } else if (is64BitVector) {
2057 // Form a REG_SEQUENCE to force register allocation.
2058 SDValue V0 = N->getOperand(Vec0Idx + 0);
2059 SDValue V1 = N->getOperand(Vec0Idx + 1);
2061 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2063 SDValue V2 = N->getOperand(Vec0Idx + 2);
2064 // If it's a vst3, form a quad D-register and leave the last part as
// an undef so the register allocator has a full tuple to work with.
2066 SDValue V3 = (NumVecs == 3)
2067 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2068 : N->getOperand(Vec0Idx + 3);
2069 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2072 // Form a QQ register.
2073 SDValue Q0 = N->getOperand(Vec0Idx);
2074 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2075 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2078 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2079 QOpcodes0[OpcodeIndex]);
2080 Ops.push_back(MemAddr);
2081 Ops.push_back(Align);
// For updating stores, a non-constant increment becomes an explicit
// register-update operand; a constant increment is folded into the opcode.
2083 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2084 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2085 // case entirely when the rest are updated to that form, too.
2086 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2087 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2088 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2089 // check for that explicitly too. Horribly hacky, but temporary.
2090 if (!isa<ConstantSDNode>(Inc.getNode()))
2092 else if (NumVecs > 2 && !isVSTfixed(Opc))
2093 Ops.push_back(Reg0);
2095 Ops.push_back(SrcReg);
2096 Ops.push_back(Pred);
2097 Ops.push_back(Reg0);
2098 Ops.push_back(Chain);
2099 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2101 // Transfer memoperands.
2102 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2104 ReplaceNode(N, VSt);
2108 // Otherwise, quad registers are stored with two separate instructions,
2109 // where one stores the even registers and the other stores the odd registers.
2111 // Form the QQQQ REG_SEQUENCE.
2112 SDValue V0 = N->getOperand(Vec0Idx + 0);
2113 SDValue V1 = N->getOperand(Vec0Idx + 1);
2114 SDValue V2 = N->getOperand(Vec0Idx + 2);
2115 SDValue V3 = (NumVecs == 3)
2116 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2117 : N->getOperand(Vec0Idx + 3);
2118 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2120 // Store the even D registers. This is always an updating store, so that it
2121 // provides the address to the second store for the odd subregs.
2122 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2123 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2124 MemAddr.getValueType(),
2126 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
// Chain the odd-register store after the even-register store.
2127 Chain = SDValue(VStA, 1);
2129 // Store the odd D registers.
2130 Ops.push_back(SDValue(VStA, 0));
2131 Ops.push_back(Align);
2133 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2134 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2135 "only constant post-increment update allowed for VST3/4");
2137 Ops.push_back(Reg0);
2139 Ops.push_back(RegSeq);
2140 Ops.push_back(Pred);
2141 Ops.push_back(Reg0);
2142 Ops.push_back(Chain);
2143 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2145 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2146 ReplaceNode(N, VStB);
// Select a single-lane NEON load or store (VLD[234]LN / VST[234]LN):
// NumVecs registers each contribute/receive one lane at index `Lane`.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
2149 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2151 const uint16_t *DOpcodes,
2152 const uint16_t *QOpcodes) {
2153 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2156 SDValue MemAddr, Align;
2157 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2158 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2159 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2162 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2163 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2165 SDValue Chain = N->getOperand(0);
// The lane index follows the NumVecs vector operands.
2167 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2168 EVT VT = N->getOperand(Vec0Idx).getValueType();
2169 bool is64BitVector = VT.is64BitVector();
// Clamp the declared alignment to what a lane access can actually use and
// force it to a power of two.
2171 unsigned Alignment = 0;
2173 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2174 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2175 if (Alignment > NumBytes)
2176 Alignment = NumBytes;
2177 if (Alignment < 8 && Alignment < NumBytes)
2179 // Alignment must be a power of two; make sure of that.
2180 Alignment = (Alignment & -Alignment);
2184 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2186 unsigned OpcodeIndex;
2187 switch (VT.getSimpleVT().SimpleTy) {
2188 default: llvm_unreachable("unhandled vld/vst lane type");
2189 // Double-register operations:
2190 case MVT::v8i8: OpcodeIndex = 0; break;
2191 case MVT::v4i16: OpcodeIndex = 1; break;
2193 case MVT::v2i32: OpcodeIndex = 2; break;
2194 // Quad-register operations:
2195 case MVT::v8i16: OpcodeIndex = 0; break;
2197 case MVT::v4i32: OpcodeIndex = 1; break;
2200 std::vector<EVT> ResTys;
// A 3-register result is padded to a 4-register-wide super-register type.
2202 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2205 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2206 MVT::i64, ResTyElts));
2209 ResTys.push_back(MVT::i32);
2210 ResTys.push_back(MVT::Other);
2212 SDValue Pred = getAL(CurDAG, dl);
2213 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2215 SmallVector<SDValue, 8> Ops;
2216 Ops.push_back(MemAddr);
2217 Ops.push_back(Align);
// Constant post-increments are folded into the opcode (Reg0 placeholder);
// variable increments are passed explicitly.
2219 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2220 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
// Build the vector-tuple super-register operand from the input vectors.
2224 SDValue V0 = N->getOperand(Vec0Idx + 0);
2225 SDValue V1 = N->getOperand(Vec0Idx + 1);
2228 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2230 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2232 SDValue V2 = N->getOperand(Vec0Idx + 2);
2233 SDValue V3 = (NumVecs == 3)
2234 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2235 : N->getOperand(Vec0Idx + 3);
2237 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2239 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2241 Ops.push_back(SuperReg);
2242 Ops.push_back(getI32Imm(Lane, dl));
2243 Ops.push_back(Pred);
2244 Ops.push_back(Reg0);
2245 Ops.push_back(Chain);
2247 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2248 QOpcodes[OpcodeIndex]);
2249 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2250 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2252 ReplaceNode(N, VLdLn);
2256 // Extract the subregisters.
2257 SuperReg = SDValue(VLdLn, 0);
2258 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2259 ARM::qsub_3 == ARM::qsub_0 + 3,
2260 "Unexpected subreg numbering");
2261 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
// Rewire each original result to a subreg extract of the super-register,
// then the chain / write-back results.
2262 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2263 ReplaceUses(SDValue(N, Vec),
2264 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2265 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2267 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2268 CurDAG->RemoveDeadNode(N);
// Select a NEON load-and-duplicate (VLD1DUP..VLD4DUP): load one element and
// replicate it across all lanes of NumVecs registers.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
2271 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2272 const uint16_t *DOpcodes,
2273 const uint16_t *QOpcodes) {
2274 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2277 SDValue MemAddr, Align;
2278 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2281 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2282 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2284 SDValue Chain = N->getOperand(0);
2285 EVT VT = N->getValueType(0);
// Clamp/normalize the alignment the same way as the other VLD/VST selectors.
2287 unsigned Alignment = 0;
2289 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2290 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2291 if (Alignment > NumBytes)
2292 Alignment = NumBytes;
2293 if (Alignment < 8 && Alignment < NumBytes)
2295 // Alignment must be a power of two; make sure of that.
2296 Alignment = (Alignment & -Alignment);
2300 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Pick the opcode directly from the element type.
2303 switch (VT.getSimpleVT().SimpleTy) {
2304 default: llvm_unreachable("unhandled vld-dup type");
2305 case MVT::v8i8: Opc = DOpcodes[0]; break;
2306 case MVT::v16i8: Opc = QOpcodes[0]; break;
2307 case MVT::v4i16: Opc = DOpcodes[1]; break;
2308 case MVT::v8i16: Opc = QOpcodes[1]; break;
2310 case MVT::v2i32: Opc = DOpcodes[2]; break;
2312 case MVT::v4i32: Opc = QOpcodes[2]; break;
2315 SDValue Pred = getAL(CurDAG, dl);
2316 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2317 SmallVector<SDValue, 6> Ops;
2318 Ops.push_back(MemAddr);
2319 Ops.push_back(Align);
2321 // fixed-stride update instructions don't have an explicit writeback
2322 // operand. It's implicit in the opcode itself.
2323 SDValue Inc = N->getOperand(2);
2324 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2325 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2326 if (!isa<ConstantSDNode>(Inc.getNode()))
2328 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2329 else if (NumVecs > 2)
2330 Ops.push_back(Reg0);
2332 Ops.push_back(Pred);
2333 Ops.push_back(Reg0);
2334 Ops.push_back(Chain);
// Result super-register: 3-vector results are padded to a 4-wide type.
2336 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2337 std::vector<EVT> ResTys;
2338 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2340 ResTys.push_back(MVT::i32);
2341 ResTys.push_back(MVT::Other);
2342 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2343 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2345 // Extract the subregisters.
2347 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2349 SDValue SuperReg = SDValue(VLdDup, 0);
2350 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2351 unsigned SubIdx = ARM::dsub_0;
2352 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2353 ReplaceUses(SDValue(N, Vec),
2354 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
// Chain result, and the write-back result for updating forms.
2356 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2358 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2359 CurDAG->RemoveDeadNode(N);
// Select a NEON table lookup (VTBL) or table extension (VTBX, IsExt==true)
// over NumVecs table registers. The table registers are glued into a single
// REG_SEQUENCE super-register operand.
2362 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2364 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2366 EVT VT = N->getValueType(0);
// VTBX has an extra "fall-back" operand at index 1, shifting the first
// table register to operand 2.
2367 unsigned FirstTblReg = IsExt ? 2 : 1;
2369 // Form a REG_SEQUENCE to force register allocation.
2371 SDValue V0 = N->getOperand(FirstTblReg + 0);
2372 SDValue V1 = N->getOperand(FirstTblReg + 1);
2374 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2376 SDValue V2 = N->getOperand(FirstTblReg + 2);
2377 // If it's a vtbl3, form a quad D-register and leave the last part as
// an undef placeholder.
2379 SDValue V3 = (NumVecs == 3)
2380 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2381 : N->getOperand(FirstTblReg + 3);
2382 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2385 SmallVector<SDValue, 6> Ops;
2387 Ops.push_back(N->getOperand(1));
2388 Ops.push_back(RegSeq);
// The index vector follows the table registers.
2389 Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2390 Ops.push_back(getAL(CurDAG, dl)); // predicate
2391 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2392 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
// Try to match N (an AND, shift, or SIGN_EXTEND_INREG) to an ARMv6T2/Thumb2
// bitfield extract instruction: UBFX for unsigned, SBFX for signed.
// Returns true (after calling SelectNodeTo) if a match was made.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
2395 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2396 if (!Subtarget->hasV6T2Ops())
2399 unsigned Opc = isSigned
2400 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2401 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2404 // For unsigned extracts, check for a shift right and mask
2405 unsigned And_imm = 0;
2406 if (N->getOpcode() == ISD::AND) {
2407 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2409 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2410 if (And_imm & (And_imm + 1))
// Pattern: (and (srl X, lsb), low-bit-mask) -> UBFX X, lsb, width.
2413 unsigned Srl_imm = 0;
2414 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2416 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2418 // Note: The width operand is encoded as width-1.
2419 unsigned Width = countTrailingOnes(And_imm) - 1;
2420 unsigned LSB = Srl_imm;
2422 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2424 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2425 // It's cheaper to use a right shift to extract the top bits.
2426 if (Subtarget->isThumb()) {
2427 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2428 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2429 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2430 getAL(CurDAG, dl), Reg0, Reg0 };
2431 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2435 // ARM models shift instructions as MOVsi with shifter operand.
2436 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2438 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2440 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2441 getAL(CurDAG, dl), Reg0, Reg0 };
2442 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
// General case: emit the UBFX/SBFX with lsb and width-1 operands.
2446 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2447 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2448 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2449 getAL(CurDAG, dl), Reg0 };
2450 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2457 // Otherwise, we're looking for a shift of a shift
// Pattern: (srl (shl X, shl_imm), srl_imm) extracts an interior bitfield.
2458 unsigned Shl_imm = 0;
2459 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2460 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2461 unsigned Srl_imm = 0;
2462 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2463 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2464 // Note: The width operand is encoded as width-1.
2465 unsigned Width = 32 - Srl_imm - 1;
2466 int LSB = Srl_imm - Shl_imm;
2469 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2470 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2471 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2472 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2473 getAL(CurDAG, dl), Reg0 };
2474 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2479 // Or we are looking for a shift of an and, with a mask operand
// Pattern: (srl (and X, shifted-mask), lsb) where lsb equals the mask's
// trailing-zero count.
2480 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2481 isShiftedMask_32(And_imm)) {
2482 unsigned Srl_imm = 0;
2483 unsigned LSB = countTrailingZeros(And_imm);
2484 // Shift must be the same as the ands lsb
2485 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2486 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2487 unsigned MSB = 31 - countLeadingZeros(And_imm);
2488 // Note: The width operand is encoded as width-1.
2489 unsigned Width = MSB - LSB;
2490 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2491 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2492 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2493 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2494 getAL(CurDAG, dl), Reg0 };
2495 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
// Signed case: (sign_extend_inreg (srl/sra X, lsb)) -> SBFX X, lsb, width-1.
2500 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2501 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2503 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2504 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2507 if (LSB + Width > 32)
2510 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2511 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2512 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2513 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2514 getAL(CurDAG, dl), Reg0 };
2515 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2522 /// Target-specific DAG combining for ISD::XOR.
2523 /// Target-independent combining lowers SELECT_CC nodes of the form
2524 /// select_cc setg[ge] X, 0, X, -X
2525 /// select_cc setgt X, -1, X, -X
2526 /// select_cc setl[te] X, 0, -X, X
2527 /// select_cc setlt X, 1, -X, X
2528 /// which represent Integer ABS into:
2529 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2530 /// ARM instruction selection detects the latter and matches it to
2531 /// ARM::ABS or ARM::t2ABS machine node.
/// Returns true (after SelectNodeTo) if the ABS pattern was matched.
2532 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2533 SDValue XORSrc0 = N->getOperand(0);
2534 SDValue XORSrc1 = N->getOperand(1);
2535 EVT VT = N->getValueType(0);
// ABS pseudo is not available for Thumb1.
2537 if (Subtarget->isThumb1Only())
// Expect (xor (add X, Y), Y) with Y = (sra X, size-1).
2540 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2543 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2544 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2545 SDValue SRASrc0 = XORSrc1.getOperand(0);
2546 SDValue SRASrc1 = XORSrc1.getOperand(1);
2547 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2548 EVT XType = SRASrc0.getValueType();
2549 unsigned Size = XType.getSizeInBits() - 1;
// All pieces must refer to the same X, and the shift amount must be the
// sign-bit position (bit width - 1).
2551 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2552 XType.isInteger() && SRAConstant != nullptr &&
2553 Size == SRAConstant->getZExtValue()) {
2554 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2555 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
// Helper for SMULW/SMLAW matching: decide whether SignExt is a 16-bit
// sign-extended value suitable for the "B" (bottom-half) form or a
// (sra X, 16) suitable for the "T" (top-half) form. On success sets *Opc
// to the SMULW*/SMLAW* opcode (SMLAW* when Accumulate) and Src1 to the
// underlying 32-bit source.
2562 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2564 // For SM*WB, we need to some form of sext.
2565 // For SM*WT, we need to search for (sra X, 16)
2566 // Src1 then gets set to X.
2567 if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2568 SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2569 SignExt.getOpcode() == ISD::AssertSext) &&
2570 SignExt.getValueType() == MVT::i32) {
2572 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2573 Src1 = SignExt.getOperand(0);
// Not an explicit sext node; look for the top-half form (sra ..., 16).
2577 if (SignExt.getOpcode() != ISD::SRA)
2580 ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2581 if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2584 SDValue Op0 = SignExt.getOperand(0);
2586 // The sign extend operand for SM*WB could be generated by a shl and ashr.
2587 if (Op0.getOpcode() == ISD::SHL) {
2589 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2590 if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
// (sra (shl X, 16), 16) is a bottom-half sign extension -> "B" form.
2593 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2594 Src1 = Op0.getOperand(0);
// Plain (sra X, 16) selects the top half -> "T" form.
2597 *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2598 Src1 = SignExt.getOperand(0);
// Helper for SMULW/SMLAW matching: recognize
//   (or (srl (smul_lohi X, Y), 16), (shl (smul_lohi X, Y), 16))
// i.e. the middle 32 bits of a 64-bit signed multiply, and classify one of
// the multiply operands as a 16-bit half via SearchSignedMulShort. On
// success sets *Opc and the Src0/Src1 multiply operands.
2602 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2603 SDValue &Src1, bool Accumulate) {
2604 // First we look for:
2605 // (add (or (srl ?, 16), (shl ?, 16)))
2606 if (OR.getOpcode() != ISD::OR)
2609 SDValue SRL = OR.getOperand(0);
2610 SDValue SHL = OR.getOperand(1);
// OR is commutative; try both operand orders.
2612 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2613 SRL = OR.getOperand(1);
2614 SHL = OR.getOperand(0);
2615 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2619 ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2620 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2621 if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2622 SHLSrc1->getZExtValue() != 16)
2625 // The first operands to the shifts need to be the two results from the
2626 // same smul_lohi node.
2627 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2628 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
// The srl must use the low half (result 0) and the shl the high half
// (result 1) of the same smul_lohi.
2631 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2632 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2633 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2637 // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2638 // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2639 // For SMLAWB the 16-bit value will signed extended somehow.
2640 // For SMLAWT only the SRA is required.
2642 // Check both sides of SMUL_LOHI
2643 if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2644 Src0 = SMULLOHI->getOperand(1);
2645 } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2647 Src0 = SMULLOHI->getOperand(0);
// Try to select N (an ADD or OR) as SMLAW[B|T] (multiply-accumulate) or
// SMULW[B|T] (multiply) using the SearchSignedMulLong pattern matcher.
// Returns true if a machine node was selected.
2654 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
// These instructions require ARMv6 (or Thumb2 on Thumb targets).
2655 if (!Subtarget->hasV6Ops() ||
2656 (Subtarget->isThumb() && !Subtarget->hasThumb2()))
2660 SDValue Src0 = N->getOperand(0);
2661 SDValue Src1 = N->getOperand(1);
// ADD: accumulate form (SMLAW*). One operand is the multiply pattern, the
// other is the accumulator.
2665 if (N->getOpcode() == ISD::ADD) {
2666 if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2670 if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2672 } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2680 SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2681 CurDAG->getRegister(0, MVT::i32) };
2682 CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
// OR: plain multiply form (SMULW*), matched directly on N itself.
2684 } else if (N->getOpcode() == ISD::OR &&
2685 SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2689 SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2690 CurDAG->getRegister(0, MVT::i32)};
2691 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2697 /// We've got special pseudo-instructions for these
/// Select an ATOMIC_CMP_SWAP_WITH_SUCCESS node to the size-specific
/// CMP_SWAP_{8,16,32} pseudo, which is expanded after register allocation.
2698 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2700 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2701 if (MemTy == MVT::i8)
2702 Opcode = ARM::CMP_SWAP_8;
2703 else if (MemTy == MVT::i16)
2704 Opcode = ARM::CMP_SWAP_16;
2705 else if (MemTy == MVT::i32)
2706 Opcode = ARM::CMP_SWAP_32;
2708 llvm_unreachable("Unknown AtomicCmpSwap type");
// Operands: pointer, expected value, new value (plus the chain).
2710 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2712 SDNode *CmpSwap = CurDAG->getMachineNode(
2714 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2716 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2717 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2718 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
// Result 0 is the loaded value; result 2 is the chain. Pseudo result 1 is
// a scratch and is intentionally not wired to the original node.
2720 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2721 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2722 CurDAG->RemoveDeadNode(N);
// Select CONCAT_VECTORS: legal only as two 64-bit halves forming a 128-bit
// vector, which maps to a D-register pair REG_SEQUENCE.
2725 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2726 // The only time a CONCAT_VECTORS operation can have legal types is when
2727 // two 64-bit vectors are concatenated to a 128-bit vector.
2728 EVT VT = N->getValueType(0);
2729 if (!VT.is128BitVector() || N->getNumOperands() != 2)
2730 llvm_unreachable("unexpected CONCAT_VECTORS");
2731 ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
// If A's set bits form one contiguous run, return {index of highest set bit,
// index of lowest set bit}; otherwise return None.
2734 static Optional<std::pair<unsigned, unsigned>>
2735 getContiguousRangeOfSetBits(const APInt &A) {
2736 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2737 unsigned LastOne = A.countTrailingZeros();
// Contiguous iff the popcount matches the span between the end bits.
2738 if (A.countPopulation() != (FirstOne - LastOne + 1))
2739 return Optional<std::pair<unsigned,unsigned>>();
2740 return std::make_pair(FirstOne, LastOne);
// On Thumb targets, rewrite (cmpz (and X, C), #0) — where C's set bits are
// contiguous — into flag-setting shifts (LSLS/LSRS), which avoids
// materializing the mask constant. Sets SwitchEQNEToPLMI when the caller
// must remap EQ/NE condition codes to PL/MI (single-bit-in-sign-bit case).
2743 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2744 assert(N->getOpcode() == ARMISD::CMPZ);
2745 SwitchEQNEToPLMI = false;
2747 if (!Subtarget->isThumb())
2748 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2749 // LSR don't exist as standalone instructions - they need the barrel shifter.
2752 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2753 SDValue And = N->getOperand(0);
2754 if (!And->hasOneUse())
2757 SDValue Zero = N->getOperand(1);
2758 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2759 And->getOpcode() != ISD::AND)
2761 SDValue X = And.getOperand(0);
2762 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2764 if (!C || !X->hasOneUse())
// Only masks whose set bits are one contiguous run can be expressed as a
// shift (or pair of shifts).
2766 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2770 // There are several ways to lower this:
// Emit a single immediate shift; Thumb2 uses t2LSLri/t2LSRri, Thumb1 uses
// the flag-setting tLSLri/tLSRri forms (which take CPSR as an operand).
2774 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2775 if (Subtarget->isThumb2()) {
2776 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2777 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2778 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2779 CurDAG->getRegister(0, MVT::i32) };
2780 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2782 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2783 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2784 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2785 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
// Range->first is the highest set-bit index, Range->second the lowest.
2789 if (Range->second == 0) {
2790 // 1. Mask includes the LSB -> Simply shift the top N bits off
2791 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2792 ReplaceNode(And.getNode(), NewN);
2793 } else if (Range->first == 31) {
2794 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2795 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2796 ReplaceNode(And.getNode(), NewN);
2797 } else if (Range->first == Range->second) {
2798 // 3. Only one bit is set. We can shift this into the sign bit and use a
2799 // PL/MI comparison.
2800 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2801 ReplaceNode(And.getNode(), NewN);
2803 SwitchEQNEToPLMI = true;
2804 } else if (!Subtarget->hasV6T2Ops()) {
2805 // 4. Do a double shift to clear bottom and top bits, but only in
2806 // thumb-1 mode as in thumb-2 we can use UBFX.
2807 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2808 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2809 Range->second + (31 - Range->first));
2810 ReplaceNode(And.getNode(), NewN);
2815 void ARMDAGToDAGISel::Select(SDNode *N) {
2818 if (N->isMachineOpcode()) {
2820 return; // Already selected.
2823 switch (N->getOpcode()) {
2827 if (trySMLAWSMULW(N))
2830 case ISD::WRITE_REGISTER:
2831 if (tryWriteRegister(N))
2834 case ISD::READ_REGISTER:
2835 if (tryReadRegister(N))
2838 case ISD::INLINEASM:
2839 if (tryInlineAsm(N))
2843 // Select special operations if XOR node forms integer ABS pattern
2846 // Other cases are autogenerated.
2848 case ISD::Constant: {
2849 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2850 // If we can't materialize the constant we need to use a literal pool
2851 if (ConstantMaterializationCost(Val) > 2) {
2852 SDValue CPIdx = CurDAG->getTargetConstantPool(
2853 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2854 TLI->getPointerTy(CurDAG->getDataLayout()));
2857 if (Subtarget->isThumb()) {
2858 SDValue Pred = getAL(CurDAG, dl);
2859 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2860 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2861 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2866 CurDAG->getTargetConstant(0, dl, MVT::i32),
2868 CurDAG->getRegister(0, MVT::i32),
2869 CurDAG->getEntryNode()
2871 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2874 ReplaceNode(N, ResNode);
2878 // Other cases are autogenerated.
2881 case ISD::FrameIndex: {
2882 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2883 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2884 SDValue TFI = CurDAG->getTargetFrameIndex(
2885 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2886 if (Subtarget->isThumb1Only()) {
2887 // Set the alignment of the frame object to 4, to avoid having to generate
2888 // more than one ADD
2889 MachineFrameInfo &MFI = MF->getFrameInfo();
2890 if (MFI.getObjectAlignment(FI) < 4)
2891 MFI.setObjectAlignment(FI, 4);
2892 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2893 CurDAG->getTargetConstant(0, dl, MVT::i32));
2896 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2897 ARM::t2ADDri : ARM::ADDri);
2898 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2899 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2900 CurDAG->getRegister(0, MVT::i32) };
2901 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2906 if (tryV6T2BitfieldExtractOp(N, false))
2909 case ISD::SIGN_EXTEND_INREG:
2911 if (tryV6T2BitfieldExtractOp(N, true))
2915 if (Subtarget->isThumb1Only())
2917 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2918 unsigned RHSV = C->getZExtValue();
2920 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2921 unsigned ShImm = Log2_32(RHSV-1);
2924 SDValue V = N->getOperand(0);
2925 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2926 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2927 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2928 if (Subtarget->isThumb()) {
2929 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2930 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2933 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2935 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2939 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2940 unsigned ShImm = Log2_32(RHSV+1);
2943 SDValue V = N->getOperand(0);
2944 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2945 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2946 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2947 if (Subtarget->isThumb()) {
2948 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2949 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2952 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2954 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2961 // Check for unsigned bitfield extract
2962 if (tryV6T2BitfieldExtractOp(N, false))
2965 // If an immediate is used in an AND node, it is possible that the immediate
2966 // can be more optimally materialized when negated. If this is the case we
2967 // can negate the immediate and use a BIC instead.
2968 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2969 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2970 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2972 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2973 // immediate can be negated and fit in the immediate operand of
2974 // a t2BIC, don't do any manual transform here as this can be
2975 // handled by the generic ISel machinery.
2976 bool PreferImmediateEncoding =
2977 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2978 if (!PreferImmediateEncoding &&
2979 ConstantMaterializationCost(Imm) >
2980 ConstantMaterializationCost(~Imm)) {
2981 // The current immediate costs more to materialize than a negated
2982 // immediate, so negate the immediate and use a BIC.
2984 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2985 // If the new constant didn't exist before, reposition it in the topological
2986 // ordering so it is just before N. Otherwise, don't touch its location.
2987 if (NewImm->getNodeId() == -1)
2988 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2990 if (!Subtarget->hasThumb2()) {
2991 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2992 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2993 CurDAG->getRegister(0, MVT::i32)};
2994 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2997 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2998 CurDAG->getRegister(0, MVT::i32),
2999 CurDAG->getRegister(0, MVT::i32)};
3001 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3007 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3008 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3009 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3010 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3011 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
3012 EVT VT = N->getValueType(0);
3015 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3017 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3020 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3021 N1C = dyn_cast<ConstantSDNode>(N1);
3024 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3025 SDValue N2 = N0.getOperand(1);
3026 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3029 unsigned N1CVal = N1C->getZExtValue();
3030 unsigned N2CVal = N2C->getZExtValue();
3031 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3032 (N1CVal & 0xffffU) == 0xffffU &&
3033 (N2CVal & 0xffffU) == 0x0U) {
3034 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3036 SDValue Ops[] = { N0.getOperand(0), Imm16,
3037 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3038 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3045 case ARMISD::VMOVRRD:
3046 ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
3047 N->getOperand(0), getAL(CurDAG, dl),
3048 CurDAG->getRegister(0, MVT::i32)));
3050 case ISD::UMUL_LOHI: {
3051 if (Subtarget->isThumb1Only())
3053 if (Subtarget->isThumb()) {
3054 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3055 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3057 N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
3060 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3061 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3062 CurDAG->getRegister(0, MVT::i32) };
3063 ReplaceNode(N, CurDAG->getMachineNode(
3064 Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
3065 MVT::i32, MVT::i32, Ops));
3069 case ISD::SMUL_LOHI: {
3070 if (Subtarget->isThumb1Only())
3072 if (Subtarget->isThumb()) {
3073 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3074 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3076 N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
3079 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3080 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3081 CurDAG->getRegister(0, MVT::i32) };
3082 ReplaceNode(N, CurDAG->getMachineNode(
3083 Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
3084 MVT::i32, MVT::i32, Ops));
3088 case ARMISD::UMAAL: {
3089 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3090 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3091 N->getOperand(2), N->getOperand(3),
3093 CurDAG->getRegister(0, MVT::i32) };
3094 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3097 case ARMISD::UMLAL:{
3098 // UMAAL is similar to UMLAL but it adds two 32-bit values to the
3099 // 64-bit multiplication result.
3100 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
3101 N->getOperand(2).getOpcode() == ARMISD::ADDC &&
3102 N->getOperand(3).getOpcode() == ARMISD::ADDE) {
3104 SDValue Addc = N->getOperand(2);
3105 SDValue Adde = N->getOperand(3);
3107 if (Adde.getOperand(2).getNode() == Addc.getNode()) {
3109 ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
3110 ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
3112 if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
3114 // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
3115 // RdLo = one operand to be added, lower 32-bits of res
3116 // RdHi = other operand to be added, upper 32-bits of res
3117 // Rn = first multiply operand
3118 // Rm = second multiply operand
3119 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3120 Addc.getOperand(0), Addc.getOperand(1),
3122 CurDAG->getRegister(0, MVT::i32) };
3123 unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3124 CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
3130 if (Subtarget->isThumb()) {
3131 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3132 N->getOperand(3), getAL(CurDAG, dl),
3133 CurDAG->getRegister(0, MVT::i32)};
3135 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3138 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3139 N->getOperand(3), getAL(CurDAG, dl),
3140 CurDAG->getRegister(0, MVT::i32),
3141 CurDAG->getRegister(0, MVT::i32) };
3142 ReplaceNode(N, CurDAG->getMachineNode(
3143 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3144 MVT::i32, MVT::i32, Ops));
3148 case ARMISD::SMLAL:{
3149 if (Subtarget->isThumb()) {
3150 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3151 N->getOperand(3), getAL(CurDAG, dl),
3152 CurDAG->getRegister(0, MVT::i32)};
3154 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3157 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3158 N->getOperand(3), getAL(CurDAG, dl),
3159 CurDAG->getRegister(0, MVT::i32),
3160 CurDAG->getRegister(0, MVT::i32) };
3161 ReplaceNode(N, CurDAG->getMachineNode(
3162 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3163 MVT::i32, MVT::i32, Ops));
3167 case ARMISD::SUBE: {
3168 if (!Subtarget->hasV6Ops())
3170 // Look for a pattern to match SMMLS
3171 // (sube a, (smul_lohi a, b).val1, (subc 0, (smul_lohi a, b).val0).val1)
3172 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3173 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3174 !SDValue(N, 1).use_empty())
3177 if (Subtarget->isThumb())
3178 assert(Subtarget->hasThumb2() &&
3179 "This pattern should not be generated for Thumb");
3181 SDValue SmulLoHi = N->getOperand(1);
3182 SDValue Subc = N->getOperand(2);
3183 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3185 if (!Zero || Zero->getZExtValue() != 0 ||
3186 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3187 N->getOperand(1) != SmulLoHi.getValue(1) ||
3188 N->getOperand(2) != Subc.getValue(1))
3191 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3192 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3193 N->getOperand(0), getAL(CurDAG, dl),
3194 CurDAG->getRegister(0, MVT::i32) };
3195 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3199 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3200 if (tryT2IndexedLoad(N))
3202 } else if (Subtarget->isThumb()) {
3203 if (tryT1IndexedLoad(N))
3205 } else if (tryARMIndexedLoad(N))
3207 // Other cases are autogenerated.
3210 case ARMISD::BRCOND: {
3211 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3212 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3213 // Pattern complexity = 6 cost = 1 size = 0
3215 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3216 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3217 // Pattern complexity = 6 cost = 1 size = 0
3219 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3220 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3221 // Pattern complexity = 6 cost = 1 size = 0
3223 unsigned Opc = Subtarget->isThumb() ?
3224 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3225 SDValue Chain = N->getOperand(0);
3226 SDValue N1 = N->getOperand(1);
3227 SDValue N2 = N->getOperand(2);
3228 SDValue N3 = N->getOperand(3);
3229 SDValue InFlag = N->getOperand(4);
3230 assert(N1.getOpcode() == ISD::BasicBlock);
3231 assert(N2.getOpcode() == ISD::Constant);
3232 assert(N3.getOpcode() == ISD::Register);
3234 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3236 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3237 bool SwitchEQNEToPLMI;
3238 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3239 InFlag = N->getOperand(4);
3241 if (SwitchEQNEToPLMI) {
3242 switch ((ARMCC::CondCodes)CC) {
3243 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3245 CC = (unsigned)ARMCC::MI;
3248 CC = (unsigned)ARMCC::PL;
3254 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3255 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3256 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3258 Chain = SDValue(ResNode, 0);
3259 if (N->getNumValues() == 2) {
3260 InFlag = SDValue(ResNode, 1);
3261 ReplaceUses(SDValue(N, 1), InFlag);
3263 ReplaceUses(SDValue(N, 0),
3264 SDValue(Chain.getNode(), Chain.getResNo()));
3265 CurDAG->RemoveDeadNode(N);
3269 case ARMISD::CMPZ: {
3270 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3271 // This allows us to avoid materializing the expensive negative constant.
3272 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3273 // for its glue output.
3274 SDValue X = N->getOperand(0);
3275 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3276 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3277 int64_t Addend = -C->getSExtValue();
3279 SDNode *Add = nullptr;
3280 // In T2 mode, ADDS can be better than CMN if the immediate fits in a
3281 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3282 // Outside that range we can just use a CMN which is 32-bit but has a
3283 // 12-bit immediate range.
3284 if (Subtarget->isThumb2() && Addend < 1<<8) {
3285 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3286 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3287 CurDAG->getRegister(0, MVT::i32) };
3288 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3289 } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
3290 // FIXME: Add T1 tADDi8 code.
3291 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3292 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3293 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3294 Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
3295 } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
3296 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3297 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3298 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3299 Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
3302 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3303 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3306 // Other cases are autogenerated.
3310 case ARMISD::CMOV: {
3311 SDValue InFlag = N->getOperand(4);
3313 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3314 bool SwitchEQNEToPLMI;
3315 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3317 if (SwitchEQNEToPLMI) {
3318 SDValue ARMcc = N->getOperand(2);
3319 ARMCC::CondCodes CC =
3320 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3323 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3331 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3332 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3333 N->getOperand(3), N->getOperand(4)};
3334 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3338 // Other cases are autogenerated.
3342 case ARMISD::VZIP: {
3344 EVT VT = N->getValueType(0);
3345 switch (VT.getSimpleVT().SimpleTy) {
3347 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3348 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3350 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3351 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3352 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3353 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3355 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3357 SDValue Pred = getAL(CurDAG, dl);
3358 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3359 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3360 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3363 case ARMISD::VUZP: {
3365 EVT VT = N->getValueType(0);
3366 switch (VT.getSimpleVT().SimpleTy) {
3368 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3369 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3371 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3372 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3373 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3374 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3376 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3378 SDValue Pred = getAL(CurDAG, dl);
3379 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3380 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3381 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3384 case ARMISD::VTRN: {
3386 EVT VT = N->getValueType(0);
3387 switch (VT.getSimpleVT().SimpleTy) {
3389 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3390 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3392 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3393 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3394 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3396 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3398 SDValue Pred = getAL(CurDAG, dl);
3399 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3400 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3401 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3404 case ARMISD::BUILD_VECTOR: {
3405 EVT VecVT = N->getValueType(0);
3406 EVT EltVT = VecVT.getVectorElementType();
3407 unsigned NumElts = VecVT.getVectorNumElements();
3408 if (EltVT == MVT::f64) {
3409 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3411 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3414 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3417 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3420 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3422 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3423 N->getOperand(2), N->getOperand(3)));
3427 case ARMISD::VLD1DUP: {
3428 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3430 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3432 SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3436 case ARMISD::VLD2DUP: {
3437 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3439 SelectVLDDup(N, false, 2, Opcodes);
3443 case ARMISD::VLD3DUP: {
3444 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3445 ARM::VLD3DUPd16Pseudo,
3446 ARM::VLD3DUPd32Pseudo };
3447 SelectVLDDup(N, false, 3, Opcodes);
3451 case ARMISD::VLD4DUP: {
3452 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3453 ARM::VLD4DUPd16Pseudo,
3454 ARM::VLD4DUPd32Pseudo };
3455 SelectVLDDup(N, false, 4, Opcodes);
3459 case ARMISD::VLD1DUP_UPD: {
3460 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3461 ARM::VLD1DUPd16wb_fixed,
3462 ARM::VLD1DUPd32wb_fixed };
3463 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3464 ARM::VLD1DUPq16wb_fixed,
3465 ARM::VLD1DUPq32wb_fixed };
3466 SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3470 case ARMISD::VLD2DUP_UPD: {
3471 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3472 ARM::VLD2DUPd16wb_fixed,
3473 ARM::VLD2DUPd32wb_fixed };
3474 SelectVLDDup(N, true, 2, Opcodes);
3478 case ARMISD::VLD3DUP_UPD: {
3479 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3480 ARM::VLD3DUPd16Pseudo_UPD,
3481 ARM::VLD3DUPd32Pseudo_UPD };
3482 SelectVLDDup(N, true, 3, Opcodes);
3486 case ARMISD::VLD4DUP_UPD: {
3487 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3488 ARM::VLD4DUPd16Pseudo_UPD,
3489 ARM::VLD4DUPd32Pseudo_UPD };
3490 SelectVLDDup(N, true, 4, Opcodes);
3494 case ARMISD::VLD1_UPD: {
3495 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3496 ARM::VLD1d16wb_fixed,
3497 ARM::VLD1d32wb_fixed,
3498 ARM::VLD1d64wb_fixed };
3499 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3500 ARM::VLD1q16wb_fixed,
3501 ARM::VLD1q32wb_fixed,
3502 ARM::VLD1q64wb_fixed };
3503 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3507 case ARMISD::VLD2_UPD: {
3508 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3509 ARM::VLD2d16wb_fixed,
3510 ARM::VLD2d32wb_fixed,
3511 ARM::VLD1q64wb_fixed};
3512 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3513 ARM::VLD2q16PseudoWB_fixed,
3514 ARM::VLD2q32PseudoWB_fixed };
3515 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3519 case ARMISD::VLD3_UPD: {
3520 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3521 ARM::VLD3d16Pseudo_UPD,
3522 ARM::VLD3d32Pseudo_UPD,
3523 ARM::VLD1d64TPseudoWB_fixed};
3524 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3525 ARM::VLD3q16Pseudo_UPD,
3526 ARM::VLD3q32Pseudo_UPD };
3527 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3528 ARM::VLD3q16oddPseudo_UPD,
3529 ARM::VLD3q32oddPseudo_UPD };
3530 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3534 case ARMISD::VLD4_UPD: {
3535 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3536 ARM::VLD4d16Pseudo_UPD,
3537 ARM::VLD4d32Pseudo_UPD,
3538 ARM::VLD1d64QPseudoWB_fixed};
3539 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3540 ARM::VLD4q16Pseudo_UPD,
3541 ARM::VLD4q32Pseudo_UPD };
3542 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3543 ARM::VLD4q16oddPseudo_UPD,
3544 ARM::VLD4q32oddPseudo_UPD };
3545 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3549 case ARMISD::VLD2LN_UPD: {
3550 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3551 ARM::VLD2LNd16Pseudo_UPD,
3552 ARM::VLD2LNd32Pseudo_UPD };
3553 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3554 ARM::VLD2LNq32Pseudo_UPD };
3555 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3559 case ARMISD::VLD3LN_UPD: {
3560 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3561 ARM::VLD3LNd16Pseudo_UPD,
3562 ARM::VLD3LNd32Pseudo_UPD };
3563 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3564 ARM::VLD3LNq32Pseudo_UPD };
3565 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3569 case ARMISD::VLD4LN_UPD: {
3570 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3571 ARM::VLD4LNd16Pseudo_UPD,
3572 ARM::VLD4LNd32Pseudo_UPD };
3573 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3574 ARM::VLD4LNq32Pseudo_UPD };
3575 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3579 case ARMISD::VST1_UPD: {
3580 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3581 ARM::VST1d16wb_fixed,
3582 ARM::VST1d32wb_fixed,
3583 ARM::VST1d64wb_fixed };
3584 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3585 ARM::VST1q16wb_fixed,
3586 ARM::VST1q32wb_fixed,
3587 ARM::VST1q64wb_fixed };
3588 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3592 case ARMISD::VST2_UPD: {
3593 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3594 ARM::VST2d16wb_fixed,
3595 ARM::VST2d32wb_fixed,
3596 ARM::VST1q64wb_fixed};
3597 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3598 ARM::VST2q16PseudoWB_fixed,
3599 ARM::VST2q32PseudoWB_fixed };
3600 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3604 case ARMISD::VST3_UPD: {
3605 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3606 ARM::VST3d16Pseudo_UPD,
3607 ARM::VST3d32Pseudo_UPD,
3608 ARM::VST1d64TPseudoWB_fixed};
3609 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3610 ARM::VST3q16Pseudo_UPD,
3611 ARM::VST3q32Pseudo_UPD };
3612 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3613 ARM::VST3q16oddPseudo_UPD,
3614 ARM::VST3q32oddPseudo_UPD };
3615 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3619 case ARMISD::VST4_UPD: {
3620 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3621 ARM::VST4d16Pseudo_UPD,
3622 ARM::VST4d32Pseudo_UPD,
3623 ARM::VST1d64QPseudoWB_fixed};
3624 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3625 ARM::VST4q16Pseudo_UPD,
3626 ARM::VST4q32Pseudo_UPD };
3627 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3628 ARM::VST4q16oddPseudo_UPD,
3629 ARM::VST4q32oddPseudo_UPD };
3630 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3634 case ARMISD::VST2LN_UPD: {
3635 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3636 ARM::VST2LNd16Pseudo_UPD,
3637 ARM::VST2LNd32Pseudo_UPD };
3638 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3639 ARM::VST2LNq32Pseudo_UPD };
3640 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3644 case ARMISD::VST3LN_UPD: {
3645 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3646 ARM::VST3LNd16Pseudo_UPD,
3647 ARM::VST3LNd32Pseudo_UPD };
3648 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3649 ARM::VST3LNq32Pseudo_UPD };
3650 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3654 case ARMISD::VST4LN_UPD: {
3655 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3656 ARM::VST4LNd16Pseudo_UPD,
3657 ARM::VST4LNd32Pseudo_UPD };
3658 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3659 ARM::VST4LNq32Pseudo_UPD };
3660 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3664 case ISD::INTRINSIC_VOID:
3665 case ISD::INTRINSIC_W_CHAIN: {
3666 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3671 case Intrinsic::arm_mrrc:
3672 case Intrinsic::arm_mrrc2: {
3674 SDValue Chain = N->getOperand(0);
3677 if (Subtarget->isThumb())
3678 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3680 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3682 SmallVector<SDValue, 5> Ops;
3683 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3684 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3685 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3687 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3688 // instruction will always be '1111' but it is possible in assembly language to specify
3689 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3690 if (Opc != ARM::MRRC2) {
3691 Ops.push_back(getAL(CurDAG, dl));
3692 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3695 Ops.push_back(Chain);
3697 // Writes to two registers.
3698 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3700 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3703 case Intrinsic::arm_ldaexd:
3704 case Intrinsic::arm_ldrexd: {
3706 SDValue Chain = N->getOperand(0);
3707 SDValue MemAddr = N->getOperand(2);
3708 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3710 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3711 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3712 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3714 // arm_ldrexd returns a i64 value in {i32, i32}
3715 std::vector<EVT> ResTys;
3717 ResTys.push_back(MVT::i32);
3718 ResTys.push_back(MVT::i32);
3720 ResTys.push_back(MVT::Untyped);
3721 ResTys.push_back(MVT::Other);
3723 // Place arguments in the right order.
3724 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3725 CurDAG->getRegister(0, MVT::i32), Chain};
3726 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3727 // Transfer memoperands.
3728 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3729 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3730 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3733 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3734 if (!SDValue(N, 0).use_empty()) {
3737 Result = SDValue(Ld, 0);
3740 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3741 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3742 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3743 Result = SDValue(ResNode,0);
3745 ReplaceUses(SDValue(N, 0), Result);
3747 if (!SDValue(N, 1).use_empty()) {
3750 Result = SDValue(Ld, 1);
3753 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3754 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3755 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3756 Result = SDValue(ResNode,0);
3758 ReplaceUses(SDValue(N, 1), Result);
3760 ReplaceUses(SDValue(N, 2), OutChain);
3761 CurDAG->RemoveDeadNode(N);
3764 case Intrinsic::arm_stlexd:
3765 case Intrinsic::arm_strexd: {
3767 SDValue Chain = N->getOperand(0);
3768 SDValue Val0 = N->getOperand(2);
3769 SDValue Val1 = N->getOperand(3);
3770 SDValue MemAddr = N->getOperand(4);
3772 // Store exclusive double return a i32 value which is the return status
3773 // of the issued store.
3774 const EVT ResTys[] = {MVT::i32, MVT::Other};
3776 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3777 // Place arguments in the right order.
3778 SmallVector<SDValue, 7> Ops;
3780 Ops.push_back(Val0);
3781 Ops.push_back(Val1);
3783 // arm_strexd uses GPRPair.
3784 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3785 Ops.push_back(MemAddr);
3786 Ops.push_back(getAL(CurDAG, dl));
3787 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3788 Ops.push_back(Chain);
3790 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3791 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3792 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3794 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3795 // Transfer memoperands.
3796 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3797 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3798 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3804 case Intrinsic::arm_neon_vld1: {
3805 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3806 ARM::VLD1d32, ARM::VLD1d64 };
3807 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3808 ARM::VLD1q32, ARM::VLD1q64};
3809 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3813 case Intrinsic::arm_neon_vld2: {
3814 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3815 ARM::VLD2d32, ARM::VLD1q64 };
3816 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3817 ARM::VLD2q32Pseudo };
3818 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3822 case Intrinsic::arm_neon_vld3: {
3823 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3826 ARM::VLD1d64TPseudo };
3827 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3828 ARM::VLD3q16Pseudo_UPD,
3829 ARM::VLD3q32Pseudo_UPD };
3830 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3831 ARM::VLD3q16oddPseudo,
3832 ARM::VLD3q32oddPseudo };
3833 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3837 case Intrinsic::arm_neon_vld4: {
3838 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3841 ARM::VLD1d64QPseudo };
3842 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3843 ARM::VLD4q16Pseudo_UPD,
3844 ARM::VLD4q32Pseudo_UPD };
3845 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3846 ARM::VLD4q16oddPseudo,
3847 ARM::VLD4q32oddPseudo };
3848 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3852 case Intrinsic::arm_neon_vld2lane: {
3853 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3854 ARM::VLD2LNd16Pseudo,
3855 ARM::VLD2LNd32Pseudo };
3856 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3857 ARM::VLD2LNq32Pseudo };
3858 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3862 case Intrinsic::arm_neon_vld3lane: {
3863 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3864 ARM::VLD3LNd16Pseudo,
3865 ARM::VLD3LNd32Pseudo };
3866 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3867 ARM::VLD3LNq32Pseudo };
3868 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3872 case Intrinsic::arm_neon_vld4lane: {
3873 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3874 ARM::VLD4LNd16Pseudo,
3875 ARM::VLD4LNd32Pseudo };
3876 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3877 ARM::VLD4LNq32Pseudo };
3878 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3882 case Intrinsic::arm_neon_vst1: {
3883 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3884 ARM::VST1d32, ARM::VST1d64 };
3885 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3886 ARM::VST1q32, ARM::VST1q64 };
3887 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3891 case Intrinsic::arm_neon_vst2: {
3892 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3893 ARM::VST2d32, ARM::VST1q64 };
3894 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3895 ARM::VST2q32Pseudo };
3896 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3900 case Intrinsic::arm_neon_vst3: {
3901 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3904 ARM::VST1d64TPseudo };
3905 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3906 ARM::VST3q16Pseudo_UPD,
3907 ARM::VST3q32Pseudo_UPD };
3908 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3909 ARM::VST3q16oddPseudo,
3910 ARM::VST3q32oddPseudo };
3911 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3915 case Intrinsic::arm_neon_vst4: {
3916 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3919 ARM::VST1d64QPseudo };
3920 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3921 ARM::VST4q16Pseudo_UPD,
3922 ARM::VST4q32Pseudo_UPD };
3923 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3924 ARM::VST4q16oddPseudo,
3925 ARM::VST4q32oddPseudo };
3926 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3930 case Intrinsic::arm_neon_vst2lane: {
3931 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3932 ARM::VST2LNd16Pseudo,
3933 ARM::VST2LNd32Pseudo };
3934 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3935 ARM::VST2LNq32Pseudo };
3936 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3940 case Intrinsic::arm_neon_vst3lane: {
3941 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3942 ARM::VST3LNd16Pseudo,
3943 ARM::VST3LNd32Pseudo };
3944 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3945 ARM::VST3LNq32Pseudo };
3946 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3950 case Intrinsic::arm_neon_vst4lane: {
3951 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3952 ARM::VST4LNd16Pseudo,
3953 ARM::VST4LNd32Pseudo };
3954 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3955 ARM::VST4LNq32Pseudo };
3956 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3963 case ISD::INTRINSIC_WO_CHAIN: {
3964 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3969 case Intrinsic::arm_neon_vtbl2:
3970 SelectVTBL(N, false, 2, ARM::VTBL2);
3972 case Intrinsic::arm_neon_vtbl3:
3973 SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3975 case Intrinsic::arm_neon_vtbl4:
3976 SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3979 case Intrinsic::arm_neon_vtbx2:
3980 SelectVTBL(N, true, 2, ARM::VTBX2);
3982 case Intrinsic::arm_neon_vtbx3:
3983 SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3985 case Intrinsic::arm_neon_vtbx4:
3986 SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3992 case ARMISD::VTBL1: {
3994 EVT VT = N->getValueType(0);
3995 SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3996 getAL(CurDAG, dl), // Predicate
3997 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3998 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
4001 case ARMISD::VTBL2: {
4003 EVT VT = N->getValueType(0);
4005 // Form a REG_SEQUENCE to force register allocation.
4006 SDValue V0 = N->getOperand(0);
4007 SDValue V1 = N->getOperand(1);
4008 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
4010 SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
4011 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
4012 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
4016 case ISD::CONCAT_VECTORS:
4017 SelectConcatVector(N);
4020 case ISD::ATOMIC_CMP_SWAP:
4028 // Inspect a register string of the form
4029 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4030 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4031 // and obtain the integer operands from them, adding these operands to the
4033 static void getIntOperandsFromRegisterString(StringRef RegString,
4034 SelectionDAG *CurDAG,
4036 std::vector<SDValue> &Ops) {
4037 SmallVector<StringRef, 5> Fields;
4038 RegString.split(Fields, ':');
4040 if (Fields.size() > 1) {
4041 bool AllIntFields = true;
4043 for (StringRef Field : Fields) {
4044 // Need to trim out leading 'cp' characters and get the integer field.
4046 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4047 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4050 assert(AllIntFields &&
4051 "Unexpected non-integer value in special register string.");
4055 // Maps a Banked Register string to its mask value. The mask value returned is
4056 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4057 // mask operand, which expresses which register is to be used, e.g. r8, and in
4058 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4060 static inline int getBankedRegisterMask(StringRef RegString) {
4061 return StringSwitch<int>(RegString.lower())
4062 .Case("r8_usr", 0x00)
4063 .Case("r9_usr", 0x01)
4064 .Case("r10_usr", 0x02)
4065 .Case("r11_usr", 0x03)
4066 .Case("r12_usr", 0x04)
4067 .Case("sp_usr", 0x05)
4068 .Case("lr_usr", 0x06)
4069 .Case("r8_fiq", 0x08)
4070 .Case("r9_fiq", 0x09)
4071 .Case("r10_fiq", 0x0a)
4072 .Case("r11_fiq", 0x0b)
4073 .Case("r12_fiq", 0x0c)
4074 .Case("sp_fiq", 0x0d)
4075 .Case("lr_fiq", 0x0e)
4076 .Case("lr_irq", 0x10)
4077 .Case("sp_irq", 0x11)
4078 .Case("lr_svc", 0x12)
4079 .Case("sp_svc", 0x13)
4080 .Case("lr_abt", 0x14)
4081 .Case("sp_abt", 0x15)
4082 .Case("lr_und", 0x16)
4083 .Case("sp_und", 0x17)
4084 .Case("lr_mon", 0x1c)
4085 .Case("sp_mon", 0x1d)
4086 .Case("elr_hyp", 0x1e)
4087 .Case("sp_hyp", 0x1f)
4088 .Case("spsr_fiq", 0x2e)
4089 .Case("spsr_irq", 0x30)
4090 .Case("spsr_svc", 0x32)
4091 .Case("spsr_abt", 0x34)
4092 .Case("spsr_und", 0x36)
4093 .Case("spsr_mon", 0x3c)
4094 .Case("spsr_hyp", 0x3e)
4098 // Maps a MClass special register string to its value for use in the
4099 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
4100 // Returns -1 to signify that the string was invalid.
4101 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
4102 return StringSwitch<int>(RegString.lower())
4112 .Case("primask", 0x10)
4113 .Case("basepri", 0x11)
4114 .Case("basepri_max", 0x12)
4115 .Case("faultmask", 0x13)
4116 .Case("control", 0x14)
4117 .Case("msplim", 0x0a)
4118 .Case("psplim", 0x0b)
4123 // The flags here are common to those allowed for apsr in the A class cores and
4124 // those allowed for the special registers in the M class cores. Returns a
4125 // value representing which flags were present, -1 if invalid.
4126 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
4128 return 0x2 | (int)hasDSP;
4130 return StringSwitch<int>(Flags)
4133 .Case("nzcvqg", 0x3)
4137 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
4138 const ARMSubtarget *Subtarget) {
4139 // Ensure that the register (without flags) was a valid M Class special
4141 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
4142 if (SYSmvalue == -1)
4145 // basepri, basepri_max and faultmask are only valid for V7m.
4146 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
4149 if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
4154 if (!Subtarget->has8MSecExt() &&
4155 (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
4158 if (!Subtarget->hasV8MMainlineOps() &&
4159 (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
4163 // If it was a read then we won't be expecting flags and so at this point
4164 // we can return the mask.
4172 // We know we are now handling a write so need to get the mask for the flags.
4173 int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
4175 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
4176 // shouldn't have flags present.
4177 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
4180 // The _g and _nzcvqg versions are only valid if the DSP extension is
4182 if (!Subtarget->hasDSP() && (Mask & 0x1))
4185 // The register was valid so need to put the mask in the correct place
4186 // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
4187 // construct the operand for the instruction node.
4188 if (SYSmvalue < 0x4)
4189 return SYSmvalue | Mask << 10;
4194 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4195 // The mask operand contains the special register (R Bit) in bit 4, whether
4196 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4197 // bits 3-0 contains the fields to be accessed in the special register, set by
4198 // the flags provided with the register.
4200 if (Reg == "apsr") {
4201 // The flags permitted for apsr are the same flags that are allowed in
4202 // M class registers. We get the flag value and then shift the flags into
4203 // the correct place to combine with the mask.
4204 Mask = getMClassFlagsMask(Flags, true);
4210 if (Reg != "cpsr" && Reg != "spsr") {
4214 // This is the same as if the flags were "fc"
4215 if (Flags.empty() || Flags == "all")
4218 // Inspect the supplied flags string and set the bits in the mask for
4219 // the relevant and valid flags allowed for cpsr and spsr.
4220 for (char Flag : Flags) {
4239 // This avoids allowing strings where the same flag bit appears twice.
4240 if (!FlagVal || (Mask & FlagVal))
4245 // If the register is spsr then we need to set the R bit.
4252 // Lower the read_register intrinsic to ARM specific DAG nodes
4253 // using the supplied metadata string to select the instruction node to use
4254 // and the registers/masks to construct as operands for the node.
4255 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4256 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4257 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4258 bool IsThumb2 = Subtarget->isThumb2();
4261 std::vector<SDValue> Ops;
4262 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4265 // If the special register string was constructed of fields (as defined
4266 // in the ACLE) then need to lower to MRC node (32 bit) or
4267 // MRRC node(64 bit), we can make the distinction based on the number of
4268 // operands we have.
4270 SmallVector<EVT, 3> ResTypes;
4271 if (Ops.size() == 5){
4272 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4273 ResTypes.append({ MVT::i32, MVT::Other });
4275 assert(Ops.size() == 3 &&
4276 "Invalid number of fields in special register string.");
4277 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4278 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4281 Ops.push_back(getAL(CurDAG, DL));
4282 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4283 Ops.push_back(N->getOperand(0));
4284 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4288 std::string SpecialReg = RegString->getString().lower();
4290 int BankedReg = getBankedRegisterMask(SpecialReg);
4291 if (BankedReg != -1) {
4292 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4293 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4296 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4297 DL, MVT::i32, MVT::Other, Ops));
4301 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4302 // corresponding to the register that is being read from. So we switch on the
4303 // string to find which opcode we need to use.
4304 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4305 .Case("fpscr", ARM::VMRS)
4306 .Case("fpexc", ARM::VMRS_FPEXC)
4307 .Case("fpsid", ARM::VMRS_FPSID)
4308 .Case("mvfr0", ARM::VMRS_MVFR0)
4309 .Case("mvfr1", ARM::VMRS_MVFR1)
4310 .Case("mvfr2", ARM::VMRS_MVFR2)
4311 .Case("fpinst", ARM::VMRS_FPINST)
4312 .Case("fpinst2", ARM::VMRS_FPINST2)
4315 // If an opcode was found then we can lower the read to a VFP instruction.
4317 if (!Subtarget->hasVFP2())
4319 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4322 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4325 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4329 // If the target is M Class then need to validate that the register string
4330 // is an acceptable value, so check that a mask can be constructed from the
4332 if (Subtarget->isMClass()) {
4333 StringRef Flags = "", Reg = SpecialReg;
4334 if (Reg.endswith("_ns")) {
4336 Reg = Reg.drop_back(3);
4339 int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4340 if (SYSmValue == -1)
4343 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4344 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4347 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4351 // Here we know the target is not M Class so we need to check if it is one
4352 // of the remaining possible values which are apsr, cpsr or spsr.
4353 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4354 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4356 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4357 DL, MVT::i32, MVT::Other, Ops));
4361 if (SpecialReg == "spsr") {
4362 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4365 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4366 MVT::i32, MVT::Other, Ops));
4373 // Lower the write_register intrinsic to ARM specific DAG nodes
4374 // using the supplied metadata string to select the instruction node to use
4375 // and the registers/masks to use in the nodes
4376 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4377 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4378 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4379 bool IsThumb2 = Subtarget->isThumb2();
4382 std::vector<SDValue> Ops;
4383 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4386 // If the special register string was constructed of fields (as defined
4387 // in the ACLE) then need to lower to MCR node (32 bit) or
4388 // MCRR node(64 bit), we can make the distinction based on the number of
4389 // operands we have.
4391 if (Ops.size() == 5) {
4392 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4393 Ops.insert(Ops.begin()+2, N->getOperand(2));
4395 assert(Ops.size() == 3 &&
4396 "Invalid number of fields in special register string.");
4397 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4398 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4399 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4402 Ops.push_back(getAL(CurDAG, DL));
4403 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4404 Ops.push_back(N->getOperand(0));
4406 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4410 std::string SpecialReg = RegString->getString().lower();
4411 int BankedReg = getBankedRegisterMask(SpecialReg);
4412 if (BankedReg != -1) {
4413 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4414 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4417 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4418 DL, MVT::Other, Ops));
4422 // The VFP registers are written to by creating SelectionDAG nodes with
4423 // opcodes corresponding to the register that is being written. So we switch
4424 // on the string to find which opcode we need to use.
4425 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4426 .Case("fpscr", ARM::VMSR)
4427 .Case("fpexc", ARM::VMSR_FPEXC)
4428 .Case("fpsid", ARM::VMSR_FPSID)
4429 .Case("fpinst", ARM::VMSR_FPINST)
4430 .Case("fpinst2", ARM::VMSR_FPINST2)
4434 if (!Subtarget->hasVFP2())
4436 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4437 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4438 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4442 std::pair<StringRef, StringRef> Fields;
4443 Fields = StringRef(SpecialReg).rsplit('_');
4444 std::string Reg = Fields.first.str();
4445 StringRef Flags = Fields.second;
4447 // If the target was M Class then need to validate the special register value
4448 // and retrieve the mask for use in the instruction node.
4449 if (Subtarget->isMClass()) {
4450 // basepri_max gets split so need to correct Reg and Flags.
4451 if (SpecialReg == "basepri_max") {
4455 int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4456 if (SYSmValue == -1)
4459 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4460 N->getOperand(2), getAL(CurDAG, DL),
4461 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4462 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4466 // We then check to see if a valid mask can be constructed for one of the
4467 // register string values permitted for the A and R class cores. These values
4468 // are apsr, spsr and cpsr; these are also valid on older cores.
4469 int Mask = getARClassRegisterMask(Reg, Flags);
4471 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4472 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4474 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4475 DL, MVT::Other, Ops));
4482 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4483 std::vector<SDValue> AsmNodeOperands;
4484 unsigned Flag, Kind;
4485 bool Changed = false;
4486 unsigned NumOps = N->getNumOperands();
4488 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4489 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4490 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4491 // respectively. Since there is no constraint to explicitly specify a
4492 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4493 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4494 // them into a GPRPair.
4497 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4498 : SDValue(nullptr,0);
4500 SmallVector<bool, 8> OpChanged;
4501 // Glue node will be appended late.
4502 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4503 SDValue op = N->getOperand(i);
4504 AsmNodeOperands.push_back(op);
4506 if (i < InlineAsm::Op_FirstOperand)
4509 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4510 Flag = C->getZExtValue();
4511 Kind = InlineAsm::getKind(Flag);
4516 // Immediate operands to inline asm in the SelectionDAG are modeled with
4517 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4518 // the second is a constant with the value of the immediate. If we get here
4519 // and we have a Kind_Imm, skip the next operand, and continue.
4520 if (Kind == InlineAsm::Kind_Imm) {
4521 SDValue op = N->getOperand(++i);
4522 AsmNodeOperands.push_back(op);
4526 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4528 OpChanged.push_back(false);
4530 unsigned DefIdx = 0;
4531 bool IsTiedToChangedOp = false;
4532 // If it's a use that is tied with a previous def, it has no
4533 // reg class constraint.
4534 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4535 IsTiedToChangedOp = OpChanged[DefIdx];
4537 // Memory operands to inline asm in the SelectionDAG are modeled with two
4538 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4539 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4540 // it doesn't get misinterpreted), and continue. We do this here because
4541 // it's important to update the OpChanged array correctly before moving on.
4542 if (Kind == InlineAsm::Kind_Mem) {
4543 SDValue op = N->getOperand(++i);
4544 AsmNodeOperands.push_back(op);
4548 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4549 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4553 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4554 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4558 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4559 SDValue V0 = N->getOperand(i+1);
4560 SDValue V1 = N->getOperand(i+2);
4561 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4562 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4564 MachineRegisterInfo &MRI = MF->getRegInfo();
4566 if (Kind == InlineAsm::Kind_RegDef ||
4567 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4568 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4569 // the original GPRs.
4571 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4572 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4573 SDValue Chain = SDValue(N,0);
4575 SDNode *GU = N->getGluedUser();
4576 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4579 // Extract values from a GPRPair reg and copy to the original GPR reg.
4580 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4582 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4584 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4585 RegCopy.getValue(1));
4586 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4588 // Update the original glue user.
4589 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4590 Ops.push_back(T1.getValue(1));
4591 CurDAG->UpdateNodeOperands(GU, Ops);
4594 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4595 // GPRPair and then pass the GPRPair to the inline asm.
4596 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4598 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4599 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4601 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4603 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4605 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4606 // i32 VRs of inline asm with it.
4607 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4608 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4609 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4611 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4612 Glue = Chain.getValue(1);
4617 if(PairedReg.getNode()) {
4618 OpChanged[OpChanged.size() -1 ] = true;
4619 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4620 if (IsTiedToChangedOp)
4621 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4623 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4624 // Replace the current flag.
4625 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4626 Flag, dl, MVT::i32);
4627 // Add the new register node and skip the original two GPRs.
4628 AsmNodeOperands.push_back(PairedReg);
4629 // Skip the next two GPRs.
4635 AsmNodeOperands.push_back(Glue);
4639 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4640 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4642 ReplaceNode(N, New.getNode());
4647 bool ARMDAGToDAGISel::
4648 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4649 std::vector<SDValue> &OutOps) {
4650 switch(ConstraintID) {
4652 llvm_unreachable("Unexpected asm memory constraint");
4653 case InlineAsm::Constraint_i:
4654 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4655 // be an immediate and not a memory constraint.
4657 case InlineAsm::Constraint_m:
4658 case InlineAsm::Constraint_o:
4659 case InlineAsm::Constraint_Q:
4660 case InlineAsm::Constraint_Um:
4661 case InlineAsm::Constraint_Un:
4662 case InlineAsm::Constraint_Uq:
4663 case InlineAsm::Constraint_Us:
4664 case InlineAsm::Constraint_Ut:
4665 case InlineAsm::Constraint_Uv:
4666 case InlineAsm::Constraint_Uy:
4667 // Require the address to be in a register. That is safe for all ARM
4668 // variants and it is hard to do anything much smarter without knowing
4669 // how the operand is used.
4670 OutOps.push_back(Op);
4676 /// createARMISelDag - This pass converts a legalized DAG into a
4677 /// ARM-specific DAG, ready for instruction scheduling.
4679 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4680 CodeGenOpt::Level OptLevel) {
4681 return new ARMDAGToDAGISel(TM, OptLevel);