1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM target.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Target/TargetLowering.h"
35 #include "llvm/Target/TargetOptions.h"
39 #define DEBUG_TYPE "arm-isel"
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
53 AM2_BASE, // Simple AM2 (+-imm12)
54 AM2_SHOP // Shifter-op AM2
57 class ARMDAGToDAGISel : public SelectionDAGISel {
58 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
59 /// make the right decision when generating code for different targets.
60 const ARMSubtarget *Subtarget;
63 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
64 : SelectionDAGISel(tm, OptLevel) {}
66 bool runOnMachineFunction(MachineFunction &MF) override {
67 // Reset the subtarget each time through.
68 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 SelectionDAGISel::runOnMachineFunction(MF);
73 StringRef getPassName() const override { return "ARM Instruction Selection"; }
75 void PreprocessISelDAG() override;
77 /// getI32Imm - Return a target constant of type i32 with the specified
/// immediate value.
79 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
80 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
// Main SelectionDAGISel hook: pick a machine instruction for node N.
83 void Select(SDNode *N) override;
85 bool hasNoVMLxHazardUse(SDNode *N) const;
86 bool isShifterOpProfitable(const SDValue &Shift,
87 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
88 bool SelectRegShifterOperand(SDValue N, SDValue &A,
89 SDValue &B, SDValue &C,
90 bool CheckProfitability = true);
91 bool SelectImmShifterOperand(SDValue N, SDValue &A,
92 SDValue &B, bool CheckProfitability = true);
93 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, SDValue &C) {
95 // Don't apply the profitability check
96 return SelectRegShifterOperand(N, A, B, C, false);
98 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, false);
104 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
105 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
// Shared AM2 matcher; the wrappers below filter its AddrMode2Type result.
107 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
108 SDValue &Offset, SDValue &Opc);
109 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
111 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
114 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
116 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
119 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
121 SelectAddrMode2Worker(N, Base, Offset, Opc);
122 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
123 // This always matches one way or another.
// Build the (condition-code, CPSR) operand pair for a conditional move
// from a constant predicate node.
127 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
128 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
129 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
130 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
134 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
137 SDValue &Offset, SDValue &Opc);
138 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
139 SDValue &Offset, SDValue &Opc);
140 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
141 bool SelectAddrMode3(SDValue N, SDValue &Base,
142 SDValue &Offset, SDValue &Opc);
143 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode5(SDValue N, SDValue &Base,
147 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
148 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
150 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
152 // Thumb Addressing Modes:
153 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
154 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
156 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
158 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
160 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
162 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
164 // Thumb 2 Addressing Modes:
165 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
166 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
168 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
170 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
171 SDValue &OffReg, SDValue &ShImm);
172 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
// Predicates used by the auto-generated matcher: test whether an immediate
// (or its complement) is encodable as an ARM / Thumb-2 modified immediate.
174 inline bool is_so_imm(unsigned Imm) const {
175 return ARM_AM::getSOImmVal(Imm) != -1;
178 inline bool is_so_imm_not(unsigned Imm) const {
179 return ARM_AM::getSOImmVal(~Imm) != -1;
182 inline bool is_t2_so_imm(unsigned Imm) const {
183 return ARM_AM::getT2SOImmVal(Imm) != -1;
186 inline bool is_t2_so_imm_not(unsigned Imm) const {
187 return ARM_AM::getT2SOImmVal(~Imm) != -1;
190 // Include the pieces autogenerated from the target description.
191 #include "ARMGenDAGISel.inc"
194 void transferMemOperands(SDNode *Src, SDNode *Dst);
196 /// Indexed (pre/post inc/dec) load matching code for ARM.
197 bool tryARMIndexedLoad(SDNode *N);
198 bool tryT1IndexedLoad(SDNode *N);
199 bool tryT2IndexedLoad(SDNode *N);
201 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
202 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
203 /// loads of D registers and even subregs and odd subregs of Q registers.
204 /// For NumVecs <= 2, QOpcodes1 is not used.
205 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
206 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
207 const uint16_t *QOpcodes1);
209 /// SelectVST - Select NEON store intrinsics. NumVecs should
210 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
211 /// stores of D registers and even subregs and odd subregs of Q registers.
212 /// For NumVecs <= 2, QOpcodes1 is not used.
213 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
214 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
215 const uint16_t *QOpcodes1);
217 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
218 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
219 /// load/store of D registers and Q registers.
220 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
221 unsigned NumVecs, const uint16_t *DOpcodes,
222 const uint16_t *QOpcodes);
224 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
225 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
226 /// for loading D registers. (Q registers are not supported.)
227 void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
228 const uint16_t *DOpcodes,
229 const uint16_t *QOpcodes = nullptr);
231 /// Try to select SBFX/UBFX instructions for ARM.
232 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
234 // Select special operations if node forms integer ABS pattern
235 bool tryABSOp(SDNode *N);
237 bool tryReadRegister(SDNode *N);
238 bool tryWriteRegister(SDNode *N);
240 bool tryInlineAsm(SDNode *N);
242 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
244 void SelectCMP_SWAP(SDNode *N);
246 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
247 /// inline asm expressions.
248 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
249 std::vector<SDValue> &OutOps) override;
251 // Form pairs of consecutive R, S, D, or Q registers.
252 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
253 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
254 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
255 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
257 // Form sequences of 4 consecutive S, D, or Q registers.
258 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
259 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
260 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
262 // Get the alignment operand for a NEON VLD or VST instruction.
263 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
266 /// Returns the number of instructions required to materialize the given
267 /// constant in a register, or 3 if a literal pool load is needed.
268 unsigned ConstantMaterializationCost(unsigned Val) const;
270 /// Checks if N is a multiplication by a constant where we can extract out a
271 /// power of two from the constant so that it can be used in a shift, but only
272 /// if it simplifies the materialization of the constant. Returns true if it
273 /// is, and assigns to PowerOfTwo the power of two that should be extracted
274 /// out and to NewMulConst the new constant to be multiplied by.
275 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
276 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
278 /// Replace N with M in CurDAG, in a way that also ensures that M gets
279 /// selected when N would have been selected.
280 void replaceDAGValue(const SDValue &N, SDValue M);
284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
285 /// operand. If so Imm will receive the 32-bit value.
286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
// Only an ISD::Constant whose result type is i32 qualifies; its zero-extended
// value is written to Imm.
287 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
288 Imm = cast<ConstantSDNode>(N)->getZExtValue();
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
// Delegate to the SDNode* overload above.
297 return isInt32Immediate(N.getNode(), Imm);
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
304 return N->getOpcode() == Opc &&
305 isInt32Immediate(N->getOperand(1).getNode(), Imm);
308 /// \brief Check whether a particular node is a constant value representable as
309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
/// \return true only if the constant is an exact multiple of \p Scale and the
/// quotient lies in the half-open range [RangeMin, RangeMax).
312 static bool isScaledConstantInRange(SDValue Node, int Scale,
313 int RangeMin, int RangeMax,
314 int &ScaledConstant) {
315 assert(Scale > 0 && "Invalid scale!");
317 // Check that this is a constant.
318 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
// Reject constants that are not an exact multiple of the scale.
322 ScaledConstant = (int) C->getZExtValue();
323 if ((ScaledConstant % Scale) != 0)
326 ScaledConstant /= Scale;
327 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
/// Pre-isel DAG rewrite: canonicalize add-of-masked-shift so that the left
/// shift can be folded as a shifter operand and the and/srl become a
/// bitfield extraction. Guarded on v6t2 (UBFX/SBFX availability).
330 void ARMDAGToDAGISel::PreprocessISelDAG() {
331 if (!Subtarget->hasV6T2Ops())
334 bool isThumb2 = Subtarget->isThumb();
335 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
336 E = CurDAG->allnodes_end(); I != E; ) {
337 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
339 if (N->getOpcode() != ISD::ADD)
342 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
343 // leading zeros, followed by consecutive set bits, followed by 1 or 2
344 // trailing zeros, e.g. 1020.
345 // Transform the expression to
346 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
347 // of trailing zeros of c2. The left shift would be folded as an shifter
348 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
351 SDValue N0 = N->getOperand(0);
352 SDValue N1 = N->getOperand(1);
353 unsigned And_imm = 0;
// Allow the AND on either side of the ADD (swap so it ends up in N1).
354 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
355 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
361 // Check if the AND mask is an immediate of the form: 000.....1111111100
362 unsigned TZ = countTrailingZeros(And_imm);
363 if (TZ != 1 && TZ != 2)
364 // Be conservative here. Shifter operands aren't always free. e.g. On
365 // Swift, left shifter operand of 1 / 2 for free but others are not.
367 // ubfx r3, r1, #16, #8
368 // ldr.w r3, [r0, r3, lsl #2]
371 // and.w r2, r9, r1, lsr #14
// Mask (after shifting out trailing zeros) must be a contiguous run of
// set bits: And_imm & (And_imm + 1) is nonzero iff there is a "hole".
375 if (And_imm & (And_imm + 1))
378 // Look for (and (srl X, c1), c2).
379 SDValue Srl = N1.getOperand(0);
380 unsigned Srl_imm = 0;
381 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
385 // Make sure first operand is not a shifter operand which would prevent
386 // folding of the left shift.
391 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
394 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
395 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
399 // Now make the transformation.
400 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
402 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
404 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
406 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
407 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
408 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
// Splice the rewritten operand back into the ADD in place.
409 CurDAG->UpdateNodeOperands(N, N0, N1);
413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
415 /// least on current ARM implementations) which should be avoided.
416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
// At -O0 don't bother with hazard-driven selection decisions.
417 if (OptLevel == CodeGenOpt::None)
420 if (!Subtarget->hasVMLxHazards())
426 SDNode *Use = *N->use_begin();
427 if (Use->getOpcode() == ISD::CopyToReg)
429 if (Use->isMachineOpcode()) {
430 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
431 CurDAG->getSubtarget().getInstrInfo());
433 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
436 unsigned Opcode = MCID.getOpcode();
// VFP-to-GPR moves are known hazard consumers.
437 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
439 // vmlx feeding into another vmlx. We actually want to unfold
440 // the use later in the MLxExpansion pass. e.g.
442 // vmla (stall 8 cycles)
447 // This adds up to about 18 - 19 cycles.
450 // vmul (stall 4 cycles)
451 // vadd adds up to about 14 cycles.
452 return TII->isFpMLxInstruction(Opcode);
// Decide whether folding this shift into an operand is profitable. On most
// cores any shifter operand is fine; only Cortex-A9-like and Swift cores
// restrict it (and then only LSL #2, or LSL #1 on Swift, for multi-use
// shifts).
458 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
459 ARM_AM::ShiftOpc ShOpcVal,
461 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
463 if (Shift.hasOneUse())
466 return ShOpcVal == ARM_AM::lsl &&
467 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
// Number of instructions needed to put Val into a register; 3 means a
// literal-pool load is required. Thumb and ARM encodings are costed
// separately.
470 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
471 if (Subtarget->isThumb()) {
472 if (Val <= 255) return 1; // MOV
473 if (Subtarget->hasV6T2Ops() &&
474 (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
476 if (Val <= 510) return 2; // MOV + ADDi8
477 if (~Val <= 255) return 2; // MOV + MVN
478 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
480 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
481 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
482 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
483 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
485 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
486 return 3; // Literal pool load
// See the declaration comment: splits (mul X, C) into (shl (mul X, C'), P)
// when C = C' << P and C' is cheaper to materialize than C.
489 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
491 unsigned &PowerOfTwo,
492 SDValue &NewMulConst) const {
493 assert(N.getOpcode() == ISD::MUL);
494 assert(MaxShift > 0);
496 // If the multiply is used in more than one place then changing the constant
497 // will make other uses incorrect, so don't.
498 if (!N.hasOneUse()) return false;
499 // Check if the multiply is by a constant
500 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
501 if (!MulConst) return false;
502 // If the constant is used in more than one place then modifying it will mean
503 // we need to materialize two constants instead of one, which is a bad idea.
504 if (!MulConst->hasOneUse()) return false;
505 unsigned MulConstVal = MulConst->getZExtValue();
506 if (MulConstVal == 0) return false;
508 // Find the largest power of 2 that MulConstVal is a multiple of
// Start at MaxShift and shrink until (1 << PowerOfTwo) divides the constant.
509 PowerOfTwo = MaxShift;
510 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
512 if (PowerOfTwo == 0) return false;
515 // Only optimise if the new cost is better
516 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
517 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
518 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
519 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
520 return NewCost < OldCost;
// Move M to N's position in the DAG's node list, then redirect all uses of N
// to M (see the declaration comment for why the reposition matters).
523 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
524 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
525 CurDAG->ReplaceAllUsesWith(N, M);
// Match a shift-by-immediate shifter operand, producing the base register and
// the encoded shift opcode/amount (Opc). Respects -disable-shifter-op.
528 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
531 bool CheckProfitability) {
532 if (DisableShifterOp)
535 // If N is a multiply-by-constant and it's profitable to extract a shift and
536 // use it in a shifted operand do so.
537 if (N.getOpcode() == ISD::MUL) {
538 unsigned PowerOfTwo = 0;
540 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
// HandleSDNode keeps a stable reference to N across the RAUW below.
541 HandleSDNode Handle(N);
543 replaceDAGValue(N.getOperand(1), NewMulConst);
544 BaseReg = Handle.getValue();
545 Opc = CurDAG->getTargetConstant(
546 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
551 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
553 // Don't match base register only case. That is matched to a separate
554 // lower complexity pattern with explicit register operand.
555 if (ShOpcVal == ARM_AM::no_shift) return false;
557 BaseReg = N.getOperand(0);
558 unsigned ShImmVal = 0;
// Immediate form requires a constant shift amount (5 bits).
559 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
560 if (!RHS) return false;
561 ShImmVal = RHS->getZExtValue() & 31;
562 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// Match a shift-by-register shifter operand. A constant shift amount is
// rejected here — that form is handled by SelectImmShifterOperand above.
567 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
571 bool CheckProfitability) {
572 if (DisableShifterOp)
575 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
577 // Don't match base register only case. That is matched to a separate
578 // lower complexity pattern with explicit register operand.
579 if (ShOpcVal == ARM_AM::no_shift) return false;
581 BaseReg = N.getOperand(0);
582 unsigned ShImmVal = 0;
583 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
584 if (RHS) return false;
586 ShReg = N.getOperand(1);
587 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
589 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
// Addressing mode [reg +/- imm12] used by LDRi12/STRi12. Also matches frame
// indices and ARMISD::Wrapper-ed addresses with a zero offset.
595 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
598 // Match simple R + imm12 operands.
601 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
602 !CurDAG->isBaseWithConstantOffset(N)) {
603 if (N.getOpcode() == ISD::FrameIndex) {
604 // Match frame index.
605 int FI = cast<FrameIndexSDNode>(N)->getIndex();
606 Base = CurDAG->getTargetFrameIndex(
607 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
608 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// Wrapper-ed addresses are usable directly, except globals/symbols/TLS
// which need their own lowering.
612 if (N.getOpcode() == ARMISD::Wrapper &&
613 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
614 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
615 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
616 Base = N.getOperand(0);
619 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
623 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
624 int RHSC = (int)RHS->getSExtValue();
625 if (N.getOpcode() == ISD::SUB)
628 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
629 Base = N.getOperand(0);
630 if (Base.getOpcode() == ISD::FrameIndex) {
631 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
632 Base = CurDAG->getTargetFrameIndex(
633 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
635 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
// Fallback: whole expression as base, offset 0.
642 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
// Match [reg +/- reg, shift] (scaled-register) operands for AM2 loads/stores.
// Leaves plain reg+imm12 for LDRi12; turns x*[3,5,9] multiplies into
// base+shifted-offset forms.
648 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
650 if (N.getOpcode() == ISD::MUL &&
651 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
652 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
653 // X * [3,5,9] -> X + X * [2,4,8] etc.
654 int RHSC = (int)RHS->getZExtValue();
657 ARM_AM::AddrOpc AddSub = ARM_AM::add;
659 AddSub = ARM_AM::sub;
662 if (isPowerOf2_32(RHSC)) {
663 unsigned ShAmt = Log2_32(RHSC);
// Same register is both base and offset: X + (X << ShAmt).
664 Base = Offset = N.getOperand(0);
665 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
674 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
675 // ISD::OR that is equivalent to an ISD::ADD.
676 !CurDAG->isBaseWithConstantOffset(N))
679 // Leave simple R +/- imm12 operands for LDRi12
680 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
682 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
683 -0x1000+1, 0x1000, RHSC)) // 12 bits.
687 // Otherwise this is R +/- [possibly shifted] R.
688 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
689 ARM_AM::ShiftOpc ShOpcVal =
690 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
693 Base = N.getOperand(0);
694 Offset = N.getOperand(1);
696 if (ShOpcVal != ARM_AM::no_shift) {
697 // Check to see if the RHS of the shift is a constant, if not, we can't fold
699 if (ConstantSDNode *Sh =
700 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
701 ShAmt = Sh->getZExtValue();
702 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
703 Offset = N.getOperand(1).getOperand(0);
706 ShOpcVal = ARM_AM::no_shift;
709 ShOpcVal = ARM_AM::no_shift;
713 // Try matching (R shl C) + (R).
714 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
715 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
716 N.getOperand(0).hasOneUse())) {
717 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
718 if (ShOpcVal != ARM_AM::no_shift) {
719 // Check to see if the RHS of the shift is a constant, if not, we can't
721 if (ConstantSDNode *Sh =
722 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
723 ShAmt = Sh->getZExtValue();
724 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
// Shifted value becomes the offset; the other addend is the base.
725 Offset = N.getOperand(0).getOperand(0);
726 Base = N.getOperand(1);
729 ShOpcVal = ARM_AM::no_shift;
732 ShOpcVal = ARM_AM::no_shift;
737 // If Offset is a multiply-by-constant and it's profitable to extract a shift
738 // and use it in a shifted operand do so.
739 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
740 unsigned PowerOfTwo = 0;
742 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
743 replaceDAGValue(Offset.getOperand(1), NewMulConst);
745 ShOpcVal = ARM_AM::lsl;
749 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// Shared AM2 matcher used by SelectAddrMode2Base/ShOp/2. Classifies the
// result as AM2_BASE (reg +/- imm12) or AM2_SHOP (reg +/- shifted reg) and
// fills Base/Offset/Opc accordingly.
757 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
761 if (N.getOpcode() == ISD::MUL &&
762 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
763 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
764 // X * [3,5,9] -> X + X * [2,4,8] etc.
765 int RHSC = (int)RHS->getZExtValue();
768 ARM_AM::AddrOpc AddSub = ARM_AM::add;
770 AddSub = ARM_AM::sub;
773 if (isPowerOf2_32(RHSC)) {
774 unsigned ShAmt = Log2_32(RHSC);
// Same register serves as base and (shifted) offset.
775 Base = Offset = N.getOperand(0);
776 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
785 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
786 // ISD::OR that is equivalent to an ADD.
787 !CurDAG->isBaseWithConstantOffset(N)) {
789 if (N.getOpcode() == ISD::FrameIndex) {
790 int FI = cast<FrameIndexSDNode>(N)->getIndex();
791 Base = CurDAG->getTargetFrameIndex(
792 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
793 } else if (N.getOpcode() == ARMISD::Wrapper &&
794 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
795 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
796 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
797 Base = N.getOperand(0);
// No offset: use register 0 with a zero add encoding.
799 Offset = CurDAG->getRegister(0, MVT::i32);
800 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
806 // Match simple R +/- imm12 operands.
807 if (N.getOpcode() != ISD::SUB) {
809 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
810 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
811 Base = N.getOperand(0);
812 if (Base.getOpcode() == ISD::FrameIndex) {
813 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
814 Base = CurDAG->getTargetFrameIndex(
815 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
817 Offset = CurDAG->getRegister(0, MVT::i32);
819 ARM_AM::AddrOpc AddSub = ARM_AM::add;
821 AddSub = ARM_AM::sub;
824 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
// On A9-like/Swift cores, a multi-use address is better computed once and
// reused than re-folded into each access.
831 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
832 // Compute R +/- (R << N) and reuse it.
834 Offset = CurDAG->getRegister(0, MVT::i32);
835 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
841 // Otherwise this is R +/- [possibly shifted] R.
842 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
843 ARM_AM::ShiftOpc ShOpcVal =
844 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
847 Base = N.getOperand(0);
848 Offset = N.getOperand(1);
850 if (ShOpcVal != ARM_AM::no_shift) {
851 // Check to see if the RHS of the shift is a constant, if not, we can't fold
853 if (ConstantSDNode *Sh =
854 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
855 ShAmt = Sh->getZExtValue();
856 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
857 Offset = N.getOperand(1).getOperand(0);
860 ShOpcVal = ARM_AM::no_shift;
863 ShOpcVal = ARM_AM::no_shift;
867 // Try matching (R shl C) + (R).
868 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
869 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
870 N.getOperand(0).hasOneUse())) {
871 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
872 if (ShOpcVal != ARM_AM::no_shift) {
873 // Check to see if the RHS of the shift is a constant, if not, we can't
875 if (ConstantSDNode *Sh =
876 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
877 ShAmt = Sh->getZExtValue();
878 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
879 Offset = N.getOperand(0).getOperand(0);
880 Base = N.getOperand(1);
883 ShOpcVal = ARM_AM::no_shift;
886 ShOpcVal = ARM_AM::no_shift;
891 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// Match the register (possibly shifted) offset operand for a pre/post-indexed
// AM2 load/store. Direction (add/sub) comes from the memory node's
// addressing mode.
896 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
897 SDValue &Offset, SDValue &Opc) {
898 unsigned Opcode = Op->getOpcode();
899 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
900 ? cast<LoadSDNode>(Op)->getAddressingMode()
901 : cast<StoreSDNode>(Op)->getAddressingMode();
902 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
903 ? ARM_AM::add : ARM_AM::sub;
// A small constant belongs to the immediate form, not this register form.
905 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
909 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
911 if (ShOpcVal != ARM_AM::no_shift) {
912 // Check to see if the RHS of the shift is a constant, if not, we can't fold
914 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
915 ShAmt = Sh->getZExtValue();
916 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
917 Offset = N.getOperand(0);
920 ShOpcVal = ARM_AM::no_shift;
923 ShOpcVal = ARM_AM::no_shift;
927 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
// Match an immediate offset for a pre-indexed AM2 access. Unlike the
// post-indexed form below, the sign is folded into the value itself
// (negated for subtraction) rather than encoded via getAM2Opc.
932 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
933 SDValue &Offset, SDValue &Opc) {
934 unsigned Opcode = Op->getOpcode();
935 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
936 ? cast<LoadSDNode>(Op)->getAddressingMode()
937 : cast<StoreSDNode>(Op)->getAddressingMode();
938 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
939 ? ARM_AM::add : ARM_AM::sub;
941 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
942 if (AddSub == ARM_AM::sub) Val *= -1;
943 Offset = CurDAG->getRegister(0, MVT::i32);
944 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
// Match an immediate offset for an indexed AM2 access, encoding the add/sub
// direction together with the 12-bit value via getAM2Opc.
952 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
953 SDValue &Offset, SDValue &Opc) {
954 unsigned Opcode = Op->getOpcode();
955 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
956 ? cast<LoadSDNode>(Op)->getAddressingMode()
957 : cast<StoreSDNode>(Op)->getAddressingMode();
958 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
959 ? ARM_AM::add : ARM_AM::sub;
961 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
962 Offset = CurDAG->getRegister(0, MVT::i32);
963 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
965 SDLoc(Op), MVT::i32);
// Match an address that is just a base register with no offset operand.
972 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
// Addressing mode 3: [reg +/- reg] or [reg +/- imm8], used by halfword /
// doubleword loads and stores.
977 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
978 SDValue &Base, SDValue &Offset,
980 if (N.getOpcode() == ISD::SUB) {
981 // X - C is canonicalize to X + -C, no need to handle it here.
982 Base = N.getOperand(0);
983 Offset = N.getOperand(1);
984 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
989 if (!CurDAG->isBaseWithConstantOffset(N)) {
991 if (N.getOpcode() == ISD::FrameIndex) {
992 int FI = cast<FrameIndexSDNode>(N)->getIndex();
993 Base = CurDAG->getTargetFrameIndex(
994 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
// No offset: register 0 with a zero add encoding.
996 Offset = CurDAG->getRegister(0, MVT::i32);
997 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1002 // If the RHS is +/- imm8, fold into addr mode.
1004 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1005 -256 + 1, 256, RHSC)) { // 8 bits.
1006 Base = N.getOperand(0);
1007 if (Base.getOpcode() == ISD::FrameIndex) {
1008 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1009 Base = CurDAG->getTargetFrameIndex(
1010 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1012 Offset = CurDAG->getRegister(0, MVT::i32);
1014 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1016 AddSub = ARM_AM::sub;
1019 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
// Fallback: register-register form.
1024 Base = N.getOperand(0);
1025 Offset = N.getOperand(1);
1026 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
// Match the offset operand for a pre/post-indexed AM3 access: either an
// 8-bit immediate or a plain register.
1031 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1032 SDValue &Offset, SDValue &Opc) {
1033 unsigned Opcode = Op->getOpcode();
1034 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1035 ? cast<LoadSDNode>(Op)->getAddressingMode()
1036 : cast<StoreSDNode>(Op)->getAddressingMode();
1037 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1038 ? ARM_AM::add : ARM_AM::sub;
1040 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
1041 Offset = CurDAG->getRegister(0, MVT::i32);
1042 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1048 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1053 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1054 SDValue &Base, SDValue &Offset) {
1055 if (!CurDAG->isBaseWithConstantOffset(N)) {
1057 if (N.getOpcode() == ISD::FrameIndex) {
1058 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1059 Base = CurDAG->getTargetFrameIndex(
1060 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1061 } else if (N.getOpcode() == ARMISD::Wrapper &&
1062 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1063 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1064 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1065 Base = N.getOperand(0);
1067 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1068 SDLoc(N), MVT::i32);
1072 // If the RHS is +/- imm8, fold into addr mode.
1074 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1075 -256 + 1, 256, RHSC)) {
1076 Base = N.getOperand(0);
1077 if (Base.getOpcode() == ISD::FrameIndex) {
1078 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1079 Base = CurDAG->getTargetFrameIndex(
1080 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1083 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1085 AddSub = ARM_AM::sub;
1088 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1089 SDLoc(N), MVT::i32);
1094 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1095 SDLoc(N), MVT::i32);
1099 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1103 unsigned Alignment = 0;
1105 MemSDNode *MemN = cast<MemSDNode>(Parent);
1107 if (isa<LSBaseSDNode>(MemN) ||
1108 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1109 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1110 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1111 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1112 // The maximum alignment is equal to the memory size being referenced.
1113 unsigned MMOAlign = MemN->getAlignment();
1114 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1115 if (MMOAlign >= MemSize && MemSize > 1)
1116 Alignment = MemSize;
1118 // All other uses of addrmode6 are for intrinsics. For now just record
1119 // the raw alignment value; it will be refined later based on the legal
1120 // alignment operands for the intrinsic.
1121 Alignment = MemN->getAlignment();
1124 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1128 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1130 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1131 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1132 if (AM != ISD::POST_INC)
1135 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1136 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1137 Offset = CurDAG->getRegister(0, MVT::i32);
1142 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1143 SDValue &Offset, SDValue &Label) {
1144 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1145 Offset = N.getOperand(0);
1146 SDValue N1 = N.getOperand(1);
1147 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1148 SDLoc(N), MVT::i32);
1156 //===----------------------------------------------------------------------===//
1157 // Thumb Addressing Modes
1158 //===----------------------------------------------------------------------===//
1160 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1161 SDValue &Base, SDValue &Offset){
1162 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1163 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1164 if (!NC || !NC->isNullValue())
1171 Base = N.getOperand(0);
1172 Offset = N.getOperand(1);
1177 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1178 SDValue &Base, SDValue &OffImm) {
1179 if (!CurDAG->isBaseWithConstantOffset(N)) {
1180 if (N.getOpcode() == ISD::ADD) {
1181 return false; // We want to select register offset instead
1182 } else if (N.getOpcode() == ARMISD::Wrapper &&
1183 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1184 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1185 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1186 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1187 Base = N.getOperand(0);
1192 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1196 // If the RHS is + imm5 * scale, fold into addr mode.
1198 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1199 Base = N.getOperand(0);
1200 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1204 // Offset is too large, so use register offset instead.
1209 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1211 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1215 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1217 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1221 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1223 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1226 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1227 SDValue &Base, SDValue &OffImm) {
1228 if (N.getOpcode() == ISD::FrameIndex) {
1229 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1230 // Only multiples of 4 are allowed for the offset, so the frame object
1231 // alignment must be at least 4.
1232 MachineFrameInfo &MFI = MF->getFrameInfo();
1233 if (MFI.getObjectAlignment(FI) < 4)
1234 MFI.setObjectAlignment(FI, 4);
1235 Base = CurDAG->getTargetFrameIndex(
1236 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1237 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1241 if (!CurDAG->isBaseWithConstantOffset(N))
1244 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1245 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1246 (LHSR && LHSR->getReg() == ARM::SP)) {
1247 // If the RHS is + imm8 * scale, fold into addr mode.
1249 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1250 Base = N.getOperand(0);
1251 if (Base.getOpcode() == ISD::FrameIndex) {
1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1254 // indexed by the LHS must be 4-byte aligned.
1255 MachineFrameInfo &MFI = MF->getFrameInfo();
1256 if (MFI.getObjectAlignment(FI) < 4)
1257 MFI.setObjectAlignment(FI, 4);
1258 Base = CurDAG->getTargetFrameIndex(
1259 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1261 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1270 //===----------------------------------------------------------------------===//
1271 // Thumb 2 Addressing Modes
1272 //===----------------------------------------------------------------------===//
1275 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1276 SDValue &Base, SDValue &OffImm) {
1277 // Match simple R + imm12 operands.
1280 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1281 !CurDAG->isBaseWithConstantOffset(N)) {
1282 if (N.getOpcode() == ISD::FrameIndex) {
1283 // Match frame index.
1284 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1285 Base = CurDAG->getTargetFrameIndex(
1286 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1287 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1291 if (N.getOpcode() == ARMISD::Wrapper &&
1292 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1293 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1294 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1295 Base = N.getOperand(0);
1296 if (Base.getOpcode() == ISD::TargetConstantPool)
1297 return false; // We want to select t2LDRpci instead.
1300 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1304 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1305 if (SelectT2AddrModeImm8(N, Base, OffImm))
1306 // Let t2LDRi8 handle (R - imm8).
1309 int RHSC = (int)RHS->getZExtValue();
1310 if (N.getOpcode() == ISD::SUB)
1313 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1314 Base = N.getOperand(0);
1315 if (Base.getOpcode() == ISD::FrameIndex) {
1316 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1317 Base = CurDAG->getTargetFrameIndex(
1318 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1320 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1327 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1331 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1332 SDValue &Base, SDValue &OffImm) {
1333 // Match simple R - imm8 operands.
1334 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1335 !CurDAG->isBaseWithConstantOffset(N))
1338 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1339 int RHSC = (int)RHS->getSExtValue();
1340 if (N.getOpcode() == ISD::SUB)
1343 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1344 Base = N.getOperand(0);
1345 if (Base.getOpcode() == ISD::FrameIndex) {
1346 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1347 Base = CurDAG->getTargetFrameIndex(
1348 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1350 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1358 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1360 unsigned Opcode = Op->getOpcode();
1361 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1362 ? cast<LoadSDNode>(Op)->getAddressingMode()
1363 : cast<StoreSDNode>(Op)->getAddressingMode();
1365 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1366 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1367 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1368 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1375 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1377 SDValue &OffReg, SDValue &ShImm) {
1378 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1379 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1382 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1383 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1384 int RHSC = (int)RHS->getZExtValue();
1385 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1387 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1391 // Look for (R + R) or (R + (R << [1,2,3])).
1393 Base = N.getOperand(0);
1394 OffReg = N.getOperand(1);
1396 // Swap if it is ((R << c) + R).
1397 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1398 if (ShOpcVal != ARM_AM::lsl) {
1399 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1400 if (ShOpcVal == ARM_AM::lsl)
1401 std::swap(Base, OffReg);
1404 if (ShOpcVal == ARM_AM::lsl) {
1405 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1407 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1408 ShAmt = Sh->getZExtValue();
1409 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1410 OffReg = OffReg.getOperand(0);
1417 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1418 // and use it in a shifted operand do so.
1419 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1420 unsigned PowerOfTwo = 0;
1421 SDValue NewMulConst;
1422 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1423 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1428 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1433 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1435 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1438 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1440 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1443 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1447 uint32_t RHSC = (int)RHS->getZExtValue();
1448 if (RHSC > 1020 || RHSC % 4 != 0)
1451 Base = N.getOperand(0);
1452 if (Base.getOpcode() == ISD::FrameIndex) {
1453 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1454 Base = CurDAG->getTargetFrameIndex(
1455 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1458 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1462 //===--------------------------------------------------------------------===//
1464 /// getAL - Returns a ARMCC::AL immediate node.
1465 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1466 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1469 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1470 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1471 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
1472 cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
1475 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1476 LoadSDNode *LD = cast<LoadSDNode>(N);
1477 ISD::MemIndexedMode AM = LD->getAddressingMode();
1478 if (AM == ISD::UNINDEXED)
1481 EVT LoadedVT = LD->getMemoryVT();
1482 SDValue Offset, AMOpc;
1483 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1484 unsigned Opcode = 0;
1486 if (LoadedVT == MVT::i32 && isPre &&
1487 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1488 Opcode = ARM::LDR_PRE_IMM;
1490 } else if (LoadedVT == MVT::i32 && !isPre &&
1491 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1492 Opcode = ARM::LDR_POST_IMM;
1494 } else if (LoadedVT == MVT::i32 &&
1495 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1496 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1499 } else if (LoadedVT == MVT::i16 &&
1500 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1502 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1503 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1504 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1505 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1506 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1507 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1509 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1513 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1515 Opcode = ARM::LDRB_PRE_IMM;
1516 } else if (!isPre &&
1517 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1519 Opcode = ARM::LDRB_POST_IMM;
1520 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1522 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1528 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1529 SDValue Chain = LD->getChain();
1530 SDValue Base = LD->getBasePtr();
1531 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1532 CurDAG->getRegister(0, MVT::i32), Chain };
1533 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1535 transferMemOperands(N, New);
1536 ReplaceNode(N, New);
1539 SDValue Chain = LD->getChain();
1540 SDValue Base = LD->getBasePtr();
1541 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1542 CurDAG->getRegister(0, MVT::i32), Chain };
1543 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1545 transferMemOperands(N, New);
1546 ReplaceNode(N, New);
1554 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1555 LoadSDNode *LD = cast<LoadSDNode>(N);
1556 EVT LoadedVT = LD->getMemoryVT();
1557 ISD::MemIndexedMode AM = LD->getAddressingMode();
1558 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1559 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1562 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1563 if (!COffs || COffs->getZExtValue() != 4)
1566 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1567 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1568 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1570 SDValue Chain = LD->getChain();
1571 SDValue Base = LD->getBasePtr();
1572 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1573 CurDAG->getRegister(0, MVT::i32), Chain };
1574 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1575 MVT::i32, MVT::Other, Ops);
1576 transferMemOperands(N, New);
1577 ReplaceNode(N, New);
1581 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1582 LoadSDNode *LD = cast<LoadSDNode>(N);
1583 ISD::MemIndexedMode AM = LD->getAddressingMode();
1584 if (AM == ISD::UNINDEXED)
1587 EVT LoadedVT = LD->getMemoryVT();
1588 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1590 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1591 unsigned Opcode = 0;
1593 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1594 switch (LoadedVT.getSimpleVT().SimpleTy) {
1596 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1600 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1602 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1607 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1609 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1618 SDValue Chain = LD->getChain();
1619 SDValue Base = LD->getBasePtr();
1620 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1621 CurDAG->getRegister(0, MVT::i32), Chain };
1622 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1624 transferMemOperands(N, New);
1625 ReplaceNode(N, New);
1632 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1633 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1634 SDLoc dl(V0.getNode());
1636 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1637 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1638 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1639 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1640 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1643 /// \brief Form a D register from a pair of S registers.
1644 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1645 SDLoc dl(V0.getNode());
1647 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1648 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1649 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1650 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1651 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1654 /// \brief Form a quad register from a pair of D registers.
1655 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1656 SDLoc dl(V0.getNode());
1657 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1659 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1660 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1661 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1662 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1665 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1666 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1667 SDLoc dl(V0.getNode());
1668 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1670 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1671 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1672 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1673 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1676 /// \brief Form 4 consecutive S registers.
1677 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1678 SDValue V2, SDValue V3) {
1679 SDLoc dl(V0.getNode());
1681 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1682 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1683 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1684 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1685 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1686 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1687 V2, SubReg2, V3, SubReg3 };
1688 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1691 /// \brief Form 4 consecutive D registers.
1692 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1693 SDValue V2, SDValue V3) {
1694 SDLoc dl(V0.getNode());
1695 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1697 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1698 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1699 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1700 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1701 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1702 V2, SubReg2, V3, SubReg3 };
1703 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1706 /// \brief Form 4 consecutive Q registers.
1707 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1708 SDValue V2, SDValue V3) {
1709 SDLoc dl(V0.getNode());
1710 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1712 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1713 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1714 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1715 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1716 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1717 V2, SubReg2, V3, SubReg3 };
1718 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1721 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1722 /// of a NEON VLD or VST instruction. The supported values depend on the
1723 /// number of registers being loaded.
1724 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1725 unsigned NumVecs, bool is64BitVector) {
1726 unsigned NumRegs = NumVecs;
1727 if (!is64BitVector && NumVecs < 3)
1730 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1731 if (Alignment >= 32 && NumRegs == 4)
1733 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1735 else if (Alignment >= 8)
1740 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1743 static bool isVLDfixed(unsigned Opc)
1746 default: return false;
1747 case ARM::VLD1d8wb_fixed : return true;
1748 case ARM::VLD1d16wb_fixed : return true;
1749 case ARM::VLD1d64Qwb_fixed : return true;
1750 case ARM::VLD1d32wb_fixed : return true;
1751 case ARM::VLD1d64wb_fixed : return true;
1752 case ARM::VLD1d64TPseudoWB_fixed : return true;
1753 case ARM::VLD1d64QPseudoWB_fixed : return true;
1754 case ARM::VLD1q8wb_fixed : return true;
1755 case ARM::VLD1q16wb_fixed : return true;
1756 case ARM::VLD1q32wb_fixed : return true;
1757 case ARM::VLD1q64wb_fixed : return true;
1758 case ARM::VLD1DUPd8wb_fixed : return true;
1759 case ARM::VLD1DUPd16wb_fixed : return true;
1760 case ARM::VLD1DUPd32wb_fixed : return true;
1761 case ARM::VLD1DUPq8wb_fixed : return true;
1762 case ARM::VLD1DUPq16wb_fixed : return true;
1763 case ARM::VLD1DUPq32wb_fixed : return true;
1764 case ARM::VLD2d8wb_fixed : return true;
1765 case ARM::VLD2d16wb_fixed : return true;
1766 case ARM::VLD2d32wb_fixed : return true;
1767 case ARM::VLD2q8PseudoWB_fixed : return true;
1768 case ARM::VLD2q16PseudoWB_fixed : return true;
1769 case ARM::VLD2q32PseudoWB_fixed : return true;
1770 case ARM::VLD2DUPd8wb_fixed : return true;
1771 case ARM::VLD2DUPd16wb_fixed : return true;
1772 case ARM::VLD2DUPd32wb_fixed : return true;
1776 static bool isVSTfixed(unsigned Opc)
1779 default: return false;
1780 case ARM::VST1d8wb_fixed : return true;
1781 case ARM::VST1d16wb_fixed : return true;
1782 case ARM::VST1d32wb_fixed : return true;
1783 case ARM::VST1d64wb_fixed : return true;
1784 case ARM::VST1q8wb_fixed : return true;
1785 case ARM::VST1q16wb_fixed : return true;
1786 case ARM::VST1q32wb_fixed : return true;
1787 case ARM::VST1q64wb_fixed : return true;
1788 case ARM::VST1d64TPseudoWB_fixed : return true;
1789 case ARM::VST1d64QPseudoWB_fixed : return true;
1790 case ARM::VST2d8wb_fixed : return true;
1791 case ARM::VST2d16wb_fixed : return true;
1792 case ARM::VST2d32wb_fixed : return true;
1793 case ARM::VST2q8PseudoWB_fixed : return true;
1794 case ARM::VST2q16PseudoWB_fixed : return true;
1795 case ARM::VST2q32PseudoWB_fixed : return true;
1799 // Get the register stride update opcode of a VLD/VST instruction that
1800 // is otherwise equivalent to the given fixed stride updating instruction.
1801 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1802 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1803 && "Incorrect fixed stride updating instruction.");
1806 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1807 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1808 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1809 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1810 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1811 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1812 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1813 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1814 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1815 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1816 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1817 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1818 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1819 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1820 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1821 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1822 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1823 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1825 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1826 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1827 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1828 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1829 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1830 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1831 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1832 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1833 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1834 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1836 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1837 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1838 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1839 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1840 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1841 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1843 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1844 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1845 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1846 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1847 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1848 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1850 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1851 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1852 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1854 return Opc; // If not one we handle, return it unchanged.
1857 /// Returns true if the given increment is a Constant known to be equal to the
1858 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1860 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1861 auto C = dyn_cast<ConstantSDNode>(Inc);
1862 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1865 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1866 const uint16_t *DOpcodes,
1867 const uint16_t *QOpcodes0,
1868 const uint16_t *QOpcodes1) {
1869 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1872 SDValue MemAddr, Align;
1873 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1874 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1877 SDValue Chain = N->getOperand(0);
1878 EVT VT = N->getValueType(0);
1879 bool is64BitVector = VT.is64BitVector();
1880 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1882 unsigned OpcodeIndex;
1883 switch (VT.getSimpleVT().SimpleTy) {
1884 default: llvm_unreachable("unhandled vld type");
1885 // Double-register operations:
1886 case MVT::v8i8: OpcodeIndex = 0; break;
1887 case MVT::v4i16: OpcodeIndex = 1; break;
1889 case MVT::v2i32: OpcodeIndex = 2; break;
1890 case MVT::v1i64: OpcodeIndex = 3; break;
1891 // Quad-register operations:
1892 case MVT::v16i8: OpcodeIndex = 0; break;
1893 case MVT::v8i16: OpcodeIndex = 1; break;
1895 case MVT::v4i32: OpcodeIndex = 2; break;
1897 case MVT::v2i64: OpcodeIndex = 3;
1898 assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1906 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1909 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1911 std::vector<EVT> ResTys;
1912 ResTys.push_back(ResTy);
1914 ResTys.push_back(MVT::i32);
1915 ResTys.push_back(MVT::Other);
1917 SDValue Pred = getAL(CurDAG, dl);
1918 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1920 SmallVector<SDValue, 7> Ops;
1922 // Double registers and VLD1/VLD2 quad registers are directly supported.
1923 if (is64BitVector || NumVecs <= 2) {
1924 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1925 QOpcodes0[OpcodeIndex]);
1926 Ops.push_back(MemAddr);
1927 Ops.push_back(Align);
1929 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1930 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1931 // case entirely when the rest are updated to that form, too.
1932 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
1933 if ((NumVecs <= 2) && !IsImmUpdate)
1934 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1935 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1936 // check for that explicitly too. Horribly hacky, but temporary.
1937 if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
1938 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
1940 Ops.push_back(Pred);
1941 Ops.push_back(Reg0);
1942 Ops.push_back(Chain);
1943 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1946 // Otherwise, quad registers are loaded with two separate instructions,
1947 // where one loads the even registers and the other loads the odd registers.
1948 EVT AddrTy = MemAddr.getValueType();
1950 // Load the even subregs. This is always an updating load, so that it
1951 // provides the address to the second load for the odd subregs.
1953 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1954 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1955 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1956 ResTy, AddrTy, MVT::Other, OpsA);
1957 Chain = SDValue(VLdA, 2);
1959 // Load the odd subregs.
1960 Ops.push_back(SDValue(VLdA, 1));
1961 Ops.push_back(Align);
1963 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1964 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1965 "only constant post-increment update allowed for VLD3/4");
1967 Ops.push_back(Reg0);
1969 Ops.push_back(SDValue(VLdA, 0));
1970 Ops.push_back(Pred);
1971 Ops.push_back(Reg0);
1972 Ops.push_back(Chain);
1973 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1976 // Transfer memoperands.
1977 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1978 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1979 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1982 ReplaceNode(N, VLd);
1986 // Extract out the subregisters.
1987 SDValue SuperReg = SDValue(VLd, 0);
1988 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1989 ARM::qsub_3 == ARM::qsub_0 + 3,
1990 "Unexpected subreg numbering");
1991 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1992 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1993 ReplaceUses(SDValue(N, Vec),
1994 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1995 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1997 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1998 CurDAG->RemoveDeadNode(N);
/// SelectVST - Select a NEON vstN intrinsic (plain or post-incrementing,
/// per \p isUpdating) into the corresponding store machine node.
/// \p DOpcodes / \p QOpcodes0 are opcode tables indexed by element size
/// (OpcodeIndex below); \p QOpcodes1 is used only for the two-instruction
/// QQQQ form that stores even and odd D registers separately.
/// NOTE(review): the embedded line numbers jump in places, so some source
/// lines appear to be elided in this view — verify details against the
/// full file before relying on exact control flow here.
2001 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2002 const uint16_t *DOpcodes,
2003 const uint16_t *QOpcodes0,
2004 const uint16_t *QOpcodes1) {
2005 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2008 SDValue MemAddr, Align;
// Operand layout: updating nodes carry the address at index 1, plain
// intrinsic nodes at index 2 (index 1 is the intrinsic ID).
2009 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2010 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2011 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
// Capture the memory operand up front; it is attached to whichever
// machine node(s) we create below.
2014 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2015 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2017 SDValue Chain = N->getOperand(0);
2018 EVT VT = N->getOperand(Vec0Idx).getValueType();
2019 bool is64BitVector = VT.is64BitVector();
2020 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
// Map the vector element type onto an index into the opcode tables.
2022 unsigned OpcodeIndex;
2023 switch (VT.getSimpleVT().SimpleTy) {
2024 default: llvm_unreachable("unhandled vst type");
2025 // Double-register operations:
2026 case MVT::v8i8: OpcodeIndex = 0; break;
2027 case MVT::v4i16: OpcodeIndex = 1; break;
2029 case MVT::v2i32: OpcodeIndex = 2; break;
2030 case MVT::v1i64: OpcodeIndex = 3; break;
2031 // Quad-register operations:
2032 case MVT::v16i8: OpcodeIndex = 0; break;
2033 case MVT::v8i16: OpcodeIndex = 1; break;
2035 case MVT::v4i32: OpcodeIndex = 2; break;
2037 case MVT::v2i64: OpcodeIndex = 3;
2038 assert(NumVecs == 1 && "v2i64 type only supported for VST1");
// Result types: updating stores additionally produce the incremented
// address (i32) ahead of the chain.
2042 std::vector<EVT> ResTys;
2044 ResTys.push_back(MVT::i32);
2045 ResTys.push_back(MVT::Other);
2047 SDValue Pred = getAL(CurDAG, dl);
2048 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2049 SmallVector<SDValue, 7> Ops;
2051 // Double registers and VST1/VST2 quad registers are directly supported.
2052 if (is64BitVector || NumVecs <= 2) {
// Build the (possibly pseudo REG_SEQUENCE) source register operand.
// NOTE(review): intervening lines look elided here; in the full file the
// first branch is guarded on NumVecs == 1 — confirm before editing.
2055 SrcReg = N->getOperand(Vec0Idx);
2056 } else if (is64BitVector) {
2057 // Form a REG_SEQUENCE to force register allocation.
2058 SDValue V0 = N->getOperand(Vec0Idx + 0);
2059 SDValue V1 = N->getOperand(Vec0Idx + 1);
2061 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2063 SDValue V2 = N->getOperand(Vec0Idx + 2);
2064 // If it's a vst3, form a quad D-register and leave the last part as
2066 SDValue V3 = (NumVecs == 3)
2067 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2068 : N->getOperand(Vec0Idx + 3);
2069 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2072 // Form a QQ register.
2073 SDValue Q0 = N->getOperand(Vec0Idx);
2074 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2075 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2078 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2079 QOpcodes0[OpcodeIndex]);
2080 Ops.push_back(MemAddr);
2081 Ops.push_back(Align);
// For updating stores, a non-"perfect" (register) increment switches to
// the register-update opcode variant.
2083 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2084 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2085 // case entirely when the rest are updated to that form, too.
2086 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2087 if (NumVecs <= 2 && !IsImmUpdate)
2088 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2089 // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2090 // check for that explicitly too. Horribly hacky, but temporary.
2093 else if (NumVecs > 2 && !isVSTfixed(Opc))
2094 Ops.push_back(Reg0);
2096 Ops.push_back(SrcReg);
2097 Ops.push_back(Pred);
2098 Ops.push_back(Reg0);
2099 Ops.push_back(Chain);
2100 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2102 // Transfer memoperands.
2103 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2105 ReplaceNode(N, VSt);
2109 // Otherwise, quad registers are stored with two separate instructions,
2110 // where one stores the even registers and the other stores the odd registers.
2112 // Form the QQQQ REG_SEQUENCE.
2113 SDValue V0 = N->getOperand(Vec0Idx + 0);
2114 SDValue V1 = N->getOperand(Vec0Idx + 1);
2115 SDValue V2 = N->getOperand(Vec0Idx + 2);
2116 SDValue V3 = (NumVecs == 3)
2117 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2118 : N->getOperand(Vec0Idx + 3)
2119 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2121 // Store the even D registers. This is always an updating store, so that it
2122 // provides the address to the second store for the odd subregs.
2123 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2124 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2125 MemAddr.getValueType(),
2127 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2128 Chain = SDValue(VStA, 1);
2130 // Store the odd D registers.
// The first result of VStA is the post-incremented address, which feeds
// the second store's address operand.
2131 Ops.push_back(SDValue(VStA, 0));
2132 Ops.push_back(Align);
2134 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2135 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2136 "only constant post-increment update allowed for VST3/4");
2138 Ops.push_back(Reg0);
2140 Ops.push_back(RegSeq);
2141 Ops.push_back(Pred);
2142 Ops.push_back(Reg0);
2143 Ops.push_back(Chain);
2144 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2146 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2147 ReplaceNode(N, VStB);
/// SelectVLDSTLane - Select a single-lane NEON load or store
/// (vld[234]lane / vst[234]lane), plain or post-incrementing.
/// \p IsLoad distinguishes the load form (results are extracted as
/// subregisters afterwards) from the store form. \p DOpcodes / \p QOpcodes
/// are opcode tables indexed by element size (OpcodeIndex below).
/// NOTE(review): embedded line numbers jump in places, so some source
/// lines appear to be elided in this view.
2150 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2152 const uint16_t *DOpcodes,
2153 const uint16_t *QOpcodes) {
2154 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2157 SDValue MemAddr, Align;
// Operand layout mirrors SelectVLD/SelectVST: updating nodes carry the
// address at index 1, plain intrinsic nodes at index 2.
2158 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2159 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2160 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2163 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2164 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2166 SDValue Chain = N->getOperand(0);
// The lane index follows the NumVecs vector operands.
2168 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2169 EVT VT = N->getOperand(Vec0Idx).getValueType();
2170 bool is64BitVector = VT.is64BitVector();
// Clamp the alignment operand: no more than the total bytes accessed,
// and rounded down to a power of two.
2172 unsigned Alignment = 0;
2174 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2175 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2176 if (Alignment > NumBytes)
2177 Alignment = NumBytes;
2178 if (Alignment < 8 && Alignment < NumBytes)
2180 // Alignment must be a power of two; make sure of that.
2181 Alignment = (Alignment & -Alignment);
2185 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2187 unsigned OpcodeIndex;
2188 switch (VT.getSimpleVT().SimpleTy) {
2189 default: llvm_unreachable("unhandled vld/vst lane type");
2190 // Double-register operations:
2191 case MVT::v8i8: OpcodeIndex = 0; break;
2192 case MVT::v4i16: OpcodeIndex = 1; break;
2194 case MVT::v2i32: OpcodeIndex = 2; break;
2195 // Quad-register operations:
2196 case MVT::v8i16: OpcodeIndex = 0; break;
2198 case MVT::v4i32: OpcodeIndex = 1; break;
2201 std::vector<EVT> ResTys;
// vld3 still occupies 4 result registers (the last is unused padding).
2203 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2206 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2207 MVT::i64, ResTyElts));
2210 ResTys.push_back(MVT::i32);
2211 ResTys.push_back(MVT::Other);
2213 SDValue Pred = getAL(CurDAG, dl);
2214 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2216 SmallVector<SDValue, 8> Ops;
2217 Ops.push_back(MemAddr);
2218 Ops.push_back(Align);
// Immediate ("perfect") increments are encoded implicitly via Reg0;
// otherwise pass the register increment explicitly.
2220 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2222 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2223 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
// Gather the input vectors into a single pseudo super-register.
2227 SDValue V0 = N->getOperand(Vec0Idx + 0);
2228 SDValue V1 = N->getOperand(Vec0Idx + 1);
2231 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2233 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2235 SDValue V2 = N->getOperand(Vec0Idx + 2);
2236 SDValue V3 = (NumVecs == 3)
2237 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2238 : N->getOperand(Vec0Idx + 3);
2240 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2242 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2244 Ops.push_back(SuperReg);
2245 Ops.push_back(getI32Imm(Lane, dl));
2246 Ops.push_back(Pred);
2247 Ops.push_back(Reg0);
2248 Ops.push_back(Chain);
2250 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2251 QOpcodes[OpcodeIndex]);
2252 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2253 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2255 ReplaceNode(N, VLdLn);
2259 // Extract the subregisters.
2260 SuperReg = SDValue(VLdLn, 0);
2261 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2262 ARM::qsub_3 == ARM::qsub_0 + 3,
2263 "Unexpected subreg numbering");
2264 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2265 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2266 ReplaceUses(SDValue(N, Vec),
2267 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
// Remap the chain (and, for updating forms, the address) results.
2268 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2270 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2271 CurDAG->RemoveDeadNode(N);
/// SelectVLDDup - Select a NEON load-and-duplicate (vld[1234]dup),
/// plain or post-incrementing per \p isUpdating. \p DOpcodes / \p QOpcodes
/// are opcode tables indexed by element size.
/// NOTE(review): embedded line numbers jump in places, so some source
/// lines appear to be elided in this view.
2274 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2275 const uint16_t *DOpcodes,
2276 const uint16_t *QOpcodes) {
2277 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2280 SDValue MemAddr, Align;
2281 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2284 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2285 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2287 SDValue Chain = N->getOperand(0);
2288 EVT VT = N->getValueType(0);
// Clamp the alignment operand, same scheme as SelectVLDSTLane: no more
// than the total bytes accessed, rounded down to a power of two.
2290 unsigned Alignment = 0;
2292 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2293 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2294 if (Alignment > NumBytes)
2295 Alignment = NumBytes;
2296 if (Alignment < 8 && Alignment < NumBytes)
2298 // Alignment must be a power of two; make sure of that.
2299 Alignment = (Alignment & -Alignment);
2303 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
// Pick the D- or Q-register opcode directly from the result type.
2306 switch (VT.getSimpleVT().SimpleTy) {
2307 default: llvm_unreachable("unhandled vld-dup type");
2308 case MVT::v8i8: Opc = DOpcodes[0]; break;
2309 case MVT::v16i8: Opc = QOpcodes[0]; break;
2310 case MVT::v4i16: Opc = DOpcodes[1]; break;
2311 case MVT::v8i16: Opc = QOpcodes[1]; break;
2313 case MVT::v2i32: Opc = DOpcodes[2]; break;
2315 case MVT::v4i32: Opc = QOpcodes[2]; break;
2318 SDValue Pred = getAL(CurDAG, dl);
2319 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2320 SmallVector<SDValue, 6> Ops;
2321 Ops.push_back(MemAddr);
2322 Ops.push_back(Align);
2324 // fixed-stride update instructions don't have an explicit writeback
2325 // operand. It's implicit in the opcode itself.
2326 SDValue Inc = N->getOperand(2);
2328 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2329 if (NumVecs <= 2 && !IsImmUpdate)
2330 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2333 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2334 else if (NumVecs > 2)
2335 Ops.push_back(Reg0);
2337 Ops.push_back(Pred);
2338 Ops.push_back(Reg0);
2339 Ops.push_back(Chain);
// vld3dup still occupies 4 result registers (last one is padding).
2341 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2342 std::vector<EVT> ResTys;
2343 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2345 ResTys.push_back(MVT::i32);
2346 ResTys.push_back(MVT::Other);
2347 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2348 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2350 // Extract the subregisters.
2352 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2354 SDValue SuperReg = SDValue(VLdDup, 0);
2355 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2356 unsigned SubIdx = ARM::dsub_0;
2357 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2358 ReplaceUses(SDValue(N, Vec),
2359 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
// Remap the chain (and, for updating forms, the address) results.
2361 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2363 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2364 CurDAG->RemoveDeadNode(N);
/// tryV6T2BitfieldExtractOp - Try to match \p N onto an ARMv6T2 bitfield
/// extract (SBFX/UBFX, or their Thumb2 t2 variants). Patterns handled:
///   * AND with a low-bit mask of an SRL  -> UBFX (or a plain shift when
///     the field reaches the MSB, which is cheaper),
///   * SRL of an SHL                      -> UBFX/SBFX,
///   * shift of an AND with a shifted mask -> UBFX/SBFX,
///   * SIGN_EXTEND_INREG of an SRL/SRA    -> SBFX.
/// Returns true if the node was selected. Requires V6T2 ops.
2367 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2368 if (!Subtarget->hasV6T2Ops())
2371 unsigned Opc = isSigned
2372 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2373 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2376 // For unsigned extracts, check for a shift right and mask
2377 unsigned And_imm = 0;
2378 if (N->getOpcode() == ISD::AND) {
2379 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2381 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2382 if (And_imm & (And_imm + 1))
2385 unsigned Srl_imm = 0;
2386 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2388 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2390 // Note: The width operand is encoded as width-1.
2391 unsigned Width = countTrailingOnes(And_imm) - 1;
2392 unsigned LSB = Srl_imm;
2394 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2396 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2397 // It's cheaper to use a right shift to extract the top bits.
2398 if (Subtarget->isThumb()) {
2399 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2400 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2401 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2402 getAL(CurDAG, dl), Reg0, Reg0 };
2403 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2407 // ARM models shift instructions as MOVsi with shifter operand.
2408 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2410 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2412 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2413 getAL(CurDAG, dl), Reg0, Reg0 };
2414 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
// General case: emit UBFX/SBFX with (lsb, width-1) operands.
2418 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2419 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2420 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2421 getAL(CurDAG, dl), Reg0 };
2422 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2429 // Otherwise, we're looking for a shift of a shift
2430 unsigned Shl_imm = 0;
2431 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2432 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2433 unsigned Srl_imm = 0;
2434 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2435 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2436 // Note: The width operand is encoded as width-1.
2437 unsigned Width = 32 - Srl_imm - 1;
// LSB may only be non-negative for a valid extract; NOTE(review): the
// guard rejecting a negative LSB appears elided in this view.
2438 int LSB = Srl_imm - Shl_imm;
2441 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2442 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2443 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2444 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2445 getAL(CurDAG, dl), Reg0 };
2446 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2451 // Or we are looking for a shift of an and, with a mask operand
2452 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2453 isShiftedMask_32(And_imm)) {
2454 unsigned Srl_imm = 0;
2455 unsigned LSB = countTrailingZeros(And_imm);
2456 // Shift must be the same as the ands lsb
2457 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2458 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2459 unsigned MSB = 31 - countLeadingZeros(And_imm);
2460 // Note: The width operand is encoded as width-1.
2461 unsigned Width = MSB - LSB;
2462 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2463 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2464 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2465 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2466 getAL(CurDAG, dl), Reg0 };
2467 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
// Signed extracts: match sign_extend_inreg of a right shift onto SBFX.
2472 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2473 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2475 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2476 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
// The extracted field must lie entirely within 32 bits.
2479 if (LSB + Width > 32)
2482 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2483 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2484 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2485 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2486 getAL(CurDAG, dl), Reg0 };
2487 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2494 /// Target-specific DAG combining for ISD::XOR.
2495 /// Target-independent combining lowers SELECT_CC nodes of the form
2496 ///   select_cc setg[ge] X,  0, X, -X
2497 ///   select_cc setgt    X, -1, X, -X
2498 ///   select_cc setl[te] X,  0, -X, X
2499 ///   select_cc setlt    X,  1, -X, X
2500 /// which represent Integer ABS into:
2501 ///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2502 /// ARM instruction selection detects the latter and matches it to
2503 /// ARM::ABS or ARM::t2ABS machine node.
/// Returns true if N was selected to an ABS node. Not done for Thumb1,
/// which has no ABS pseudo-instruction.
2504 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2505 SDValue XORSrc0 = N->getOperand(0);
2506 SDValue XORSrc1 = N->getOperand(1);
2507 EVT VT = N->getValueType(0);
2509 if (Subtarget->isThumb1Only())
// The pattern requires xor(add(...), sra(...)); bail out early otherwise
// before looking at grand-operands.
2512 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2515 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2516 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2517 SDValue SRASrc0 = XORSrc1.getOperand(0);
2518 SDValue SRASrc1 = XORSrc1.getOperand(1);
2519 ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2520 EVT XType = SRASrc0.getValueType();
2521 unsigned Size = XType.getSizeInBits() - 1;
// Accept only the exact abs shape: the sra feeds both the add and the
// xor, the sra operand is X itself, and the shift amount is size(X)-1.
2523 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2524 XType.isInteger() && SRAConstant != nullptr &&
2525 Size == SRAConstant->getZExtValue()) {
2526 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2527 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2534 /// We've got special pseudo-instructions for these
/// SelectCMP_SWAP - Select an ATOMIC_CMP_SWAP_WITH_SUCCESS node to one of
/// the CMP_SWAP_{8,16,32} pseudos, chosen by the memory VT. The pseudo
/// produces (loaded value, temp, chain); the temp result (index 1) is an
/// internal scratch and is not mapped back to the original node.
2535 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2537 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2538 if (MemTy == MVT::i8)
2539 Opcode = ARM::CMP_SWAP_8;
2540 else if (MemTy == MVT::i16)
2541 Opcode = ARM::CMP_SWAP_16;
2542 else if (MemTy == MVT::i32)
2543 Opcode = ARM::CMP_SWAP_32;
2545 llvm_unreachable("Unknown AtomicCmpSwap type");
// Operands: pointer, expected value, new value (chain handled separately).
2547 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2549 SDNode *CmpSwap = CurDAG->getMachineNode(
2551 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2553 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2554 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2555 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
// Result 0 is the loaded value; result 2 is the chain (result 1 skipped).
2557 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2558 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2559 CurDAG->RemoveDeadNode(N);
/// getContiguousRangeOfSetBits - If the set bits of \p A form one
/// contiguous run, return (index of highest set bit, index of lowest set
/// bit); otherwise return None. The contiguity test compares the popcount
/// against the span between the first and last set bit.
/// NOTE(review): for A == 0, FirstOne wraps around (unsigned underflow);
/// the popcount check still appears to reject that case, but confirm
/// callers never pass zero.
2562 static Optional<std::pair<unsigned, unsigned>>
2563 getContiguousRangeOfSetBits(const APInt &A) {
2564 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2565 unsigned LastOne = A.countTrailingZeros();
2566 if (A.countPopulation() != (FirstOne - LastOne + 1))
2567 return Optional<std::pair<unsigned,unsigned>>();
2568 return std::make_pair(FirstOne, LastOne);
/// SelectCMPZ - Optimize a (cmpz (and X, C), #0) on Thumb targets by
/// rewriting the AND into one or two flag-setting shifts, when C's set
/// bits form a contiguous run. Sets \p SwitchEQNEToPLMI to true when the
/// single-bit case shifts the tested bit into the sign bit, in which case
/// the caller must translate EQ/NE condition codes into PL/MI.
2571 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2572 assert(N->getOpcode() == ARMISD::CMPZ);
2573 SwitchEQNEToPLMI = false;
2575 if (!Subtarget->isThumb())
2576 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2577 // LSR don't exist as standalone instructions - they need the barrel shifter.
2580 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2581 SDValue And = N->getOperand(0);
2582 if (!And->hasOneUse())
// Require comparing an AND against constant zero.
2585 SDValue Zero = N->getOperand(1);
2586 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2587 And->getOpcode() != ISD::AND)
2589 SDValue X = And.getOperand(0);
2590 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2592 if (!C || !X->hasOneUse())
// Bail out unless C's set bits are one contiguous run.
2594 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2598 // There are several ways to lower this:
// Helper emitting a single immediate shift: t2 shifts on Thumb2,
// flag-setting tLSLri/tLSRri (CPSR def operand first) on Thumb1.
2602 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2603 if (Subtarget->isThumb2()) {
2604 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2605 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2606 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2607 CurDAG->getRegister(0, MVT::i32) };
2608 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2610 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2611 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2612 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2613 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
// Range->first is the MSB index of the run, Range->second the LSB index.
2617 if (Range->second == 0) {
2618 // 1. Mask includes the LSB -> Simply shift the top N bits off
2619 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2620 ReplaceNode(And.getNode(), NewN);
2621 } else if (Range->first == 31) {
2622 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
2623 NewN = EmitShift(ARM::tLSRri, X, Range->second);
2624 ReplaceNode(And.getNode(), NewN);
2625 } else if (Range->first == Range->second) {
2626 // 3. Only one bit is set. We can shift this into the sign bit and use a
2627 // PL/MI comparison.
2628 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2629 ReplaceNode(And.getNode(), NewN);
2631 SwitchEQNEToPLMI = true;
2632 } else if (!Subtarget->hasV6T2Ops()) {
2633 // 4. Do a double shift to clear bottom and top bits, but only in
2634 // thumb-1 mode as in thumb-2 we can use UBFX.
2635 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2636 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2637 Range->second + (31 - Range->first));
2638 ReplaceNode(And.getNode(), NewN);
2643 void ARMDAGToDAGISel::Select(SDNode *N) {
2646 if (N->isMachineOpcode()) {
2648 return; // Already selected.
2651 switch (N->getOpcode()) {
2653 case ISD::WRITE_REGISTER:
2654 if (tryWriteRegister(N))
2657 case ISD::READ_REGISTER:
2658 if (tryReadRegister(N))
2661 case ISD::INLINEASM:
2662 if (tryInlineAsm(N))
2666 // Select special operations if XOR node forms integer ABS pattern
2669 // Other cases are autogenerated.
2671 case ISD::Constant: {
2672 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2673 // If we can't materialize the constant we need to use a literal pool
2674 if (ConstantMaterializationCost(Val) > 2) {
2675 SDValue CPIdx = CurDAG->getTargetConstantPool(
2676 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2677 TLI->getPointerTy(CurDAG->getDataLayout()));
2680 if (Subtarget->isThumb()) {
2681 SDValue Pred = getAL(CurDAG, dl);
2682 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2683 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2684 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2689 CurDAG->getTargetConstant(0, dl, MVT::i32),
2691 CurDAG->getRegister(0, MVT::i32),
2692 CurDAG->getEntryNode()
2694 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2697 ReplaceNode(N, ResNode);
2701 // Other cases are autogenerated.
2704 case ISD::FrameIndex: {
2705 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2706 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2707 SDValue TFI = CurDAG->getTargetFrameIndex(
2708 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2709 if (Subtarget->isThumb1Only()) {
2710 // Set the alignment of the frame object to 4, to avoid having to generate
2711 // more than one ADD
2712 MachineFrameInfo &MFI = MF->getFrameInfo();
2713 if (MFI.getObjectAlignment(FI) < 4)
2714 MFI.setObjectAlignment(FI, 4);
2715 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2716 CurDAG->getTargetConstant(0, dl, MVT::i32));
2719 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2720 ARM::t2ADDri : ARM::ADDri);
2721 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2722 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2723 CurDAG->getRegister(0, MVT::i32) };
2724 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2729 if (tryV6T2BitfieldExtractOp(N, false))
2732 case ISD::SIGN_EXTEND_INREG:
2734 if (tryV6T2BitfieldExtractOp(N, true))
2738 if (Subtarget->isThumb1Only())
2740 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2741 unsigned RHSV = C->getZExtValue();
2743 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2744 unsigned ShImm = Log2_32(RHSV-1);
2747 SDValue V = N->getOperand(0);
2748 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2749 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2750 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2751 if (Subtarget->isThumb()) {
2752 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2753 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2756 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2758 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2762 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2763 unsigned ShImm = Log2_32(RHSV+1);
2766 SDValue V = N->getOperand(0);
2767 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2768 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2769 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2770 if (Subtarget->isThumb()) {
2771 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2772 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2775 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2777 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2784 // Check for unsigned bitfield extract
2785 if (tryV6T2BitfieldExtractOp(N, false))
2788 // If an immediate is used in an AND node, it is possible that the immediate
2789 // can be more optimally materialized when negated. If this is the case we
2790 // can negate the immediate and use a BIC instead.
2791 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2792 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2793 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2795 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2796 // immediate can be negated and fit in the immediate operand of
2797 // a t2BIC, don't do any manual transform here as this can be
2798 // handled by the generic ISel machinery.
2799 bool PreferImmediateEncoding =
2800 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2801 if (!PreferImmediateEncoding &&
2802 ConstantMaterializationCost(Imm) >
2803 ConstantMaterializationCost(~Imm)) {
2804 // The current immediate costs more to materialize than a negated
2805 // immediate, so negate the immediate and use a BIC.
2807 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2808 // If the new constant didn't exist before, reposition it in the topological
2809 // ordering so it is just before N. Otherwise, don't touch its location.
2810 if (NewImm->getNodeId() == -1)
2811 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2813 if (!Subtarget->hasThumb2()) {
2814 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2815 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2816 CurDAG->getRegister(0, MVT::i32)};
2817 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2820 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2821 CurDAG->getRegister(0, MVT::i32),
2822 CurDAG->getRegister(0, MVT::i32)};
2824 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2830 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2831 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2832 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2833 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2834 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2835 EVT VT = N->getValueType(0);
2838 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2840 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2843 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2844 N1C = dyn_cast<ConstantSDNode>(N1);
2847 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2848 SDValue N2 = N0.getOperand(1);
2849 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2852 unsigned N1CVal = N1C->getZExtValue();
2853 unsigned N2CVal = N2C->getZExtValue();
2854 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2855 (N1CVal & 0xffffU) == 0xffffU &&
2856 (N2CVal & 0xffffU) == 0x0U) {
2857 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2859 SDValue Ops[] = { N0.getOperand(0), Imm16,
2860 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2861 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2868 case ARMISD::UMAAL: {
2869 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2870 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2871 N->getOperand(2), N->getOperand(3),
2873 CurDAG->getRegister(0, MVT::i32) };
2874 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2877 case ARMISD::UMLAL:{
2878 if (Subtarget->isThumb()) {
2879 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2880 N->getOperand(3), getAL(CurDAG, dl),
2881 CurDAG->getRegister(0, MVT::i32)};
2883 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2886 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2887 N->getOperand(3), getAL(CurDAG, dl),
2888 CurDAG->getRegister(0, MVT::i32),
2889 CurDAG->getRegister(0, MVT::i32) };
2890 ReplaceNode(N, CurDAG->getMachineNode(
2891 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2892 MVT::i32, MVT::i32, Ops));
2896 case ARMISD::SMLAL:{
2897 if (Subtarget->isThumb()) {
2898 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2899 N->getOperand(3), getAL(CurDAG, dl),
2900 CurDAG->getRegister(0, MVT::i32)};
2902 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2905 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2906 N->getOperand(3), getAL(CurDAG, dl),
2907 CurDAG->getRegister(0, MVT::i32),
2908 CurDAG->getRegister(0, MVT::i32) };
2909 ReplaceNode(N, CurDAG->getMachineNode(
2910 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2911 MVT::i32, MVT::i32, Ops));
2915 case ARMISD::SUBE: {
2916 if (!Subtarget->hasV6Ops())
2918 // Look for a pattern to match SMMLS
2919 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2920 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2921 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2922 !SDValue(N, 1).use_empty())
2925 if (Subtarget->isThumb())
2926 assert(Subtarget->hasThumb2() &&
2927 "This pattern should not be generated for Thumb");
2929 SDValue SmulLoHi = N->getOperand(1);
2930 SDValue Subc = N->getOperand(2);
2931 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2933 if (!Zero || Zero->getZExtValue() != 0 ||
2934 Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2935 N->getOperand(1) != SmulLoHi.getValue(1) ||
2936 N->getOperand(2) != Subc.getValue(1))
2939 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2940 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2941 N->getOperand(0), getAL(CurDAG, dl),
2942 CurDAG->getRegister(0, MVT::i32) };
2943 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2947 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2948 if (tryT2IndexedLoad(N))
2950 } else if (Subtarget->isThumb()) {
2951 if (tryT1IndexedLoad(N))
2953 } else if (tryARMIndexedLoad(N))
2955 // Other cases are autogenerated.
2958 case ARMISD::BRCOND: {
2959 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2960 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2961 // Pattern complexity = 6 cost = 1 size = 0
2963 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2964 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2965 // Pattern complexity = 6 cost = 1 size = 0
2967 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2968 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2969 // Pattern complexity = 6 cost = 1 size = 0
2971 unsigned Opc = Subtarget->isThumb() ?
2972 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2973 SDValue Chain = N->getOperand(0);
2974 SDValue N1 = N->getOperand(1);
2975 SDValue N2 = N->getOperand(2);
2976 SDValue N3 = N->getOperand(3);
2977 SDValue InFlag = N->getOperand(4);
2978 assert(N1.getOpcode() == ISD::BasicBlock);
2979 assert(N2.getOpcode() == ISD::Constant);
2980 assert(N3.getOpcode() == ISD::Register);
2982 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
2984 if (InFlag.getOpcode() == ARMISD::CMPZ) {
2985 bool SwitchEQNEToPLMI;
2986 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
2987 InFlag = N->getOperand(4);
2989 if (SwitchEQNEToPLMI) {
2990 switch ((ARMCC::CondCodes)CC) {
2991 default: llvm_unreachable("CMPZ must be either NE or EQ!");
2993 CC = (unsigned)ARMCC::MI;
2996 CC = (unsigned)ARMCC::PL;
3002 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3003 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3004 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3006 Chain = SDValue(ResNode, 0);
3007 if (N->getNumValues() == 2) {
3008 InFlag = SDValue(ResNode, 1);
3009 ReplaceUses(SDValue(N, 1), InFlag);
3011 ReplaceUses(SDValue(N, 0),
3012 SDValue(Chain.getNode(), Chain.getResNo()));
3013 CurDAG->RemoveDeadNode(N);
3017 case ARMISD::CMPZ: {
3018 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3019 // This allows us to avoid materializing the expensive negative constant.
3020 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3021 // for its glue output.
3022 SDValue X = N->getOperand(0);
3023 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3024 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3025 int64_t Addend = -C->getSExtValue();
3027 SDNode *Add = nullptr;
3028 // ADDS can be better than CMN if the immediate fits in a
3029 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3030 // Outside that range we can just use a CMN which is 32-bit but has a
3031 // 12-bit immediate range.
3032 if (Addend < 1<<8) {
3033 if (Subtarget->isThumb2()) {
3034 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3035 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3036 CurDAG->getRegister(0, MVT::i32) };
3037 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3039 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3040 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3041 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3042 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3043 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3047 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3048 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3051 // Other cases are autogenerated.
3055 case ARMISD::CMOV: {
3056 SDValue InFlag = N->getOperand(4);
3058 if (InFlag.getOpcode() == ARMISD::CMPZ) {
3059 bool SwitchEQNEToPLMI;
3060 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3062 if (SwitchEQNEToPLMI) {
3063 SDValue ARMcc = N->getOperand(2);
3064 ARMCC::CondCodes CC =
3065 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3068 default: llvm_unreachable("CMPZ must be either NE or EQ!");
3076 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3077 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3078 N->getOperand(3), N->getOperand(4)};
3079 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3083 // Other cases are autogenerated.
3087 case ARMISD::VZIP: {
3089 EVT VT = N->getValueType(0);
3090 switch (VT.getSimpleVT().SimpleTy) {
3092 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3093 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3095 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3096 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3097 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3098 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3100 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3102 SDValue Pred = getAL(CurDAG, dl);
3103 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3104 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3105 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3108 case ARMISD::VUZP: {
3110 EVT VT = N->getValueType(0);
3111 switch (VT.getSimpleVT().SimpleTy) {
3113 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3114 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3116 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3117 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3118 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3119 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3121 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3123 SDValue Pred = getAL(CurDAG, dl);
3124 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3125 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3126 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3129 case ARMISD::VTRN: {
3131 EVT VT = N->getValueType(0);
3132 switch (VT.getSimpleVT().SimpleTy) {
3134 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3135 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3137 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3138 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3139 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3141 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3143 SDValue Pred = getAL(CurDAG, dl);
3144 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3145 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3146 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3149 case ARMISD::BUILD_VECTOR: {
3150 EVT VecVT = N->getValueType(0);
3151 EVT EltVT = VecVT.getVectorElementType();
3152 unsigned NumElts = VecVT.getVectorNumElements();
3153 if (EltVT == MVT::f64) {
3154 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3156 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3159 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3162 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3165 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3167 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3168 N->getOperand(2), N->getOperand(3)));
3172 case ARMISD::VLD1DUP: {
3173 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3175 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3177 SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
3181 case ARMISD::VLD2DUP: {
3182 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3184 SelectVLDDup(N, false, 2, Opcodes);
3188 case ARMISD::VLD3DUP: {
3189 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3190 ARM::VLD3DUPd16Pseudo,
3191 ARM::VLD3DUPd32Pseudo };
3192 SelectVLDDup(N, false, 3, Opcodes);
3196 case ARMISD::VLD4DUP: {
3197 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3198 ARM::VLD4DUPd16Pseudo,
3199 ARM::VLD4DUPd32Pseudo };
3200 SelectVLDDup(N, false, 4, Opcodes);
3204 case ARMISD::VLD1DUP_UPD: {
3205 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3206 ARM::VLD1DUPd16wb_fixed,
3207 ARM::VLD1DUPd32wb_fixed };
3208 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3209 ARM::VLD1DUPq16wb_fixed,
3210 ARM::VLD1DUPq32wb_fixed };
3211 SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
3215 case ARMISD::VLD2DUP_UPD: {
3216 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3217 ARM::VLD2DUPd16wb_fixed,
3218 ARM::VLD2DUPd32wb_fixed };
3219 SelectVLDDup(N, true, 2, Opcodes);
3223 case ARMISD::VLD3DUP_UPD: {
3224 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3225 ARM::VLD3DUPd16Pseudo_UPD,
3226 ARM::VLD3DUPd32Pseudo_UPD };
3227 SelectVLDDup(N, true, 3, Opcodes);
3231 case ARMISD::VLD4DUP_UPD: {
3232 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3233 ARM::VLD4DUPd16Pseudo_UPD,
3234 ARM::VLD4DUPd32Pseudo_UPD };
3235 SelectVLDDup(N, true, 4, Opcodes);
3239 case ARMISD::VLD1_UPD: {
3240 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3241 ARM::VLD1d16wb_fixed,
3242 ARM::VLD1d32wb_fixed,
3243 ARM::VLD1d64wb_fixed };
3244 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3245 ARM::VLD1q16wb_fixed,
3246 ARM::VLD1q32wb_fixed,
3247 ARM::VLD1q64wb_fixed };
3248 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3252 case ARMISD::VLD2_UPD: {
3253 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3254 ARM::VLD2d16wb_fixed,
3255 ARM::VLD2d32wb_fixed,
3256 ARM::VLD1q64wb_fixed};
3257 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3258 ARM::VLD2q16PseudoWB_fixed,
3259 ARM::VLD2q32PseudoWB_fixed };
3260 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3264 case ARMISD::VLD3_UPD: {
3265 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3266 ARM::VLD3d16Pseudo_UPD,
3267 ARM::VLD3d32Pseudo_UPD,
3268 ARM::VLD1d64TPseudoWB_fixed};
3269 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3270 ARM::VLD3q16Pseudo_UPD,
3271 ARM::VLD3q32Pseudo_UPD };
3272 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3273 ARM::VLD3q16oddPseudo_UPD,
3274 ARM::VLD3q32oddPseudo_UPD };
3275 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3279 case ARMISD::VLD4_UPD: {
3280 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3281 ARM::VLD4d16Pseudo_UPD,
3282 ARM::VLD4d32Pseudo_UPD,
3283 ARM::VLD1d64QPseudoWB_fixed};
3284 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3285 ARM::VLD4q16Pseudo_UPD,
3286 ARM::VLD4q32Pseudo_UPD };
3287 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3288 ARM::VLD4q16oddPseudo_UPD,
3289 ARM::VLD4q32oddPseudo_UPD };
3290 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3294 case ARMISD::VLD2LN_UPD: {
3295 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3296 ARM::VLD2LNd16Pseudo_UPD,
3297 ARM::VLD2LNd32Pseudo_UPD };
3298 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3299 ARM::VLD2LNq32Pseudo_UPD };
3300 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3304 case ARMISD::VLD3LN_UPD: {
3305 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3306 ARM::VLD3LNd16Pseudo_UPD,
3307 ARM::VLD3LNd32Pseudo_UPD };
3308 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3309 ARM::VLD3LNq32Pseudo_UPD };
3310 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3314 case ARMISD::VLD4LN_UPD: {
3315 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3316 ARM::VLD4LNd16Pseudo_UPD,
3317 ARM::VLD4LNd32Pseudo_UPD };
3318 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3319 ARM::VLD4LNq32Pseudo_UPD };
3320 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3324 case ARMISD::VST1_UPD: {
3325 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3326 ARM::VST1d16wb_fixed,
3327 ARM::VST1d32wb_fixed,
3328 ARM::VST1d64wb_fixed };
3329 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3330 ARM::VST1q16wb_fixed,
3331 ARM::VST1q32wb_fixed,
3332 ARM::VST1q64wb_fixed };
3333 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3337 case ARMISD::VST2_UPD: {
3338 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3339 ARM::VST2d16wb_fixed,
3340 ARM::VST2d32wb_fixed,
3341 ARM::VST1q64wb_fixed};
3342 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3343 ARM::VST2q16PseudoWB_fixed,
3344 ARM::VST2q32PseudoWB_fixed };
3345 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3349 case ARMISD::VST3_UPD: {
3350 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3351 ARM::VST3d16Pseudo_UPD,
3352 ARM::VST3d32Pseudo_UPD,
3353 ARM::VST1d64TPseudoWB_fixed};
3354 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3355 ARM::VST3q16Pseudo_UPD,
3356 ARM::VST3q32Pseudo_UPD };
3357 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3358 ARM::VST3q16oddPseudo_UPD,
3359 ARM::VST3q32oddPseudo_UPD };
3360 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3364 case ARMISD::VST4_UPD: {
3365 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3366 ARM::VST4d16Pseudo_UPD,
3367 ARM::VST4d32Pseudo_UPD,
3368 ARM::VST1d64QPseudoWB_fixed};
3369 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3370 ARM::VST4q16Pseudo_UPD,
3371 ARM::VST4q32Pseudo_UPD };
3372 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3373 ARM::VST4q16oddPseudo_UPD,
3374 ARM::VST4q32oddPseudo_UPD };
3375 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3379 case ARMISD::VST2LN_UPD: {
3380 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3381 ARM::VST2LNd16Pseudo_UPD,
3382 ARM::VST2LNd32Pseudo_UPD };
3383 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3384 ARM::VST2LNq32Pseudo_UPD };
3385 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3389 case ARMISD::VST3LN_UPD: {
3390 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3391 ARM::VST3LNd16Pseudo_UPD,
3392 ARM::VST3LNd32Pseudo_UPD };
3393 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3394 ARM::VST3LNq32Pseudo_UPD };
3395 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3399 case ARMISD::VST4LN_UPD: {
3400 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3401 ARM::VST4LNd16Pseudo_UPD,
3402 ARM::VST4LNd32Pseudo_UPD };
3403 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3404 ARM::VST4LNq32Pseudo_UPD };
3405 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3409 case ISD::INTRINSIC_VOID:
3410 case ISD::INTRINSIC_W_CHAIN: {
3411 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3416 case Intrinsic::arm_mrrc:
3417 case Intrinsic::arm_mrrc2: {
3419 SDValue Chain = N->getOperand(0);
3422 if (Subtarget->isThumb())
3423 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3425 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3427 SmallVector<SDValue, 5> Ops;
3428 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3429 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3430 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3432 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3433 // instruction will always be '1111' but it is possible in assembly language to specify
3434 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3435 if (Opc != ARM::MRRC2) {
3436 Ops.push_back(getAL(CurDAG, dl));
3437 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3440 Ops.push_back(Chain);
3442 // Writes to two registers.
3443 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3445 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3448 case Intrinsic::arm_ldaexd:
3449 case Intrinsic::arm_ldrexd: {
3451 SDValue Chain = N->getOperand(0);
3452 SDValue MemAddr = N->getOperand(2);
3453 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3455 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3456 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3457 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3459 // arm_ldrexd returns a i64 value in {i32, i32}
3460 std::vector<EVT> ResTys;
3462 ResTys.push_back(MVT::i32);
3463 ResTys.push_back(MVT::i32);
3465 ResTys.push_back(MVT::Untyped);
3466 ResTys.push_back(MVT::Other);
3468 // Place arguments in the right order.
3469 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3470 CurDAG->getRegister(0, MVT::i32), Chain};
3471 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3472 // Transfer memoperands.
3473 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3474 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3475 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3478 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3479 if (!SDValue(N, 0).use_empty()) {
3482 Result = SDValue(Ld, 0);
3485 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3486 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3487 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3488 Result = SDValue(ResNode,0);
3490 ReplaceUses(SDValue(N, 0), Result);
3492 if (!SDValue(N, 1).use_empty()) {
3495 Result = SDValue(Ld, 1);
3498 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3499 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3500 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3501 Result = SDValue(ResNode,0);
3503 ReplaceUses(SDValue(N, 1), Result);
3505 ReplaceUses(SDValue(N, 2), OutChain);
3506 CurDAG->RemoveDeadNode(N);
3509 case Intrinsic::arm_stlexd:
3510 case Intrinsic::arm_strexd: {
3512 SDValue Chain = N->getOperand(0);
3513 SDValue Val0 = N->getOperand(2);
3514 SDValue Val1 = N->getOperand(3);
3515 SDValue MemAddr = N->getOperand(4);
3517 // Store exclusive double return a i32 value which is the return status
3518 // of the issued store.
3519 const EVT ResTys[] = {MVT::i32, MVT::Other};
3521 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3522 // Place arguments in the right order.
3523 SmallVector<SDValue, 7> Ops;
3525 Ops.push_back(Val0);
3526 Ops.push_back(Val1);
3528 // arm_strexd uses GPRPair.
3529 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3530 Ops.push_back(MemAddr);
3531 Ops.push_back(getAL(CurDAG, dl));
3532 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3533 Ops.push_back(Chain);
3535 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3536 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3537 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3539 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3540 // Transfer memoperands.
3541 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3542 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3543 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3549 case Intrinsic::arm_neon_vld1: {
3550 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3551 ARM::VLD1d32, ARM::VLD1d64 };
3552 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3553 ARM::VLD1q32, ARM::VLD1q64};
3554 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3558 case Intrinsic::arm_neon_vld2: {
3559 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3560 ARM::VLD2d32, ARM::VLD1q64 };
3561 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3562 ARM::VLD2q32Pseudo };
3563 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3567 case Intrinsic::arm_neon_vld3: {
3568 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3571 ARM::VLD1d64TPseudo };
3572 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3573 ARM::VLD3q16Pseudo_UPD,
3574 ARM::VLD3q32Pseudo_UPD };
3575 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3576 ARM::VLD3q16oddPseudo,
3577 ARM::VLD3q32oddPseudo };
3578 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3582 case Intrinsic::arm_neon_vld4: {
3583 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3586 ARM::VLD1d64QPseudo };
3587 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3588 ARM::VLD4q16Pseudo_UPD,
3589 ARM::VLD4q32Pseudo_UPD };
3590 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3591 ARM::VLD4q16oddPseudo,
3592 ARM::VLD4q32oddPseudo };
3593 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3597 case Intrinsic::arm_neon_vld2lane: {
3598 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3599 ARM::VLD2LNd16Pseudo,
3600 ARM::VLD2LNd32Pseudo };
3601 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3602 ARM::VLD2LNq32Pseudo };
3603 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3607 case Intrinsic::arm_neon_vld3lane: {
3608 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3609 ARM::VLD3LNd16Pseudo,
3610 ARM::VLD3LNd32Pseudo };
3611 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3612 ARM::VLD3LNq32Pseudo };
3613 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3617 case Intrinsic::arm_neon_vld4lane: {
3618 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3619 ARM::VLD4LNd16Pseudo,
3620 ARM::VLD4LNd32Pseudo };
3621 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3622 ARM::VLD4LNq32Pseudo };
3623 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3627 case Intrinsic::arm_neon_vst1: {
3628 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3629 ARM::VST1d32, ARM::VST1d64 };
3630 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3631 ARM::VST1q32, ARM::VST1q64 };
3632 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3636 case Intrinsic::arm_neon_vst2: {
3637 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3638 ARM::VST2d32, ARM::VST1q64 };
3639 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3640 ARM::VST2q32Pseudo };
3641 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3645 case Intrinsic::arm_neon_vst3: {
3646 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3649 ARM::VST1d64TPseudo };
3650 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3651 ARM::VST3q16Pseudo_UPD,
3652 ARM::VST3q32Pseudo_UPD };
3653 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3654 ARM::VST3q16oddPseudo,
3655 ARM::VST3q32oddPseudo };
3656 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3660 case Intrinsic::arm_neon_vst4: {
3661 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3664 ARM::VST1d64QPseudo };
3665 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3666 ARM::VST4q16Pseudo_UPD,
3667 ARM::VST4q32Pseudo_UPD };
3668 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3669 ARM::VST4q16oddPseudo,
3670 ARM::VST4q32oddPseudo };
3671 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3675 case Intrinsic::arm_neon_vst2lane: {
3676 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3677 ARM::VST2LNd16Pseudo,
3678 ARM::VST2LNd32Pseudo };
3679 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3680 ARM::VST2LNq32Pseudo };
3681 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3685 case Intrinsic::arm_neon_vst3lane: {
3686 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3687 ARM::VST3LNd16Pseudo,
3688 ARM::VST3LNd32Pseudo };
3689 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3690 ARM::VST3LNq32Pseudo };
3691 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3695 case Intrinsic::arm_neon_vst4lane: {
3696 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3697 ARM::VST4LNd16Pseudo,
3698 ARM::VST4LNd32Pseudo };
3699 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3700 ARM::VST4LNq32Pseudo };
3701 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3708 case ISD::ATOMIC_CMP_SWAP:
3716 // Inspect a register string of the form
3717 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3718 // cp<coprocessor>:<opc1>:c<CRm> (64bit). Inspect the fields of the string
3719 // and obtain the integer operands from them, adding these operands to the
// provided operand vector.
// Split RegString on ':' and, when it contains more than one field, append
// each field's integer value to Ops as an i32 target constant. Used to build
// the coprocessor operand list for MRC/MRRC-style register-string accesses.
3721 static void getIntOperandsFromRegisterString(StringRef RegString,
3722 SelectionDAG *CurDAG,
3724 std::vector<SDValue> &Ops) {
3725 SmallVector<StringRef, 5> Fields;
// A single field means this was not a colon-separated coprocessor string,
// in which case no operands are produced.
3726 RegString.split(Fields, ':');
3728 if (Fields.size() > 1) {
3729 bool AllIntFields = true;
3731 for (StringRef Field : Fields) {
3732 // Need to trim out leading 'cp' characters and get the integer field.
// StringRef::getAsInteger returns true on parse failure, so the '!'
// records whether this field parsed as a base-10 integer.
3734 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3735 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
// Every field must have been an integer; anything else is a malformed
// special-register string from the caller.
3738 assert(AllIntFields &&
3739 "Unexpected non-integer value in special register string.");
3743 // Maps a Banked Register string to its mask value. The mask value returned is
3744 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3745 // mask operand, which expresses which register is to be used, e.g. r8, and in
3746 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
// was invalid.
// Map a banked-register name (matched case-insensitively) to the mask value
// used as the MRSbanked / MSRbanked register-selection operand.
3748 static inline int getBankedRegisterMask(StringRef RegString) {
3749 return StringSwitch<int>(RegString.lower())
// User-mode banked registers.
3750 .Case("r8_usr", 0x00)
3751 .Case("r9_usr", 0x01)
3752 .Case("r10_usr", 0x02)
3753 .Case("r11_usr", 0x03)
3754 .Case("r12_usr", 0x04)
3755 .Case("sp_usr", 0x05)
3756 .Case("lr_usr", 0x06)
// FIQ-mode banked registers.
3757 .Case("r8_fiq", 0x08)
3758 .Case("r9_fiq", 0x09)
3759 .Case("r10_fiq", 0x0a)
3760 .Case("r11_fiq", 0x0b)
3761 .Case("r12_fiq", 0x0c)
3762 .Case("sp_fiq", 0x0d)
3763 .Case("lr_fiq", 0x0e)
// IRQ / SVC / ABT / UND / MON / HYP mode lr and sp.
3764 .Case("lr_irq", 0x10)
3765 .Case("sp_irq", 0x11)
3766 .Case("lr_svc", 0x12)
3767 .Case("sp_svc", 0x13)
3768 .Case("lr_abt", 0x14)
3769 .Case("sp_abt", 0x15)
3770 .Case("lr_und", 0x16)
3771 .Case("sp_und", 0x17)
3772 .Case("lr_mon", 0x1c)
3773 .Case("sp_mon", 0x1d)
3774 .Case("elr_hyp", 0x1e)
3775 .Case("sp_hyp", 0x1f)
// Saved program status registers, one per mode.
3776 .Case("spsr_fiq", 0x2e)
3777 .Case("spsr_irq", 0x30)
3778 .Case("spsr_svc", 0x32)
3779 .Case("spsr_abt", 0x34)
3780 .Case("spsr_und", 0x36)
3781 .Case("spsr_mon", 0x3c)
3782 .Case("spsr_hyp", 0x3e)
3786 // Maps a MClass special register string to its value for use in the
3787 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3788 // Returns -1 to signify that the string was invalid.
// Map an M-class special-register name (matched case-insensitively) to the
// SYSm value operand used by the t2MRS_M / t2MSR_M instruction nodes.
3789 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3790 return StringSwitch<int>(RegString.lower())
3800 .Case("primask", 0x10)
3801 .Case("basepri", 0x11)
3802 .Case("basepri_max", 0x12)
3803 .Case("faultmask", 0x13)
3804 .Case("control", 0x14)
// Stack-pointer limit registers; only valid with the 8-M security
// extension (enforced by getMClassRegisterMask).
3805 .Case("msplim", 0x0a)
3806 .Case("psplim", 0x0b)
3811 // The flags here are common to those allowed for apsr in the A class cores and
3812 // those allowed for the special registers in the M class cores. Returns a
3813 // value representing which flags were present, -1 if invalid.
// Map a psr flags suffix to its 2-bit mask value. Bit 0 set indicates the
// 'g' flags were requested, which requires the DSP extension (checked in
// getMClassRegisterMask).
3814 static inline int getMClassFlagsMask(StringRef Flags) {
3815 return StringSwitch<int>(Flags)
3816 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3817 // correct when flags are not permitted
3820 .Case("nzcvqg", 0x3)
// Validate an M-class special register name plus optional flags against the
// subtarget's feature set, and build the combined instruction operand: the
// SYSm value in the low bits with the flags mask shifted into bits 11-10.
// Returns -1 if the register/flags combination is invalid for this subtarget.
3824 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3825 const ARMSubtarget *Subtarget) {
3826 // Ensure that the register (without flags) was a valid M Class special
3828 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3829 if (SYSmvalue == -1)
3832 // basepri, basepri_max and faultmask are only valid for V7m.
3833 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
// The "ns" (non-secure) flag is only accepted with the 8-M security
// extension.
3836 if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
// msplim/psplim (0xa/0xb) and registers above 0x14 also require the 8-M
// security extension.
3841 if (!Subtarget->has8MSecExt() &&
3842 (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
// These encodings additionally require v8-M Mainline.
3845 if (!Subtarget->hasV8MMainlineOps() &&
3846 (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3850 // If it was a read then we won't be expecting flags and so at this point
3851 // we can return the mask.
3859 // We know we are now handling a write so need to get the mask for the flags.
3860 int Mask = getMClassFlagsMask(Flags);
3862 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3863 // shouldn't have flags present.
3864 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3867 // The _g and _nzcvqg versions are only valid if the DSP extension is
3869 if (!Subtarget->hasDSP() && (Mask & 0x1))
3872 // The register was valid so need to put the mask in the correct place
3873 // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3874 // construct the operand for the instruction node.
3875 return SYSmvalue | Mask << 10;
// Build the mask operand for an A/R-class MRS/MSR of apsr, cpsr or spsr,
// encoding both the register selection and the requested flag fields.
3878 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3879 // The mask operand contains the special register (R Bit) in bit 4, whether
3880 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3881 // bits 3-0 contains the fields to be accessed in the special register, set by
3882 // the flags provided with the register.
3884 if (Reg == "apsr") {
3885 // The flags permitted for apsr are the same flags that are allowed in
3886 // M class registers. We get the flag value and then shift the flags into
3887 // the correct place to combine with the mask.
3888 Mask = getMClassFlagsMask(Flags);
// Only apsr, cpsr and spsr are valid A/R-class register names here.
3894 if (Reg != "cpsr" && Reg != "spsr") {
3898 // This is the same as if the flags were "fc"
3899 if (Flags.empty() || Flags == "all")
3902 // Inspect the supplied flags string and set the bits in the mask for
3903 // the relevant and valid flags allowed for cpsr and spsr.
3904 for (char Flag : Flags) {
3923 // This avoids allowing strings where the same flag bit appears twice.
3924 if (!FlagVal || (Mask & FlagVal))
3929 // If the register is spsr then we need to set the R bit.
3936 // Lower the read_register intrinsic to ARM specific DAG nodes
3937 // using the supplied metadata string to select the instruction node to use
3938 // and the registers/masks to construct as operands for the node.
// Select a read_register node: decode the metadata register string and emit
// the matching machine node (MRC/MRRC for coprocessor strings, MRSbanked for
// banked registers, VMRS variants for VFP registers, t2MRS_M for M-class
// special registers, or MRS/MRSsys for apsr/cpsr/spsr).
3939 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
// NOTE(review): both dyn_casts are used without a null check — the operand
// is assumed to be MDString metadata for well-formed read_register nodes.
3940 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3941 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3942 bool IsThumb2 = Subtarget->isThumb2();
3945 std::vector<SDValue> Ops;
3946 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3949 // If the special register string was constructed of fields (as defined
3950 // in the ACLE) then need to lower to MRC node (32 bit) or
3951 // MRRC node(64 bit), we can make the distinction based on the number of
3952 // operands we have.
3954 SmallVector<EVT, 3> ResTypes;
// 5 integer fields => 32-bit MRC form; otherwise 3 fields => 64-bit MRRC.
3955 if (Ops.size() == 5){
3956 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3957 ResTypes.append({ MVT::i32, MVT::Other });
3959 assert(Ops.size() == 3 &&
3960 "Invalid number of fields in special register string.");
3961 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3962 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
// Append predicate operands and the incoming chain.
3965 Ops.push_back(getAL(CurDAG, DL));
3966 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3967 Ops.push_back(N->getOperand(0));
3968 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
// Not a coprocessor string: match against named special registers instead.
3972 std::string SpecialReg = RegString->getString().lower();
// Named banked register (e.g. "r8_usr") -> MRSbanked.
3974 int BankedReg = getBankedRegisterMask(SpecialReg);
3975 if (BankedReg != -1) {
3976 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3977 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3980 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3981 DL, MVT::i32, MVT::Other, Ops));
3985 // The VFP registers are read by creating SelectionDAG nodes with opcodes
3986 // corresponding to the register that is being read from. So we switch on the
3987 // string to find which opcode we need to use.
3988 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3989 .Case("fpscr", ARM::VMRS)
3990 .Case("fpexc", ARM::VMRS_FPEXC)
3991 .Case("fpsid", ARM::VMRS_FPSID)
3992 .Case("mvfr0", ARM::VMRS_MVFR0)
3993 .Case("mvfr1", ARM::VMRS_MVFR1)
3994 .Case("mvfr2", ARM::VMRS_MVFR2)
3995 .Case("fpinst", ARM::VMRS_FPINST)
3996 .Case("fpinst2", ARM::VMRS_FPINST2)
3999 // If an opcode was found then we can lower the read to a VFP instruction.
// VFP system-register reads require VFP2; mvfr2 additionally needs FPARMv8.
4001 if (!Subtarget->hasVFP2())
4003 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4006 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4009 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4013 // If the target is M Class then need to validate that the register string
4014 // is an acceptable value, so check that a mask can be constructed from the
4016 if (Subtarget->isMClass()) {
4017 StringRef Flags = "", Reg = SpecialReg;
// Strip a trailing "_ns" (non-secure) suffix before building the mask.
4018 if (Reg.endswith("_ns")) {
4020 Reg = Reg.drop_back(3);
// IsRead = true: flags are not expected on a read.
4023 int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4024 if (SYSmValue == -1)
4027 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4028 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4031 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4035 // Here we know the target is not M Class so we need to check if it is one
4036 // of the remaining possible values which are apsr, cpsr or spsr.
4037 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4038 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4040 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4041 DL, MVT::i32, MVT::Other, Ops));
// spsr reads use the system form of MRS.
4045 if (SpecialReg == "spsr") {
4046 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4049 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4050 MVT::i32, MVT::Other, Ops));
4057 // Lower the write_register intrinsic to ARM-specific DAG nodes,
4058 // using the supplied metadata string to select the instruction node to use
4059 // and the registers/masks to use in the nodes.
// tryWriteRegister - Lower an ISD write_register node N into an ARM machine
// node.  Operand 1 of N is a metadata string naming the destination special
// register; operand 2 (plus operand 3 for 64-bit writes) carries the value
// to write.  The node is replaced in place via ReplaceNode.
// NOTE(review): the embedded line numbering in this listing has gaps — the
// "} else {" / "return true;" / closing-brace lines between the numbered
// statements are not visible here; confirm against the complete file.
4060 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4061 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4062 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4063 bool IsThumb2 = Subtarget->isThumb2();
// First try to parse the string as a list of integer coprocessor fields
// (as defined by the ACLE); presumably Ops is filled only when the string
// is such a field list — TODO confirm against getIntOperandsFromRegisterString.
4066 std::vector<SDValue> Ops;
4067 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4070 // If the special register string was constructed of fields (as defined
4071 // in the ACLE) then need to lower to MCR node (32 bit) or
4072 // MCRR node(64 bit), we can make the distinction based on the number of
4073 // operands we have.
// Five parsed fields -> 32-bit write via MCR; splice the value to write in
// after the first two coprocessor operands.
4075 if (Ops.size() == 5) {
4076 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4077 Ops.insert(Ops.begin()+2, N->getOperand(2));
// Otherwise three fields -> 64-bit write via MCRR; the value is split across
// operands 2 and 3 of N.
4079 assert(Ops.size() == 3 &&
4080 "Invalid number of fields in special register string.");
4081 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4082 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4083 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
// Append the standard ARM predicate operands (AL condition, no predicate
// register) and the incoming chain, then replace N with the machine node.
4086 Ops.push_back(getAL(CurDAG, DL));
4087 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4088 Ops.push_back(N->getOperand(0));
4090 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
// Not a field list: match the lower-cased register name directly.
// Banked registers lower to MSRbanked with the bank encoded as an immediate.
4094 std::string SpecialReg = RegString->getString().lower();
4095 int BankedReg = getBankedRegisterMask(SpecialReg);
4096 if (BankedReg != -1) {
4097 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4098 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4101 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4102 DL, MVT::Other, Ops));
4106 // The VFP registers are written to by creating SelectionDAG nodes with
4107 // opcodes corresponding to the register that is being written. So we switch
4108 // on the string to find which opcode we need to use.
4109 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4110 .Case("fpscr", ARM::VMSR)
4111 .Case("fpexc", ARM::VMSR_FPEXC)
4112 .Case("fpsid", ARM::VMSR_FPSID)
4113 .Case("fpinst", ARM::VMSR_FPINST)
4114 .Case("fpinst2", ARM::VMSR_FPINST2)
// VMSR-family writes are only selected when the subtarget has at least VFP2.
4118 if (!Subtarget->hasVFP2())
4120 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4121 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4122 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
// Split an optional "_<flags>" suffix off the register name so the base
// name and the flags can be validated/encoded separately below.
4126 std::pair<StringRef, StringRef> Fields;
4127 Fields = StringRef(SpecialReg).rsplit('_');
4128 std::string Reg = Fields.first.str();
4129 StringRef Flags = Fields.second;
4131 // If the target was M Class then need to validate the special register value
4132 // and retrieve the mask for use in the instruction node.
4133 if (Subtarget->isMClass()) {
4134 // basepri_max gets split so need to correct Reg and Flags.
4135 if (SpecialReg == "basepri_max") {
4139 int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4140 if (SYSmValue == -1)
// A valid M-class system register is written with t2MSR_M, taking the SYSm
// encoding as an immediate operand.
4143 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4144 N->getOperand(2), getAL(CurDAG, DL),
4145 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4146 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4150 // We then check to see if a valid mask can be constructed for one of the
4151 // register string values permitted for the A and R class cores. These values
4152 // are apsr, spsr and cpsr; these are also valid on older cores.
4153 int Mask = getARClassRegisterMask(Reg, Flags);
4155 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4156 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4158 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4159 DL, MVT::Other, Ops));
// tryInlineAsm - Rewrite an INLINEASM node so that 64-bit "%r"-constrained
// values, which the DAG models as two i32 GPR register operands, are carried
// in a single GPRPair virtual register instead.  The visible tail rebuilds
// the INLINEASM node from AsmNodeOperands and replaces N with it.
// NOTE(review): the embedded line numbering in this listing has gaps — the
// "continue" statements, early-exit returns and closing braces between the
// numbered lines are not visible here; confirm against the complete file.
4166 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4167 std::vector<SDValue> AsmNodeOperands;
4168 unsigned Flag, Kind;
4169 bool Changed = false;
4170 unsigned NumOps = N->getNumOperands();
4172 // Normally, i64 data is bound to two arbitrary GPRs for an "%r" constraint.
4173 // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
4174 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4175 // respectively. Since there is no constraint to explicitly specify a
4176 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4177 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4178 // them into a GPRPair.
// When the node is glued, the glue is the last operand; it is kept aside
// here and re-appended after the rewrite.
4181 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4182 : SDValue(nullptr,0);
// OpChanged[k] records whether asm operand k was rewritten to a GPRPair;
// tied uses look it up through DefIdx below.
4184 SmallVector<bool, 8> OpChanged;
4185 // Glue node will be appended late.
4186 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4187 SDValue op = N->getOperand(i);
4188 AsmNodeOperands.push_back(op);
// Copy the fixed leading operands (chain, asm string, ...) through untouched.
4190 if (i < InlineAsm::Op_FirstOperand)
// Decode the per-operand flag word to learn this operand's kind.
4193 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4194 Flag = C->getZExtValue();
4195 Kind = InlineAsm::getKind(Flag);
4200 // Immediate operands to inline asm in the SelectionDAG are modeled with
4201 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4202 // the second is a constant with the value of the immediate. If we get here
4203 // and we have a Kind_Imm, skip the next operand, and continue.
4204 if (Kind == InlineAsm::Kind_Imm) {
4205 SDValue op = N->getOperand(++i);
4206 AsmNodeOperands.push_back(op);
4210 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4212 OpChanged.push_back(false);
4214 unsigned DefIdx = 0;
4215 bool IsTiedToChangedOp = false;
4216 // If it's a use that is tied with a previous def, it has no
4217 // reg class constraint.
4218 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4219 IsTiedToChangedOp = OpChanged[DefIdx];
4221 // Memory operands to inline asm in the SelectionDAG are modeled with two
4222 // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4223 // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4224 // it doesn't get misinterpreted), and continue. We do this here because
4225 // it's important to update the OpChanged array correctly before moving on.
4226 if (Kind == InlineAsm::Kind_Mem) {
4227 SDValue op = N->getOperand(++i);
4228 AsmNodeOperands.push_back(op);
// Only register defs/uses/early-clobbers can need the GPRPair rewrite.
4232 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4233 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
// Only plain-GPR operands (or uses tied to an already-changed def) are
// candidates for pairing.
4237 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4238 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
// The two i32 halves are the next two operands after the flag word.
4242 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4243 SDValue V0 = N->getOperand(i+1);
4244 SDValue V1 = N->getOperand(i+2);
4245 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4246 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4248 MachineRegisterInfo &MRI = MF->getRegInfo();
4250 if (Kind == InlineAsm::Kind_RegDef ||
4251 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4252 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4253 // the original GPRs.
4255 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4256 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4257 SDValue Chain = SDValue(N,0);
4259 SDNode *GU = N->getGluedUser();
4260 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4263 // Extract values from a GPRPair reg and copy to the original GPR reg.
4264 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4266 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4268 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4269 RegCopy.getValue(1));
4270 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4272 // Update the original glue user.
4273 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4274 Ops.push_back(T1.getValue(1));
4275 CurDAG->UpdateNodeOperands(GU, Ops);
4278 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4279 // GPRPair and then pass the GPRPair to the inline asm.
4280 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4282 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4283 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4285 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4287 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4289 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4290 // i32 VRs of inline asm with it.
4291 unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4292 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4293 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
// Thread the updated chain/glue back through the asm node's operands.
4295 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4296 Glue = Chain.getValue(1);
// If this operand was paired, patch its flag word: one register of class
// GPRPair (preserving any tied-operand matching info), then emit the pair.
4301 if(PairedReg.getNode()) {
4302 OpChanged[OpChanged.size() -1 ] = true;
4303 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4304 if (IsTiedToChangedOp)
4305 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4307 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4308 // Replace the current flag.
4309 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4310 Flag, dl, MVT::i32);
4311 // Add the new register node and skip the original two GPRs.
4312 AsmNodeOperands.push_back(PairedReg);
4313 // Skip the next two GPRs.
// Re-append the saved glue (if any) and rebuild the INLINEASM node from the
// rewritten operand list, replacing N.
4319 AsmNodeOperands.push_back(Glue);
4323 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4324 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4326 ReplaceNode(N, New.getNode());
// SelectInlineAsmMemoryOperand - Select the operand for an inline-asm memory
// constraint.  For every memory constraint this target supports, the address
// is simply required to be in a register, so Op is passed through unchanged
// into OutOps.
// NOTE(review): gaps in the embedded numbering hide the "default:" label
// that should precede the llvm_unreachable and the trailing return
// statement(s) — confirm against the complete file.
4331 bool ARMDAGToDAGISel::
4332 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4333 std::vector<SDValue> &OutOps) {
4334 switch(ConstraintID) {
4336 llvm_unreachable("Unexpected asm memory constraint");
4337 case InlineAsm::Constraint_i:
4338 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4339 // be an immediate and not a memory constraint.
4341 case InlineAsm::Constraint_m:
4342 case InlineAsm::Constraint_o:
4343 case InlineAsm::Constraint_Q:
4344 case InlineAsm::Constraint_Um:
4345 case InlineAsm::Constraint_Un:
4346 case InlineAsm::Constraint_Uq:
4347 case InlineAsm::Constraint_Us:
4348 case InlineAsm::Constraint_Ut:
4349 case InlineAsm::Constraint_Uv:
4350 case InlineAsm::Constraint_Uy:
4351 // Require the address to be in a register. That is safe for all ARM
4352 // variants and it is hard to do anything much smarter without knowing
4353 // how the operand is used.
4354 OutOps.push_back(Op);
4360 /// createARMISelDag - This pass converts a legalized DAG into a
4361 /// ARM-specific DAG, ready for instruction scheduling.
4363 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4364 CodeGenOpt::Level OptLevel) {
4365 return new ARMDAGToDAGISel(TM, OptLevel);