//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;

  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
    break;
  }
}
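
// Illustrative encodings produced above (a sketch, not part of the original
// source):
//   b.eq %bb           -> Cond = { <cc EQ> }
//   cbz  w0, %bb       -> Cond = { -1, CBZW, w0 }
//   tbz  w0, #3, %bb   -> Cond = { -1, TBZW, w0, 3 }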
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:  Cond[1].setImm(AArch64::CBNZW); break;
    case AArch64::CBNZW: Cond[1].setImm(AArch64::CBZW);  break;
    case AArch64::CBZX:  Cond[1].setImm(AArch64::CBNZX); break;
    case AArch64::CBNZX: Cond[1].setImm(AArch64::CBZX);  break;
    case AArch64::TBZW:  Cond[1].setImm(AArch64::TBNZW); break;
    case AArch64::TBNZW: Cond[1].setImm(AArch64::TBZW);  break;
    case AArch64::TBZX:  Cond[1].setImm(AArch64::TBNZX); break;
    case AArch64::TBNZX: Cond[1].setImm(AArch64::TBZX);  break;
    }
  }

  return false;
}
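
// Sketch (assumed example): reversing the folded form { -1, CBZW, w0 } yields
// { -1, CBNZW, w0 }, i.e. "cbz w0" becomes "cbnz w0"; a plain Bcc condition
// such as EQ simply becomes NE via getInvertedCondCode.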
unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;
  // Remove the branch.
  I->eraseFromParent();
  I = MBB.end();
  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;
  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}
unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
  // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
  // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
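
// Worked example (a sketch, assuming virtual registers %a and %b): if %t is
// defined by "%t = ADDWri %a, 1, 0" (i.e. %t = %a + 1), this returns CSINCWr
// with *NewVReg = %a, so a later "select cc, %t, %b" can be emitted as
// "csinc dst, %b, %a, invert(cc)" by insertSelect below.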
bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  return false;
}
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:  Is64Bit = false; CC = AArch64CC::EQ; break;
    case AArch64::CBZX:  Is64Bit = true;  CC = AArch64CC::EQ; break;
    case AArch64::CBNZW: Is64Bit = false; CC = AArch64CC::NE; break;
    case AArch64::CBNZX: Is64Bit = true;  CC = AArch64CC::NE; break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg).addImm(0).addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg).addImm(0).addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:  CC = AArch64CC::EQ; break;
    case AArch64::TBNZW:
    case AArch64::TBNZX: CC = AArch64CC::NE; break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv, and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    if (FoldedOpc) {
      // Fold the operation. Leave any dead instructions for DCE to clean up.
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
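
// Example (not from the original): 0x00ff00ff is a replicated 16-bit pattern
// with eight contiguous set bits, so it is a valid logical immediate and
// MOVi32imm 0x00ff00ff can expand to "orr wd, wzr, #0x00ff00ff"; an arbitrary
// constant such as 0x12345678 is not encodable and needs a MOVZ/MOVK sequence.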
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm = 0;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the
  // ZeroCycleZeroing feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case; these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the base registers are identical, and the offset of the lower memory
  // access plus its width does not overlap the offset of the higher memory
  // access, then the memory accesses are disjoint.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
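
// Example (illustrative): "ldr x1, [x0]" (offset 0, width 8) and
// "str x2, [x0, #8]" (offset 8, width 8) share base x0 and satisfy
// 0 + 8 <= 8, so they are trivially disjoint; offsets 0 and 4 with width 8
// would overlap, and the query conservatively returns false.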
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is reduced to 0 or 1 here; optimizeCompareInstr only
    // handles comparisons against zero.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int; converting uint64_t to int would
    // lose the high 32 bits (this caused a bug in spec2006-483.xalancbmk).
    // CmpValue is only used to compare with zero in optimizeCompareInstr,
    // so reduce it to 0 or 1 here as well.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}
/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
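
// Example (sketch): an ADDSWri whose NZCV def is dead converts to ADDWri;
// but a compare such as "subs wzr, w2, #5" keeps its S form, because in the
// non-flag-setting encoding the zero-register destination slot would be
// interpreted as WSP.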
enum AccessKind {
  AK_Write = 0x01,
  AK_Read  = 0x10,
  AK_All   = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
/// on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
                      To->getParent()->rend(), [From](MachineInstr &MI) {
                        return MachineBasicBlock::iterator(MI) == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if there is an earlier instruction producing a needed
///    condition code, or one that can be converted into such an instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operand register classes are incompatible!");
    return true;
  }

  // Continue only if we have an "ri" form whose immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if the destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}
/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr already is the S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr: return AArch64::ADDSWrr;
  case AArch64::ADDWri: return AArch64::ADDSWri;
  case AArch64::ADDXrr: return AArch64::ADDSXrr;
  case AArch64::ADDXri: return AArch64::ADDSXri;
  case AArch64::ADCWr:  return AArch64::ADCSWr;
  case AArch64::ADCXr:  return AArch64::ADCSXr;
  case AArch64::SUBWrr: return AArch64::SUBSWrr;
  case AArch64::SUBWri: return AArch64::SUBSWri;
  case AArch64::SUBXrr: return AArch64::SUBSXrr;
  case AArch64::SUBXri: return AArch64::SUBSXri;
  case AArch64::SBCWr:  return AArch64::SBCSWr;
  case AArch64::SBCXr:  return AArch64::SBCSXr;
  case AArch64::ANDWri: return AArch64::ANDSWri;
  case AArch64::ANDXri: return AArch64::ANDSXri;
  }
}
/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

struct UsedNZCV {
  bool N, Z, C, V;
  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;
  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;
  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true; // fall through to the C flag cases
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;
  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;
  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;
  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true; // fall through to the N/V cases
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
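
// Example (sketch): getUsedNZCV(AArch64CC::GT) reports Z, N and V as used,
// while getUsedNZCV(AArch64CC::EQ) reports only Z; canInstrSubstituteCmpInstr
// below uses this to reject substitution whenever C or V is still consumed.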
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted when:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}
/// Substitute an instruction comparing to zero with another instruction
/// which produces the needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operand register classes are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
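
// Worked example (illustrative machine-IR sketch, not from the source):
//   %1 = SUBWrr %a, %b
//   %2 = SUBSWri %1, 0, 0     ; cmp %1, #0, with %2 otherwise unused
// becomes
//   %1 = SUBSWrr %a, %b       ; now sets NZCV itself
// with the explicit compare erased, provided the checks above all pass.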
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}
// Return true if this instruction simply renames a floating-point register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }
  return 0;
}
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX: {
    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  }
  return false;
}
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.avoidQuadLdStPairs()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  switch (LdSt.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
    unsigned Width;
    return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
  }
}
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have a scaling factor of 1.
  unsigned Scale = 0;
  switch (LdSt.getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Scale = 1; Width = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Scale = 1; Width = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Scale = 1; Width = 4;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Scale = 1; Width = 2;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Scale = 1; Width = 1;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16; Width = 32;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8; Width = 16;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4; Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  }

  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}
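
// Example (sketch): for "ldp x1, x2, [x0, #16]" (LDPXi, immediate operand 2)
// this reports BaseReg = x0, Offset = 2 * 8 = 16 and Width = 16; for the
// unscaled "ldur x1, [x0, #16]" the immediate is already a byte offset, so
// Scale = 1 and Offset = 16 directly.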
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
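
// Example (sketch): for LDURXi (stride 8) a byte offset of 16 scales to an
// element offset of 2, while a byte offset of 12 is rejected because it is
// not a multiple of 8 and therefore cannot be expressed by ldp/stp.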
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           MachineInstr &SecondLdSt,
                                           unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
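
// Example (sketch): "ldr x1, [x0, #8]" and "ldr x2, [x0, #16]" carry scaled
// immediates 1 and 2; since 1 + 1 == 2 they may be clustered and later
// combined into "ldp x1, x2, [x0, #8]".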
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
                                              MachineInstr &Second) const {
  if (Subtarget.hasMacroOpFusion()) {
    // Fuse CMN, CMP, TST followed by Bcc.
    unsigned SecondOpcode = Second.getOpcode();
    if (SecondOpcode == AArch64::Bcc) {
      switch (First.getOpcode()) {
      default:
        return false;
      case AArch64::SUBSWri:
      case AArch64::ADDSWri:
      case AArch64::ANDSWri:
      case AArch64::SUBSXri:
      case AArch64::ADDSXri:
      case AArch64::ANDSXri:
        return true;
      }
    }
    // Fuse ALU operations followed by CBZ/CBNZ.
    if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
        SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
      switch (First.getOpcode()) {
      default:
        return false;
      case AArch64::ADDWri:
      case AArch64::ADDXri:
      case AArch64::ANDWri:
      case AArch64::ANDXri:
      case AArch64::EORWri:
      case AArch64::EORXri:
      case AArch64::ORRWri:
      case AArch64::ORRXri:
      case AArch64::SUBWri:
      case AArch64::SUBXri:
        return true;
      }
    }
  }
  return false;
}
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
    MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
    const MDNode *Expr, const DebugLoc &DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(Var)
                                .addMetadata(Expr);
  return &*MIB;
}

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here; that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
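
// Example (sketch): copying D0_D1 into D1_D2 gives (1 - 0) & 0x1f == 1 < 2,
// so a forward sub-register copy would clobber d1 before it is read;
// copyPhysRegTuple below therefore walks the sub-registers in reverse.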
void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    llvm::ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
1900 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1901 MachineBasicBlock::iterator I,
1902 const DebugLoc &DL, unsigned DestReg,
1903 unsigned SrcReg, bool KillSrc) const {
1904 if (AArch64::GPR32spRegClass.contains(DestReg) &&
1905 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
1906 const TargetRegisterInfo *TRI = &getRegisterInfo();
1908 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1909 // If either operand is WSP, expand to ADD #0.
1910 if (Subtarget.hasZeroCycleRegMove()) {
1911 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1912 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1913 &AArch64::GPR64spRegClass);
1914 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1915 &AArch64::GPR64spRegClass);
1916 // This instruction is reading and writing X registers. This may upset
1917 // the register scavenger and machine verifier, so we need to indicate
1918 // that we are reading an undefined value from SrcRegX, but a proper
1919 // value from SrcReg.
1920 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1921 .addReg(SrcRegX, RegState::Undef)
1923 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1924 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1926 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1927 .addReg(SrcReg, getKillRegState(KillSrc))
1929 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1931 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1932 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1933 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1935 if (Subtarget.hasZeroCycleRegMove()) {
1936 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1937 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1938 &AArch64::GPR64spRegClass);
1939 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1940 &AArch64::GPR64spRegClass);
1941 // This instruction is reading and writing X registers. This may upset
1942 // the register scavenger and machine verifier, so we need to indicate
1943 // that we are reading an undefined value from SrcRegX, but a proper
1944 // value from SrcReg.
1945 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1946 .addReg(AArch64::XZR)
1947 .addReg(SrcRegX, RegState::Undef)
1948 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1950 // Otherwise, expand to ORR WZR.
1951 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1952 .addReg(AArch64::WZR)
1953 .addReg(SrcReg, getKillRegState(KillSrc));
  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }
  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2, AArch64::dsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2, AArch64::qsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }
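  // As a rough sketch, a QQ pair copy q0q1 -> q2q3 becomes two vector moves
  // via copyPhysRegTuple:
  //   orr v2.16b, v0.16b, v0.16b
  //   orr v3.16b, v1.16b, v1.16b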
  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }
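  // Without NEON, the Q-register copy above has to bounce through the stack,
  // roughly:
  //   str q0, [sp, #-16]!
  //   ldr q1, [sp], #16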
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }
  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
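// For illustration (a sketch): spilling a GPR64 value to frame index 1 first
// emits "STRXui %x0, <fi#1>, 0"; the frame index operand is rewritten into a
// real base register and immediate later, during frame index elimination.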
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
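// Reloads mirror the spills above. Note that the vector-tuple reloads (LD1*)
// take no immediate offset, which is why Offset is cleared for them and why
// isAArch64FrameOffsetLegal below refuses to fold an offset into them.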
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24 bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}
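// A worked example of the decomposition above (a sketch): a 0x45680-byte SP
// decrement does not fit in one ADD/SUB immediate, so it is split as
//   sub sp, sp, #0x45, lsl #12   ; subtracts 0x45000
//   sub sp, sp, #0x680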
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %vreg0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Cannot fold.
  return nullptr;
}
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}
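// A worked example (a sketch): LDRXui scales its immediate by 8, so it can
// address [0, 32760] in 8-byte steps. A frame offset of, say, 9 does not
// match the scale, so the code above switches to LDURXi, whose unscaled
// signed immediate covers [-256, 255] and can encode 9 directly.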
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return !Offset;
  }

  return false;
}
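// For illustration: given "%x1 = ADDXri <fi#0>, 16, 0" with frame register
// FP, the first branch above simply re-emits the computation through
// emitFrameOffset as an FP-relative add; loads and stores instead go through
// isAArch64FrameOffsetLegal and may be rewritten to their unscaled forms.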
void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
  return true;
}
// True when Opc sets flags
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    break;
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}
//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the instruction we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}
// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(Root);
    // When the opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
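// For illustration, the MULADDW_OP1 pattern matches a sequence like
//   %w9 = MADDWrrr %w0, %w1, %wzr   ; i.e. mul w9, w0, w1
//   %w2 = ADDWrr  %w9, %w3
// which the combiner can replace with a single
//   %w2 = MADDWrrr %w0, %w1, %w3    ; madd w2, w0, w1, w3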
/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}
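// For illustration, FMULSUBS_OP2 matches
//   %s1 = FMULSrr %s2, %s3
//   %s0 = FSUBSrr %s4, %s1
// and (under unsafe-fp-math) allows it to be fused into
//   %s0 = FMSUBSrrr %s2, %s3, %s4   ; fmsub s0, s2, s3, s4 = s4 - s2*s3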
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool
AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    int Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}
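// A worked MULADDWI_OP1 example (a sketch): for
//   %w9 = MADDWrrr %w0, %w1, %wzr   ; mul w9, w0, w1
//   %w2 = ADDWri  %w9, 255, 0       ; add w2, w9, #255
// the immediate is materialized and the add is folded into the multiply:
//   %w10 = ORRWri  %wzr, <encoding of 0xff>
//   %w2  = MADDWrrr %w0, %w1, %w10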
/// \brief Replace a csinc-branch sequence by a simple conditional branch
///
/// Examples:
/// 1.
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
///     to
///   b.<inverted condition code>
///
/// 2.
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
///     to
///   b.<condition code>
///
/// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
///   and  w8, w8, #0x400
///   cbnz w8, L1
///     to
///   tbnz w8, #10, L1
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the CBZ now.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow encoding them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}