//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static cl::opt<unsigned> TBZDisplacementBits(
    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned> CBZDisplacementBits(
    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));
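
// The three options above are debugging hooks: artificially shrinking the
// assumed displacement range forces the branch relaxation pass to rewrite
// out-of-range branches, so that logic can be exercised with small tests.
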
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// Return the number of bytes of code the specified instruction may take.
/// This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

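// The Cond vector produced by parseCondBranch (and consumed by
// instantiateCondBranch, insertSelect, etc.) uses the following encoding:
//   Bcc:       {CondCode}
//   cbz/cbnz:  {-1, Opcode, Reg}
//   tbz/tbnz:  {-1, Opcode, Reg, BitNumber}
// A leading -1 marks a folded compare-and-branch rather than a plain Bcc.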
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
    break;
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return 64;
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

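// Branch displacements are encoded in 4-byte instruction units (hence the
// BrOffset / 4 below), so the 19-bit Bcc/CB[N]Z field reaches +/-1MiB and
// the 14-bit TB[N]Z field reaches +/-32KiB.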
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                             int64_t BrOffset) const {
  unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
  return isIntN(Bits, BrOffset / 4);
}

MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::B:
    return MI.getOperand(0).getMBB();
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return MI.getOperand(2).getMBB();
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::Bcc:
    return MI.getOperand(1).getMBB();
  }
}

bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      }
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc.
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return true;
}

unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc.
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch.
    // Note that we use add() instead of addReg() to keep the register flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // If NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // Fall through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

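// Example for canFoldIntoCSel above: with "add w1, w0, #1" feeding the true
// operand, "csel w2, w1, w3, cc" can be emitted as
// "csinc w2, w3, w0, <inverted cc>", eliminating the add. Likewise an
// orn-from-zr folds to csinv and a sub-from-zr folds to csneg.
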
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                       ArrayRef<MachineOperand> Cond,
                                       unsigned TrueReg, unsigned FalseReg,
                                       int &CondCycles, int &TrueCycles,
                                       int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv, and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}

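// An immediate that passes processLogicalImmediate can be materialized with a
// single "orr Rd, zr, #imm", i.e. no more expensively than a register move,
// which is why canBeExpandedToORR gates the MOVi32imm/MOVi64imm cases below.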
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
      isExynosShiftLeftFast(MI))
    return true;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI.getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the ZeroCycleZeroing
  // feature.
  case AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
  unsigned Imm, Shift;
  AArch64_AM::ShiftExtendType Ext;

  switch (MI.getOpcode()) {
  default:
    return false;

  // Immediate forms are always fast.
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return true;

  // Shifted-register forms: only a small LSL is fast.
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getShiftValue(Imm);
    Ext = AArch64_AM::getShiftType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));

  // Extended-register forms: only a small UXTX shift is fast.
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    Imm = MI.getOperand(3).getImm();
    Shift = AArch64_AM::getArithShiftValue(Imm);
    Ext = AArch64_AM::getArithExtendType(Imm);
    return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));

  case AArch64::PRFMroW:
  case AArch64::PRFMroX:

  // Register-offset loads.
  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:

  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:

  // Register-offset stores.
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX:

  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
    Imm = MI.getOperand(3).getImm();
    Ext = AArch64_AM::getMemExtendType(Imm);
    return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
  }
}

bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;

  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    if (ShiftVal == 0)
      return true;
    return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
  }

  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) <= 4;
    }
  }

  case AArch64::SUBWrs:
  case AArch64::SUBSWrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
  }

  case AArch64::SUBXrs:
  case AArch64::SUBSXrs: {
    unsigned Imm = MI.getOperand(3).getImm();
    unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    return ShiftVal == 0 ||
           (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
  }

  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64: {
    unsigned Imm = MI.getOperand(3).getImm();
    switch (AArch64_AM::getArithExtendType(Imm)) {
    default:
      return false;
    case AArch64_AM::UXTB:
    case AArch64_AM::UXTH:
    case AArch64_AM::UXTW:
    case AArch64_AM::UXTX:
      return AArch64_AM::getArithShiftValue(Imm) == 0;
    }
  }

  case AArch64::LDRBBroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroW:
  case AArch64::LDRBroX:
  case AArch64::LDRDroW:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroW:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroW:
  case AArch64::LDRHroX:
  case AArch64::LDRQroW:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroW:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroW:
  case AArch64::LDRSroX:
  case AArch64::LDRWroW:
  case AArch64::LDRWroX:
  case AArch64::LDRXroW:
  case AArch64::LDRXroX:
  case AArch64::PRFMroW:
  case AArch64::PRFMroX:
  case AArch64::STRBBroW:
  case AArch64::STRBBroX:
  case AArch64::STRBroW:
  case AArch64::STRBroX:
  case AArch64::STRDroW:
  case AArch64::STRDroX:
  case AArch64::STRHHroW:
  case AArch64::STRHHroX:
  case AArch64::STRHroW:
  case AArch64::STRHroX:
  case AArch64::STRQroW:
  case AArch64::STRQroX:
  case AArch64::STRSroW:
  case AArch64::STRSroX:
  case AArch64::STRWroW:
  case AArch64::STRWroX:
  case AArch64::STRXroW:
  case AArch64::STRXroX: {
    unsigned IsSigned = MI.getOperand(3).getImm();
    return !IsSigned;
  }
  }
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // the base registers are identical, and the offset of the lower memory
  // access + its width doesn't overlap the offset of the higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

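// Disjointness example for the check above: "ldr x1, [x0]" (offset 0,
// width 8) and "ldr x2, [x0, #8]" (offset 8, width 8) share a base register
// but can never overlap, since 0 + 8 <= 8.
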
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  // The first operand can be a frame index where we'd normally expect a
  // register.
  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
  if (!MI.getOperand(1).isReg())
    return false;

  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return val type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}

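/// Constrain every register operand of \p Instr to the register class
/// required by its MCInstrDesc. Returns false if some operand cannot be made
/// to satisfy its constraint (e.g. a physical register outside the class).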
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible to do the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
///       on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting at \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where the immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of the S version of Instr.
/// If Instr is an S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:
    return AArch64::ADDSWrr;
  case AArch64::ADDWri:
    return AArch64::ADDSWri;
  case AArch64::ADDXrr:
    return AArch64::ADDSXrr;
  case AArch64::ADDXri:
    return AArch64::ADDSXri;
  case AArch64::ADCWr:
    return AArch64::ADCSWr;
  case AArch64::ADCXr:
    return AArch64::ADCSXr;
  case AArch64::SUBWrr:
    return AArch64::SUBSWrr;
  case AArch64::SUBWri:
    return AArch64::SUBSWri;
  case AArch64::SUBXrr:
    return AArch64::SUBSXrr;
  case AArch64::SUBXri:
    return AArch64::SUBSXri;
  case AArch64::SBCWr:
    return AArch64::SBCSWr;
  case AArch64::SBCXr:
    return AArch64::SBCSXr;
  case AArch64::ANDWri:
    return AArch64::ANDSWri;
  case AArch64::ANDXri:
    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;

  return false;
}

namespace {

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set   or  C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set,   N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

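// Example of the substitution performed above:
//   sub  w1, w0, #1     <-- MI, rewritten to the flag-setting subs form
//   ...                 <-- no NZCV defs or uses in between
//   cmp  w1, #0         <-- CmpInstr, erased
//   b.eq ...            <-- only reads N/Z, which subs sets identically
// C/V may differ between the two flag writers, hence the C/V-unused check.
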
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs.
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b.
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX: {
    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}

/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOStridedAccess;
  });
}

bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}

// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.isPaired128Slow()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  unsigned Width;
  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}

bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Get the scaling factor for the instruction and set the width for the
  // instruction.
  unsigned Scale = 0;
  int64_t Dummy1, Dummy2;

  // If this returns false, then it's an instruction we don't want to handle.
  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    return false;

  // Compute the offset. Offset is calculated as the immediate operand
  // multiplied by the scaling factor. Unscaled instructions have scaling
  // factor set to 1.
  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}

MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
  return OfsOp;
}

1969 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1970 unsigned &Width, int64_t &MinOffset,
1971 int64_t &MaxOffset) const {
1972 switch (Opcode) {
1973 // Not a memory operation or something we want to handle.
1974 default:
1975 Scale = Width = 0;
1976 MinOffset = MaxOffset = 0;
1977 return false;
1978 case AArch64::STRWpost:
1979 case AArch64::LDRWpost:
1980 Width = 32;
1981 Scale = 4;
1982 MinOffset = -256;
1983 MaxOffset = 255;
1984 break;
1985 case AArch64::LDURQi:
1986 case AArch64::STURQi:
1987 Width = 16;
1988 Scale = 1;
1989 MinOffset = -256;
1990 MaxOffset = 255;
1991 break;
1992 case AArch64::LDURXi:
1993 case AArch64::LDURDi:
1994 case AArch64::STURXi:
1995 case AArch64::STURDi:
1996 Width = 8;
1997 Scale = 1;
1998 MinOffset = -256;
1999 MaxOffset = 255;
2000 break;
2001 case AArch64::LDURWi:
2002 case AArch64::LDURSi:
2003 case AArch64::LDURSWi:
2004 case AArch64::STURWi:
2005 case AArch64::STURSi:
2006 Width = 4;
2007 Scale = 1;
2008 MinOffset = -256;
2009 MaxOffset = 255;
2010 break;
2011 case AArch64::LDURHi:
2012 case AArch64::LDURHHi:
2013 case AArch64::LDURSHXi:
2014 case AArch64::LDURSHWi:
2015 case AArch64::STURHi:
2016 case AArch64::STURHHi:
2017 Width = 2;
2018 Scale = 1;
2019 MinOffset = -256;
2020 MaxOffset = 255;
2021 break;
2022 case AArch64::LDURBi:
2023 case AArch64::LDURBBi:
2024 case AArch64::LDURSBXi:
2025 case AArch64::LDURSBWi:
2026 case AArch64::STURBi:
2027 case AArch64::STURBBi:
2028 Width = 1;
2029 Scale = 1;
2030 MinOffset = -256;
2031 MaxOffset = 255;
2032 break;
2033 case AArch64::LDPQi:
2034 case AArch64::LDNPQi:
2035 case AArch64::STPQi:
2036 case AArch64::STNPQi:
2037 Scale = 16;
2038 Width = 32;
2039 MinOffset = -64;
2040 MaxOffset = 63;
2041 break;
2042 case AArch64::LDRQui:
2043 case AArch64::STRQui:
2044 Scale = Width = 16;
2045 MinOffset = 0;
2046 MaxOffset = 4095;
2047 break;
2048 case AArch64::LDPXi:
2049 case AArch64::LDPDi:
2050 case AArch64::LDNPXi:
2051 case AArch64::LDNPDi:
2052 case AArch64::STPXi:
2053 case AArch64::STPDi:
2054 case AArch64::STNPXi:
2055 case AArch64::STNPDi:
2056 Scale = 8;
2057 Width = 16;
2058 MinOffset = -64;
2059 MaxOffset = 63;
2060 break;
2061 case AArch64::LDRXui:
2062 case AArch64::LDRDui:
2063 case AArch64::STRXui:
2064 case AArch64::STRDui:
2065 Scale = Width = 8;
2066 MinOffset = 0;
2067 MaxOffset = 4095;
2068 break;
2069 case AArch64::LDPWi:
2070 case AArch64::LDPSi:
2071 case AArch64::LDNPWi:
2072 case AArch64::LDNPSi:
2073 case AArch64::STPWi:
2074 case AArch64::STPSi:
2075 case AArch64::STNPWi:
2076 case AArch64::STNPSi:
2077 Scale = 4;
2078 Width = 8;
2079 MinOffset = -64;
2080 MaxOffset = 63;
2081 break;
2082 case AArch64::LDRWui:
2083 case AArch64::LDRSui:
2084 case AArch64::LDRSWui:
2085 case AArch64::STRWui:
2086 case AArch64::STRSui:
2087 Scale = Width = 4;
2088 MinOffset = 0;
2089 MaxOffset = 4095;
2090 break;
2091 case AArch64::LDRHui:
2092 case AArch64::LDRHHui:
2093 case AArch64::STRHui:
2094 case AArch64::STRHHui:
2095 Scale = Width = 2;
2096 MinOffset = 0;
2097 MaxOffset = 4095;
2098 break;
2099 case AArch64::LDRBui:
2100 case AArch64::LDRBBui:
2101 case AArch64::STRBui:
2102 case AArch64::STRBBui:
2103 Scale = Width = 1;
2104 MinOffset = 0;
2105 MaxOffset = 4095;
2106 break;
2107 }
2109 return true;
2110 }
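// Editor's note on the table above (illustrative): LDRXui (scaled,
// unsigned) has Scale = Width = 8 with immediates in [0, 4095], i.e. byte
// offsets up to 32760, while LDPXi (paired, signed) has Scale = 8 with a
// 7-bit signed immediate in [-64, 63].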
2112 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2113 // handled.
2114 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2115 unsigned OffsetStride = 1;
2116 switch (Opc) {
2117 default:
2118 return false;
2119 case AArch64::LDURQi:
2120 case AArch64::STURQi:
2121 OffsetStride = 16;
2122 break;
2123 case AArch64::LDURXi:
2124 case AArch64::LDURDi:
2125 case AArch64::STURXi:
2126 case AArch64::STURDi:
2127 OffsetStride = 8;
2128 break;
2129 case AArch64::LDURWi:
2130 case AArch64::LDURSi:
2131 case AArch64::LDURSWi:
2132 case AArch64::STURWi:
2133 case AArch64::STURSi:
2134 OffsetStride = 4;
2135 break;
2136 }
2137 // If the byte-offset isn't a multiple of the stride, we can't scale this
2138 // offset.
2139 if (Offset % OffsetStride != 0)
2140 return false;
2142 // Convert the byte-offset used by unscaled into an "element" offset used
2143 // by the scaled pair load/store instructions.
2144 Offset /= OffsetStride;
2145 return true;
2146 }
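// Example (illustrative, not in the original source): STURXi with a byte
// offset of 24 has stride 8, so scaleOffset rewrites Offset to 24 / 8 = 3;
// a byte offset of 20 is not a multiple of 8 and the function returns false.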
2148 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2149 if (FirstOpc == SecondOpc)
2150 return true;
2151 // We can also pair sign-ext and zero-ext instructions.
2152 switch (FirstOpc) {
2153 default:
2154 return false;
2155 case AArch64::LDRWui:
2156 case AArch64::LDURWi:
2157 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2158 case AArch64::LDRSWui:
2159 case AArch64::LDURSWi:
2160 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2161 }
2162 // These instructions can't be paired based on their opcodes.
2163 return false;
2164 }
2166 /// Detect opportunities for ldp/stp formation.
2167 ///
2168 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2169 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2170 unsigned BaseReg1,
2171 MachineInstr &SecondLdSt,
2172 unsigned BaseReg2,
2173 unsigned NumLoads) const {
2174 if (BaseReg1 != BaseReg2)
2175 return false;
2177 // Only cluster up to a single pair.
2178 if (NumLoads > 1)
2179 return false;
2181 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2182 return false;
2184 // Can we pair these instructions based on their opcodes?
2185 unsigned FirstOpc = FirstLdSt.getOpcode();
2186 unsigned SecondOpc = SecondLdSt.getOpcode();
2187 if (!canPairLdStOpc(FirstOpc, SecondOpc))
2188 return false;
2190 // Can't merge volatiles or load/stores that have a hint to avoid pair
2191 // formation, for example.
2192 if (!isCandidateToMergeOrPair(FirstLdSt) ||
2193 !isCandidateToMergeOrPair(SecondLdSt))
2194 return false;
2196 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2197 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2198 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2199 return false;
2201 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2202 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2203 return false;
2205 // Pairwise instructions have a 7-bit signed offset field.
2206 if (Offset1 > 63 || Offset1 < -64)
2207 return false;
2209 // The caller should already have ordered First/SecondLdSt by offset.
2210 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2211 return Offset1 + 1 == Offset2;
2212 }
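// Example (illustrative, not in the original source): two LDRXui loads off
// the same base register with immediates 1 and 2 (byte offsets 8 and 16)
// pass every check above and satisfy Offset1 + 1 == Offset2, so the
// scheduler may cluster them into a single ldp.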
2214 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2215 unsigned Reg, unsigned SubIdx,
2216 unsigned State,
2217 const TargetRegisterInfo *TRI) {
2218 if (!SubIdx)
2219 return MIB.addReg(Reg, State);
2221 if (TargetRegisterInfo::isPhysicalRegister(Reg))
2222 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2223 return MIB.addReg(Reg, State, SubIdx);
2224 }
2226 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2227 unsigned NumRegs) {
2228 // We really want the positive remainder mod 32 here, that happens to be
2229 // easily obtainable with a mask.
2230 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2231 }
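// Example (illustrative, not in the original source): copying the tuple
// D0_D1_D2 into D1_D2_D3 gives ((1 - 0) & 0x1f) = 1 < 3, i.e. a forward
// sub-register copy would clobber D1 before it is read as a source;
// copyPhysRegTuple below iterates in reverse in that case
// (D3 := D2, D2 := D1, D1 := D0).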
2233 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2234 MachineBasicBlock::iterator I,
2235 const DebugLoc &DL, unsigned DestReg,
2236 unsigned SrcReg, bool KillSrc,
2237 unsigned Opcode,
2238 ArrayRef<unsigned> Indices) const {
2239 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2240 const TargetRegisterInfo *TRI = &getRegisterInfo();
2241 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2242 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2243 unsigned NumRegs = Indices.size();
2245 int SubReg = 0, End = NumRegs, Incr = 1;
2246 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2247 SubReg = NumRegs - 1;
2248 End = -1;
2249 Incr = -1;
2250 }
2252 for (; SubReg != End; SubReg += Incr) {
2253 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2254 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2255 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2256 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2257 }
2258 }
2260 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2261 MachineBasicBlock::iterator I,
2262 const DebugLoc &DL, unsigned DestReg,
2263 unsigned SrcReg, bool KillSrc) const {
2264 if (AArch64::GPR32spRegClass.contains(DestReg) &&
2265 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2266 const TargetRegisterInfo *TRI = &getRegisterInfo();
2268 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2269 // If either operand is WSP, expand to ADD #0.
2270 if (Subtarget.hasZeroCycleRegMove()) {
2271 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2272 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2273 &AArch64::GPR64spRegClass);
2274 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2275 &AArch64::GPR64spRegClass);
2276 // This instruction is reading and writing X registers. This may upset
2277 // the register scavenger and machine verifier, so we need to indicate
2278 // that we are reading an undefined value from SrcRegX, but a proper
2279 // value from SrcReg.
2280 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2281 .addReg(SrcRegX, RegState::Undef)
2282 .addImm(0)
2283 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2284 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2285 } else {
2286 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2287 .addReg(SrcReg, getKillRegState(KillSrc))
2288 .addImm(0)
2289 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2290 }
2291 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2292 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2293 .addImm(0)
2294 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2295 } else {
2296 if (Subtarget.hasZeroCycleRegMove()) {
2297 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2298 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2299 &AArch64::GPR64spRegClass);
2300 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2301 &AArch64::GPR64spRegClass);
2302 // This instruction is reading and writing X registers. This may upset
2303 // the register scavenger and machine verifier, so we need to indicate
2304 // that we are reading an undefined value from SrcRegX, but a proper
2305 // value from SrcReg.
2306 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2307 .addReg(AArch64::XZR)
2308 .addReg(SrcRegX, RegState::Undef)
2309 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2310 } else {
2311 // Otherwise, expand to ORR WZR.
2312 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2313 .addReg(AArch64::WZR)
2314 .addReg(SrcReg, getKillRegState(KillSrc));
2315 }
2316 }
2317 return;
2318 }
2320 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2321 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2322 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2323 // If either operand is SP, expand to ADD #0.
2324 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2325 .addReg(SrcReg, getKillRegState(KillSrc))
2326 .addImm(0)
2327 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2328 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2329 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2330 .addImm(0)
2331 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2332 } else {
2333 // Otherwise, expand to ORR XZR.
2334 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2335 .addReg(AArch64::XZR)
2336 .addReg(SrcReg, getKillRegState(KillSrc));
2337 }
2338 return;
2339 }
2341 // Copy a DDDD register quad by copying the individual sub-registers.
2342 if (AArch64::DDDDRegClass.contains(DestReg) &&
2343 AArch64::DDDDRegClass.contains(SrcReg)) {
2344 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2345 AArch64::dsub2, AArch64::dsub3};
2346 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2347 Indices);
2348 return;
2349 }
2351 // Copy a DDD register triple by copying the individual sub-registers.
2352 if (AArch64::DDDRegClass.contains(DestReg) &&
2353 AArch64::DDDRegClass.contains(SrcReg)) {
2354 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2355 AArch64::dsub2};
2356 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2357 Indices);
2358 return;
2359 }
2361 // Copy a DD register pair by copying the individual sub-registers.
2362 if (AArch64::DDRegClass.contains(DestReg) &&
2363 AArch64::DDRegClass.contains(SrcReg)) {
2364 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2365 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2366 Indices);
2367 return;
2368 }
2370 // Copy a QQQQ register quad by copying the individual sub-registers.
2371 if (AArch64::QQQQRegClass.contains(DestReg) &&
2372 AArch64::QQQQRegClass.contains(SrcReg)) {
2373 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2374 AArch64::qsub2, AArch64::qsub3};
2375 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2376 Indices);
2377 return;
2378 }
2380 // Copy a QQQ register triple by copying the individual sub-registers.
2381 if (AArch64::QQQRegClass.contains(DestReg) &&
2382 AArch64::QQQRegClass.contains(SrcReg)) {
2383 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2384 AArch64::qsub2};
2385 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2386 Indices);
2387 return;
2388 }
2390 // Copy a QQ register pair by copying the individual sub-registers.
2391 if (AArch64::QQRegClass.contains(DestReg) &&
2392 AArch64::QQRegClass.contains(SrcReg)) {
2393 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2394 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2395 Indices);
2396 return;
2397 }
2399 if (AArch64::FPR128RegClass.contains(DestReg) &&
2400 AArch64::FPR128RegClass.contains(SrcReg)) {
2401 if (Subtarget.hasNEON()) {
2402 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2403 .addReg(SrcReg)
2404 .addReg(SrcReg, getKillRegState(KillSrc));
2405 } else {
2406 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2407 .addReg(AArch64::SP, RegState::Define)
2408 .addReg(SrcReg, getKillRegState(KillSrc))
2409 .addReg(AArch64::SP)
2410 .addImm(-16);
2411 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2412 .addReg(AArch64::SP, RegState::Define)
2413 .addReg(DestReg, RegState::Define)
2414 .addReg(AArch64::SP)
2415 .addImm(16);
2416 }
2417 return;
2418 }
2420 if (AArch64::FPR64RegClass.contains(DestReg) &&
2421 AArch64::FPR64RegClass.contains(SrcReg)) {
2422 if (Subtarget.hasNEON()) {
2423 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2424 &AArch64::FPR128RegClass);
2425 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2426 &AArch64::FPR128RegClass);
2427 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2428 .addReg(SrcReg)
2429 .addReg(SrcReg, getKillRegState(KillSrc));
2430 } else {
2431 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2432 .addReg(SrcReg, getKillRegState(KillSrc));
2433 }
2434 return;
2435 }
2437 if (AArch64::FPR32RegClass.contains(DestReg) &&
2438 AArch64::FPR32RegClass.contains(SrcReg)) {
2439 if (Subtarget.hasNEON()) {
2440 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2441 &AArch64::FPR128RegClass);
2442 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2443 &AArch64::FPR128RegClass);
2444 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2445 .addReg(SrcReg)
2446 .addReg(SrcReg, getKillRegState(KillSrc));
2447 } else {
2448 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2449 .addReg(SrcReg, getKillRegState(KillSrc));
2450 }
2451 return;
2452 }
2454 if (AArch64::FPR16RegClass.contains(DestReg) &&
2455 AArch64::FPR16RegClass.contains(SrcReg)) {
2456 if (Subtarget.hasNEON()) {
2457 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2458 &AArch64::FPR128RegClass);
2459 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2460 &AArch64::FPR128RegClass);
2461 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2462 .addReg(SrcReg)
2463 .addReg(SrcReg, getKillRegState(KillSrc));
2464 } else {
2465 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2466 &AArch64::FPR32RegClass);
2467 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2468 &AArch64::FPR32RegClass);
2469 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2470 .addReg(SrcReg, getKillRegState(KillSrc));
2471 }
2472 return;
2473 }
2475 if (AArch64::FPR8RegClass.contains(DestReg) &&
2476 AArch64::FPR8RegClass.contains(SrcReg)) {
2477 if (Subtarget.hasNEON()) {
2478 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2479 &AArch64::FPR128RegClass);
2480 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2481 &AArch64::FPR128RegClass);
2482 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2483 .addReg(SrcReg)
2484 .addReg(SrcReg, getKillRegState(KillSrc));
2485 } else {
2486 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2487 &AArch64::FPR32RegClass);
2488 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2489 &AArch64::FPR32RegClass);
2490 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2491 .addReg(SrcReg, getKillRegState(KillSrc));
2492 }
2493 return;
2494 }
2496 // Copies between GPR64 and FPR64.
2497 if (AArch64::FPR64RegClass.contains(DestReg) &&
2498 AArch64::GPR64RegClass.contains(SrcReg)) {
2499 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2500 .addReg(SrcReg, getKillRegState(KillSrc));
2501 return;
2502 }
2503 if (AArch64::GPR64RegClass.contains(DestReg) &&
2504 AArch64::FPR64RegClass.contains(SrcReg)) {
2505 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2506 .addReg(SrcReg, getKillRegState(KillSrc));
2507 return;
2508 }
2509 // Copies between GPR32 and FPR32.
2510 if (AArch64::FPR32RegClass.contains(DestReg) &&
2511 AArch64::GPR32RegClass.contains(SrcReg)) {
2512 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2513 .addReg(SrcReg, getKillRegState(KillSrc));
2514 return;
2515 }
2516 if (AArch64::GPR32RegClass.contains(DestReg) &&
2517 AArch64::FPR32RegClass.contains(SrcReg)) {
2518 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2519 .addReg(SrcReg, getKillRegState(KillSrc));
2520 return;
2521 }
2523 if (DestReg == AArch64::NZCV) {
2524 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2525 BuildMI(MBB, I, DL, get(AArch64::MSR))
2526 .addImm(AArch64SysReg::NZCV)
2527 .addReg(SrcReg, getKillRegState(KillSrc))
2528 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2529 return;
2530 }
2532 if (SrcReg == AArch64::NZCV) {
2533 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2534 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2535 .addImm(AArch64SysReg::NZCV)
2536 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2537 return;
2538 }
2540 llvm_unreachable("unimplemented reg-to-reg copy");
2541 }
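// Editor's note (illustrative): the net effect of the GPR paths above is
// that a plain "$w1 = COPY $w2" becomes "ORR $w1, $wzr, $w2"; on subtargets
// with zero-cycle register moves the 64-bit form "ORR $x1, $xzr, $x2" is
// preferred so the core can eliminate the move during register renaming.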
2543 void AArch64InstrInfo::storeRegToStackSlot(
2544 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2545 bool isKill, int FI, const TargetRegisterClass *RC,
2546 const TargetRegisterInfo *TRI) const {
2547 DebugLoc DL;
2548 if (MBBI != MBB.end())
2549 DL = MBBI->getDebugLoc();
2550 MachineFunction &MF = *MBB.getParent();
2551 MachineFrameInfo &MFI = MF.getFrameInfo();
2552 unsigned Align = MFI.getObjectAlignment(FI);
2554 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2555 MachineMemOperand *MMO = MF.getMachineMemOperand(
2556 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2557 unsigned Opc = 0;
2558 bool Offset = true;
2559 switch (TRI->getSpillSize(*RC)) {
2560 case 1:
2561 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2562 Opc = AArch64::STRBui;
2563 break;
2564 case 2:
2565 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2566 Opc = AArch64::STRHui;
2567 break;
2568 case 4:
2569 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2570 Opc = AArch64::STRWui;
2571 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2572 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2573 else
2574 assert(SrcReg != AArch64::WSP);
2575 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2576 Opc = AArch64::STRSui;
2577 break;
2578 case 8:
2579 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2580 Opc = AArch64::STRXui;
2581 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2582 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2583 else
2584 assert(SrcReg != AArch64::SP);
2585 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2586 Opc = AArch64::STRDui;
2587 break;
2588 case 16:
2589 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2590 Opc = AArch64::STRQui;
2591 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2592 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2593 Opc = AArch64::ST1Twov1d;
2594 Offset = false;
2595 }
2596 break;
2597 case 24:
2598 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2599 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2600 Opc = AArch64::ST1Threev1d;
2601 Offset = false;
2602 }
2603 break;
2604 case 32:
2605 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2606 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2607 Opc = AArch64::ST1Fourv1d;
2608 Offset = false;
2609 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2610 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2611 Opc = AArch64::ST1Twov2d;
2612 Offset = false;
2613 }
2614 break;
2615 case 48:
2616 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2617 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2618 Opc = AArch64::ST1Threev2d;
2619 Offset = false;
2620 }
2621 break;
2622 case 64:
2623 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2624 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2625 Opc = AArch64::ST1Fourv2d;
2626 Offset = false;
2627 }
2628 break;
2629 }
2630 assert(Opc && "Unknown register class");
2632 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2633 .addReg(SrcReg, getKillRegState(isKill))
2634 .addFrameIndex(FI);
2636 if (Offset)
2637 MI.addImm(0);
2638 MI.addMemOperand(MMO);
2639 }
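// Example (illustrative, not in the original source): spilling a GPR64
// virtual register to frame index 0 emits "STRXui %reg, %stack.0, 0"
// carrying an 8-byte store MachineMemOperand, while a QQ NEON pair is
// stored with ST1Twov2d, which takes no immediate offset (Offset is set
// to false above).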
2641 void AArch64InstrInfo::loadRegFromStackSlot(
2642 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2643 int FI, const TargetRegisterClass *RC,
2644 const TargetRegisterInfo *TRI) const {
2645 DebugLoc DL;
2646 if (MBBI != MBB.end())
2647 DL = MBBI->getDebugLoc();
2648 MachineFunction &MF = *MBB.getParent();
2649 MachineFrameInfo &MFI = MF.getFrameInfo();
2650 unsigned Align = MFI.getObjectAlignment(FI);
2651 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2652 MachineMemOperand *MMO = MF.getMachineMemOperand(
2653 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2655 unsigned Opc = 0;
2656 bool Offset = true;
2657 switch (TRI->getSpillSize(*RC)) {
2658 case 1:
2659 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2660 Opc = AArch64::LDRBui;
2661 break;
2662 case 2:
2663 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2664 Opc = AArch64::LDRHui;
2665 break;
2666 case 4:
2667 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2668 Opc = AArch64::LDRWui;
2669 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2670 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2671 else
2672 assert(DestReg != AArch64::WSP);
2673 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2674 Opc = AArch64::LDRSui;
2675 break;
2676 case 8:
2677 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2678 Opc = AArch64::LDRXui;
2679 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2680 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2681 else
2682 assert(DestReg != AArch64::SP);
2683 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2684 Opc = AArch64::LDRDui;
2685 break;
2686 case 16:
2687 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2688 Opc = AArch64::LDRQui;
2689 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2690 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2691 Opc = AArch64::LD1Twov1d;
2692 Offset = false;
2693 }
2694 break;
2695 case 24:
2696 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2697 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2698 Opc = AArch64::LD1Threev1d;
2699 Offset = false;
2700 }
2701 break;
2702 case 32:
2703 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2704 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2705 Opc = AArch64::LD1Fourv1d;
2706 Offset = false;
2707 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2708 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2709 Opc = AArch64::LD1Twov2d;
2710 Offset = false;
2711 }
2712 break;
2713 case 48:
2714 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2715 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2716 Opc = AArch64::LD1Threev2d;
2717 Offset = false;
2718 }
2719 break;
2720 case 64:
2721 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2722 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2723 Opc = AArch64::LD1Fourv2d;
2724 Offset = false;
2725 }
2726 break;
2727 }
2728 assert(Opc && "Unknown register class");
2730 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2731 .addReg(DestReg, getDefRegState(true))
2732 .addFrameIndex(FI);
2733 if (Offset)
2734 MI.addImm(0);
2735 MI.addMemOperand(MMO);
2736 }
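// Example (illustrative, not in the original source): the reload matching
// the GPR64 spill above is "LDRXui %reg, %stack.0, 0"; vector tuples are
// refilled with the ld1 forms (e.g. LD1Twov2d), again without an immediate
// operand.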
2738 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2739 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2740 unsigned DestReg, unsigned SrcReg, int Offset,
2741 const TargetInstrInfo *TII,
2742 MachineInstr::MIFlag Flag, bool SetNZCV) {
2743 if (DestReg == SrcReg && Offset == 0)
2744 return;
2746 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2747 "SP increment/decrement not 16-byte aligned");
2749 bool isSub = Offset < 0;
2750 if (isSub)
2751 Offset = -Offset;
2753 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2754 // scratch register. If DestReg is a virtual register, use it as the
2755 // scratch register; otherwise, create a new virtual register (to be
2756 // replaced by the scavenger at the end of PEI). That case can be optimized
2757 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2758 // register can be loaded with offset%8 and the add/sub can use an extending
2759 // instruction with LSL#3.
2760 // Currently the function handles any offsets but generates a poor sequence
2761 // of code.
2762 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2764 unsigned Opc;
2765 if (SetNZCV)
2766 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2767 else
2768 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2769 const unsigned MaxEncoding = 0xfff;
2770 const unsigned ShiftSize = 12;
2771 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2772 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2773 unsigned ThisVal;
2774 if (((unsigned)Offset) > MaxEncodableValue) {
2775 ThisVal = MaxEncodableValue;
2776 } else {
2777 ThisVal = Offset & MaxEncodableValue;
2778 }
2779 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2780 "Encoding cannot handle value that big");
2781 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2782 .addReg(SrcReg)
2783 .addImm(ThisVal >> ShiftSize)
2784 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2785 .setMIFlag(Flag);
2787 SrcReg = DestReg;
2788 Offset -= ThisVal;
2789 if (Offset == 0)
2790 return;
2791 }
2792 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2793 .addReg(SrcReg)
2794 .addImm(Offset)
2795 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2796 .setMIFlag(Flag);
2797 }
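// Worked example (editor's illustration): emitFrameOffset with
// Offset = 0x123456 cannot encode the value in a single instruction, so the
// loop above first emits "ADD Xd, Xn, #0x123, lsl #12" (consuming 0x123000)
// and the trailing BuildMI adds the remaining 0x456.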
2799 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2800 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2801 MachineBasicBlock::iterator InsertPt, int FrameIndex,
2802 LiveIntervals *LIS) const {
2803 // This is a bit of a hack. Consider this instruction:
2805 // %0 = COPY %sp; GPR64all:%0
2807 // We explicitly chose GPR64all for the virtual register so such a copy might
2808 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2809 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
2810 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2812 // To prevent that, we are going to constrain the %0 register class here.
2814 // <rdar://problem/11522048>
2816 if (MI.isFullCopy()) {
2817 unsigned DstReg = MI.getOperand(0).getReg();
2818 unsigned SrcReg = MI.getOperand(1).getReg();
2819 if (SrcReg == AArch64::SP &&
2820 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2821 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2822 return nullptr;
2823 }
2824 if (DstReg == AArch64::SP &&
2825 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2826 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2827 return nullptr;
2828 }
2829 }
2831 // Handle the case where a copy is being spilled or filled but the source
2832 // and destination register class don't match. For example:
2834 // %0 = COPY %xzr; GPR64common:%0
2836 // In this case we can still safely fold away the COPY and generate the
2837 // following spill code:
2839 // STRXui %xzr, %stack.0
2841 // This also eliminates spilled cross register class COPYs (e.g. between x and
2842 // d regs) of the same size. For example:
2844 // %0 = COPY %1; GPR64:%0, FPR64:%1
2846 // will be filled as
2848 // LDRDui %0, fi<#0>
2849 //
2850 // instead of:
2851 //
2852 // LDRXui %Temp, fi<#0>
2853 // %0 = FMOV %Temp
2854 //
2855 if (MI.isCopy() && Ops.size() == 1 &&
2856 // Make sure we're only folding the explicit COPY defs/uses.
2857 (Ops[0] == 0 || Ops[0] == 1)) {
2858 bool IsSpill = Ops[0] == 0;
2859 bool IsFill = !IsSpill;
2860 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
2861 const MachineRegisterInfo &MRI = MF.getRegInfo();
2862 MachineBasicBlock &MBB = *MI.getParent();
2863 const MachineOperand &DstMO = MI.getOperand(0);
2864 const MachineOperand &SrcMO = MI.getOperand(1);
2865 unsigned DstReg = DstMO.getReg();
2866 unsigned SrcReg = SrcMO.getReg();
2867 // This is slightly expensive to compute for physical regs since
2868 // getMinimalPhysRegClass is slow.
2869 auto getRegClass = [&](unsigned Reg) {
2870 return TargetRegisterInfo::isVirtualRegister(Reg)
2871 ? MRI.getRegClass(Reg)
2872 : TRI.getMinimalPhysRegClass(Reg);
2873 };
2875 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
2876 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
2877 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
2878 "Mismatched register size in non subreg COPY");
2880 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
2881 getRegClass(SrcReg), &TRI);
2883 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
2884 getRegClass(DstReg), &TRI);
2885 return &*--InsertPt;
2888 // Handle cases like spilling def of:
2890 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
2892 // where the physical register source can be widened and stored to the full
2893 // virtual reg destination stack slot, in this case producing:
2895 // STRXui %xzr, %stack.0
2897 if (IsSpill && DstMO.isUndef() &&
2898 TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2899 assert(SrcMO.getSubReg() == 0 &&
2900 "Unexpected subreg on physical register");
2901 const TargetRegisterClass *SpillRC;
2902 unsigned SpillSubreg;
2903 switch (DstMO.getSubReg()) {
2904 default:
2905 SpillRC = nullptr;
2906 break;
2907 case AArch64::sub_32:
2908 case AArch64::ssub:
2909 if (AArch64::GPR32RegClass.contains(SrcReg)) {
2910 SpillRC = &AArch64::GPR64RegClass;
2911 SpillSubreg = AArch64::sub_32;
2912 } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
2913 SpillRC = &AArch64::FPR64RegClass;
2914 SpillSubreg = AArch64::ssub;
2915 } else
2916 SpillRC = nullptr;
2917 break;
2918 case AArch64::dsub:
2919 if (AArch64::FPR64RegClass.contains(SrcReg)) {
2920 SpillRC = &AArch64::FPR128RegClass;
2921 SpillSubreg = AArch64::dsub;
2922 } else
2923 SpillRC = nullptr;
2924 break;
2925 }
2927 if (SpillRC)
2928 if (unsigned WidenedSrcReg =
2929 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
2930 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
2931 FrameIndex, SpillRC, &TRI);
2932 return &*--InsertPt;
2933 }
2934 }
2936 // Handle cases like filling use of:
2938 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
2940 // where we can load the full virtual reg source stack slot, into the subreg
2941 // destination, in this case producing:
2943 // LDRWui %0:sub_32<def,read-undef>, %stack.0
2945 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
2946 const TargetRegisterClass *FillRC;
2947 switch (DstMO.getSubReg()) {
2948 default:
2949 FillRC = nullptr;
2950 break;
2951 case AArch64::sub_32:
2952 FillRC = &AArch64::GPR32RegClass;
2953 break;
2954 case AArch64::ssub:
2955 FillRC = &AArch64::FPR32RegClass;
2956 break;
2957 case AArch64::dsub:
2958 FillRC = &AArch64::FPR64RegClass;
2959 break;
2960 }
2962 if (FillRC) {
2963 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
2964 TRI.getRegSizeInBits(*FillRC) &&
2965 "Mismatched regclass size on folded subreg COPY");
2966 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
2967 MachineInstr &LoadMI = *--InsertPt;
2968 MachineOperand &LoadDst = LoadMI.getOperand(0);
2969 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
2970 LoadDst.setSubReg(DstMO.getSubReg());
2971 LoadDst.setIsUndef();
2972 return &LoadMI;
2973 }
2974 }
2975 }
2977 // Cannot fold.
2978 return nullptr;
2979 }
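// Editor's illustration of the two subreg paths above (an interpretation,
// not from the original source): spilling "%0:sub_32<def,read-undef> =
// COPY %wzr" widens the source and stores all of %xzr with STRXui, while
// filling "%0:sub_32<def,read-undef> = COPY %1" loads the slot directly
// into the sub_32 lane with LDRWui, leaving the rest of %0 undef.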
2981 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2982 bool *OutUseUnscaledOp,
2983 unsigned *OutUnscaledOp,
2984 int *EmittableOffset) {
2985 int Scale = 1;
2986 bool IsSigned = false;
2987 // The ImmIdx should be changed case by case if it is not 2.
2988 unsigned ImmIdx = 2;
2989 unsigned UnscaledOp = 0;
2990 // Set output values in case of early exit.
2991 if (EmittableOffset)
2992 *EmittableOffset = 0;
2993 if (OutUseUnscaledOp)
2994 *OutUseUnscaledOp = false;
2995 if (OutUnscaledOp)
2996 *OutUnscaledOp = 0;
2997 switch (MI.getOpcode()) {
2998 default:
2999 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3000 // Vector spills/fills can't take an immediate offset.
3001 case AArch64::LD1Twov2d:
3002 case AArch64::LD1Threev2d:
3003 case AArch64::LD1Fourv2d:
3004 case AArch64::LD1Twov1d:
3005 case AArch64::LD1Threev1d:
3006 case AArch64::LD1Fourv1d:
3007 case AArch64::ST1Twov2d:
3008 case AArch64::ST1Threev2d:
3009 case AArch64::ST1Fourv2d:
3010 case AArch64::ST1Twov1d:
3011 case AArch64::ST1Threev1d:
3012 case AArch64::ST1Fourv1d:
3013 return AArch64FrameOffsetCannotUpdate;
3014 case AArch64::PRFMui:
3015 Scale = 8;
3016 UnscaledOp = AArch64::PRFUMi;
3017 break;
3018 case AArch64::LDRXui:
3019 Scale = 8;
3020 UnscaledOp = AArch64::LDURXi;
3021 break;
3022 case AArch64::LDRWui:
3023 Scale = 4;
3024 UnscaledOp = AArch64::LDURWi;
3025 break;
3026 case AArch64::LDRBui:
3027 Scale = 1;
3028 UnscaledOp = AArch64::LDURBi;
3029 break;
3030 case AArch64::LDRHui:
3031 Scale = 2;
3032 UnscaledOp = AArch64::LDURHi;
3033 break;
3034 case AArch64::LDRSui:
3035 Scale = 4;
3036 UnscaledOp = AArch64::LDURSi;
3037 break;
3038 case AArch64::LDRDui:
3039 Scale = 8;
3040 UnscaledOp = AArch64::LDURDi;
3041 break;
3042 case AArch64::LDRQui:
3043 Scale = 16;
3044 UnscaledOp = AArch64::LDURQi;
3045 break;
3046 case AArch64::LDRBBui:
3047 Scale = 1;
3048 UnscaledOp = AArch64::LDURBBi;
3049 break;
3050 case AArch64::LDRHHui:
3051 Scale = 2;
3052 UnscaledOp = AArch64::LDURHHi;
3053 break;
3054 case AArch64::LDRSBXui:
3055 Scale = 1;
3056 UnscaledOp = AArch64::LDURSBXi;
3057 break;
3058 case AArch64::LDRSBWui:
3059 Scale = 1;
3060 UnscaledOp = AArch64::LDURSBWi;
3061 break;
3062 case AArch64::LDRSHXui:
3063 Scale = 2;
3064 UnscaledOp = AArch64::LDURSHXi;
3065 break;
3066 case AArch64::LDRSHWui:
3067 Scale = 2;
3068 UnscaledOp = AArch64::LDURSHWi;
3069 break;
3070 case AArch64::LDRSWui:
3071 Scale = 4;
3072 UnscaledOp = AArch64::LDURSWi;
3073 break;
3075 case AArch64::STRXui:
3076 Scale = 8;
3077 UnscaledOp = AArch64::STURXi;
3078 break;
3079 case AArch64::STRWui:
3080 Scale = 4;
3081 UnscaledOp = AArch64::STURWi;
3082 break;
3083 case AArch64::STRBui:
3084 Scale = 1;
3085 UnscaledOp = AArch64::STURBi;
3086 break;
3087 case AArch64::STRHui:
3088 Scale = 2;
3089 UnscaledOp = AArch64::STURHi;
3090 break;
3091 case AArch64::STRSui:
3092 Scale = 4;
3093 UnscaledOp = AArch64::STURSi;
3094 break;
3095 case AArch64::STRDui:
3096 Scale = 8;
3097 UnscaledOp = AArch64::STURDi;
3098 break;
3099 case AArch64::STRQui:
3100 Scale = 16;
3101 UnscaledOp = AArch64::STURQi;
3102 break;
3103 case AArch64::STRBBui:
3104 Scale = 1;
3105 UnscaledOp = AArch64::STURBBi;
3106 break;
3107 case AArch64::STRHHui:
3108 Scale = 2;
3109 UnscaledOp = AArch64::STURHHi;
3110 break;
3112 case AArch64::LDPXi:
3113 case AArch64::LDPDi:
3114 case AArch64::STPXi:
3115 case AArch64::STPDi:
3116 case AArch64::LDNPXi:
3117 case AArch64::LDNPDi:
3118 case AArch64::STNPXi:
3119 case AArch64::STNPDi:
3120 IsSigned = true;
3121 Scale = 8;
3122 break;
3124 case AArch64::LDPQi:
3125 case AArch64::STPQi:
3126 case AArch64::LDNPQi:
3127 case AArch64::STNPQi:
3128 IsSigned = true;
3129 Scale = 16;
3130 break;
3132 case AArch64::LDPWi:
3133 case AArch64::LDPSi:
3134 case AArch64::STPWi:
3135 case AArch64::STPSi:
3136 case AArch64::LDNPWi:
3137 case AArch64::LDNPSi:
3138 case AArch64::STNPWi:
3139 case AArch64::STNPSi:
3140 IsSigned = true;
3141 Scale = 4;
3142 break;
3145 case AArch64::LDURXi:
3146 case AArch64::LDURWi:
3147 case AArch64::LDURBi:
3148 case AArch64::LDURHi:
3149 case AArch64::LDURSi:
3150 case AArch64::LDURDi:
3151 case AArch64::LDURQi:
3152 case AArch64::LDURHHi:
3153 case AArch64::LDURBBi:
3154 case AArch64::LDURSBXi:
3155 case AArch64::LDURSBWi:
3156 case AArch64::LDURSHXi:
3157 case AArch64::LDURSHWi:
3158 case AArch64::LDURSWi:
3159 case AArch64::STURXi:
3160 case AArch64::STURWi:
3161 case AArch64::STURBi:
3162 case AArch64::STURHi:
3163 case AArch64::STURSi:
3164 case AArch64::STURDi:
3165 case AArch64::STURQi:
3166 case AArch64::STURBBi:
3167 case AArch64::STURHHi:
3168 Scale = 1;
3169 break;
3170 }
3172 Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3174 bool useUnscaledOp = false;
3175 // If the offset doesn't match the scale, we rewrite the instruction to
3176 // use the unscaled instruction instead. Likewise, if we have a negative
3177 // offset (and have an unscaled op to use).
3178 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3179 useUnscaledOp = true;
3181 // Use an unscaled addressing mode if the instruction has a negative offset
3182 // (or if the instruction is already using an unscaled addressing mode).
3183 unsigned MaskBits;
3184 if (IsSigned) {
3185 // ldp/stp instructions.
3186 MaskBits = 7;
3187 Offset /= Scale;
3188 } else if (UnscaledOp == 0 || useUnscaledOp) {
3189 // ldur/stur instructions.
3190 MaskBits = 9;
3191 IsSigned = true;
3192 Scale = 1;
3193 } else {
3194 // ldr/str instructions.
3195 MaskBits = 12;
3196 Offset /= Scale;
3197 }
3198 // Attempt to fold address computation.
3199 int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3200 int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3201 if (Offset >= MinOff && Offset <= MaxOff) {
3202 if (EmittableOffset)
3203 *EmittableOffset = Offset;
3204 Offset = 0;
3205 } else {
3206 int NewOff = Offset < 0 ? MinOff : MaxOff;
3207 if (EmittableOffset)
3208 *EmittableOffset = NewOff;
3209 Offset = (Offset - NewOff) * Scale;
3210 }
3211 if (OutUseUnscaledOp)
3212 *OutUseUnscaledOp = useUnscaledOp;
3213 if (OutUnscaledOp)
3214 *OutUnscaledOp = UnscaledOp;
3215 return AArch64FrameOffsetCanUpdate |
3216 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3217 }
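// Worked example (editor's illustration): for an LDRXui whose frame offset
// resolves to 40 bytes, Scale is 8 and the offset divides evenly, so the
// scaled immediate 40 / 8 = 5 lies in [0, 4095]; *EmittableOffset is set
// to 5 and the result carries both AArch64FrameOffsetCanUpdate and
// AArch64FrameOffsetIsLegal.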
3219 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3220 unsigned FrameReg, int &Offset,
3221 const AArch64InstrInfo *TII) {
3222 unsigned Opcode = MI.getOpcode();
3223 unsigned ImmIdx = FrameRegIdx + 1;
3225 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3226 Offset += MI.getOperand(ImmIdx).getImm();
3227 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3228 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3229 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3230 MI.eraseFromParent();
3231 Offset = 0;
3232 return true;
3233 }
3235 int NewOffset;
3236 unsigned UnscaledOp;
3237 bool UseUnscaledOp;
3238 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3239 &UnscaledOp, &NewOffset);
3240 if (Status & AArch64FrameOffsetCanUpdate) {
3241 if (Status & AArch64FrameOffsetIsLegal)
3242 // Replace the FrameIndex with FrameReg.
3243 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3244 if (UseUnscaledOp)
3245 MI.setDesc(TII->get(UnscaledOp));
3247 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3248 return Offset == 0;
3249 }
3251 return false;
3252 }
3254 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3255 NopInst.setOpcode(AArch64::HINT);
3256 NopInst.addOperand(MCOperand::createImm(0));
3257 }
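// Editor's note: "HINT #0" is the architectural encoding of NOP, so the
// emitted instruction disassembles as a plain nop.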
3259 // AArch64 supports MachineCombiner.
3260 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3262 // True when Opc sets flag
3263 static bool isCombineInstrSettingFlag(unsigned Opc) {
3264 switch (Opc) {
3265 case AArch64::ADDSWrr:
3266 case AArch64::ADDSWri:
3267 case AArch64::ADDSXrr:
3268 case AArch64::ADDSXri:
3269 case AArch64::SUBSWrr:
3270 case AArch64::SUBSXrr:
3271 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3272 case AArch64::SUBSWri:
3273 case AArch64::SUBSXri:
3274 return true;
3275 default:
3276 break;
3277 }
3278 return false;
3279 }
3281 // 32b Opcodes that can be combined with a MUL
3282 static bool isCombineInstrCandidate32(unsigned Opc) {
3283 switch (Opc) {
3284 case AArch64::ADDWrr:
3285 case AArch64::ADDWri:
3286 case AArch64::SUBWrr:
3287 case AArch64::ADDSWrr:
3288 case AArch64::ADDSWri:
3289 case AArch64::SUBSWrr:
3290 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3291 case AArch64::SUBWri:
3292 case AArch64::SUBSWri:
3293 return true;
3294 default:
3295 break;
3296 }
3297 return false;
3298 }
3300 // 64b Opcodes that can be combined with a MUL
3301 static bool isCombineInstrCandidate64(unsigned Opc) {
3302 switch (Opc) {
3303 case AArch64::ADDXrr:
3304 case AArch64::ADDXri:
3305 case AArch64::SUBXrr:
3306 case AArch64::ADDSXrr:
3307 case AArch64::ADDSXri:
3308 case AArch64::SUBSXrr:
3309 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3310 case AArch64::SUBXri:
3311 case AArch64::SUBSXri:
3312 return true;
3313 default:
3314 break;
3315 }
3316 return false;
3317 }
3319 // FP Opcodes that can be combined with a FMUL
3320 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3321 switch (Inst.getOpcode()) {
3322 default:
3323 break;
3324 case AArch64::FADDSrr:
3325 case AArch64::FADDDrr:
3326 case AArch64::FADDv2f32:
3327 case AArch64::FADDv2f64:
3328 case AArch64::FADDv4f32:
3329 case AArch64::FSUBSrr:
3330 case AArch64::FSUBDrr:
3331 case AArch64::FSUBv2f32:
3332 case AArch64::FSUBv2f64:
3333 case AArch64::FSUBv4f32:
3334 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3335 return (Options.UnsafeFPMath ||
3336 Options.AllowFPOpFusion == FPOpFusion::Fast);
3337 }
3338 return false;
3339 }
3341 // Opcodes that can be combined with a MUL
3342 static bool isCombineInstrCandidate(unsigned Opc) {
3343 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3344 }
3347 // Utility routine that checks if \param MO is defined by an
3348 // \param CombineOpc instruction in the basic block \param MBB
3349 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3350 unsigned CombineOpc, unsigned ZeroReg = 0,
3351 bool CheckZeroReg = false) {
3352 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3353 MachineInstr *MI = nullptr;
3355 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3356 MI = MRI.getUniqueVRegDef(MO.getReg());
3357 // And it needs to be in the trace (otherwise, it won't have a depth).
3358 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3359 return false;
3360 // Must only be used by the user we combine with.
3361 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3362 return false;
3364 if (CheckZeroReg) {
3365 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3366 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3367 MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3368 // The third input reg must be zero.
3369 if (MI->getOperand(3).getReg() != ZeroReg)
3370 return false;
3371 }
3373 return true;
3374 }
3377 // Is \param MO defined by an integer multiply and can be combined?
3378 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3379 unsigned MulOpc, unsigned ZeroReg) {
3380 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3381 }
3384 // Is \param MO defined by a floating-point multiply and can be combined?
3385 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3386 unsigned MulOpc) {
3387 return canCombine(MBB, MO, MulOpc);
3388 }
3390 // TODO: There are many more machine instruction opcodes to match:
3391 // 1. Other data types (integer, vectors)
3392 // 2. Other math / logic operations (xor, or)
3393 // 3. Other forms of the same operation (intrinsics and other variants)
3394 bool AArch64InstrInfo::isAssociativeAndCommutative(
3395 const MachineInstr &Inst) const {
3396 switch (Inst.getOpcode()) {
3397 case AArch64::FADDDrr:
3398 case AArch64::FADDSrr:
3399 case AArch64::FADDv2f32:
3400 case AArch64::FADDv2f64:
3401 case AArch64::FADDv4f32:
3402 case AArch64::FMULDrr:
3403 case AArch64::FMULSrr:
3404 case AArch64::FMULX32:
3405 case AArch64::FMULX64:
3406 case AArch64::FMULXv2f32:
3407 case AArch64::FMULXv2f64:
3408 case AArch64::FMULXv4f32:
3409 case AArch64::FMULv2f32:
3410 case AArch64::FMULv2f64:
3411 case AArch64::FMULv4f32:
3412 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3413 default:
3414 return false;
3415 }
3416 }
3418 /// Find instructions that can be turned into madd.
3419 static bool getMaddPatterns(MachineInstr &Root,
3420 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3421 unsigned Opc = Root.getOpcode();
3422 MachineBasicBlock &MBB = *Root.getParent();
3423 bool Found = false;
3425 if (!isCombineInstrCandidate(Opc))
3426 return false;
3427 if (isCombineInstrSettingFlag(Opc)) {
3428 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3429 // When NZCV is live bail out.
3430 if (Cmp_NZCV == -1)
3431 return false;
3432 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3433 // When opcode can't change bail out.
3434 // CHECKME: do we miss any cases for opcode conversion?
3435 if (NewOpc == Opc)
3436 return false;
3437 Opc = NewOpc;
3438 }
3440 switch (Opc) {
3441 default:
3442 break;
3443 case AArch64::ADDWrr:
3444 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3445 "ADDWrr does not have register operands");
3446 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3447 AArch64::WZR)) {
3448 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3449 Found = true;
3450 }
3451 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3452 AArch64::WZR)) {
3453 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3454 Found = true;
3455 }
3456 break;
3457 case AArch64::ADDXrr:
3458 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3459 AArch64::XZR)) {
3460 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3461 Found = true;
3462 }
3463 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3464 AArch64::XZR)) {
3465 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3466 Found = true;
3467 }
3468 break;
3469 case AArch64::SUBWrr:
3470 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3471 AArch64::WZR)) {
3472 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3473 Found = true;
3474 }
3475 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3476 AArch64::WZR)) {
3477 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3478 Found = true;
3479 }
3480 break;
3481 case AArch64::SUBXrr:
3482 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3483 AArch64::XZR)) {
3484 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3485 Found = true;
3486 }
3487 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3488 AArch64::XZR)) {
3489 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3490 Found = true;
3491 }
3492 break;
3493 case AArch64::ADDWri:
3494 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3495 AArch64::WZR)) {
3496 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3497 Found = true;
3498 }
3499 break;
3500 case AArch64::ADDXri:
3501 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3502 AArch64::XZR)) {
3503 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3504 Found = true;
3505 }
3506 break;
3507 case AArch64::SUBWri:
3508 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3509 AArch64::WZR)) {
3510 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3511 Found = true;
3512 }
3513 break;
3514 case AArch64::SUBXri:
3515 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3516 AArch64::XZR)) {
3517 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3518 Found = true;
3519 }
3520 break;
3521 }
3522 return Found;
3523 }
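// Example (illustrative, not in the original source): a multiply is
// represented as MADD with a zero addend, so the pair
//   %3 = MADDWrrr %0, %1, $wzr   ; %3 = %0 * %1
//   %4 = ADDWrr %3, %2
// matches MULADDW_OP1 and can later be rewritten as
//   %4 = MADDWrrr %0, %1, %2.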
3524 /// Floating-Point Support
3526 /// Find instructions that can be turned into madd.
3527 static bool getFMAPatterns(MachineInstr &Root,
3528 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3530 if (!isCombineInstrCandidateFP(Root))
3531 return false;
3533 MachineBasicBlock &MBB = *Root.getParent();
3534 bool Found = false;
3536 switch (Root.getOpcode()) {
3537 default:
3538 assert(false && "Unsupported FP instruction in combiner\n");
3539 break;
3540 case AArch64::FADDSrr:
3541 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3542 "FADDSrr does not have register operands");
3543 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3544 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3545 Found = true;
3546 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3547 AArch64::FMULv1i32_indexed)) {
3548 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3549 Found = true;
3550 }
3551 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3552 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3553 Found = true;
3554 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3555 AArch64::FMULv1i32_indexed)) {
3556 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3557 Found = true;
3558 }
3559 break;
3560 case AArch64::FADDDrr:
3561 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3562 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3563 Found = true;
3564 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3565 AArch64::FMULv1i64_indexed)) {
3566 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3567 Found = true;
3568 }
3569 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3570 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3571 Found = true;
3572 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3573 AArch64::FMULv1i64_indexed)) {
3574 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3575 Found = true;
3576 }
3577 break;
3578 case AArch64::FADDv2f32:
3579 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3580 AArch64::FMULv2i32_indexed)) {
3581 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3582 Found = true;
3583 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3584 AArch64::FMULv2f32)) {
3585 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3586 Found = true;
3587 }
3588 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3589 AArch64::FMULv2i32_indexed)) {
3590 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3591 Found = true;
3592 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3593 AArch64::FMULv2f32)) {
3594 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3595 Found = true;
3596 }
3597 break;
3598 case AArch64::FADDv2f64:
3599 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3600 AArch64::FMULv2i64_indexed)) {
3601 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3602 Found = true;
3603 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3604 AArch64::FMULv2f64)) {
3605 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3606 Found = true;
3607 }
3608 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3609 AArch64::FMULv2i64_indexed)) {
3610 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3611 Found = true;
3612 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3613 AArch64::FMULv2f64)) {
3614 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3615 Found = true;
3616 }
3617 break;
3618 case AArch64::FADDv4f32:
3619 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3620 AArch64::FMULv4i32_indexed)) {
3621 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3622 Found = true;
3623 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3624 AArch64::FMULv4f32)) {
3625 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3626 Found = true;
3627 }
3628 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3629 AArch64::FMULv4i32_indexed)) {
3630 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3631 Found = true;
3632 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3633 AArch64::FMULv4f32)) {
3634 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3635 Found = true;
3636 }
3637 break;
3639 case AArch64::FSUBSrr:
3640 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3641 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3642 Found = true;
3643 }
3644 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3645 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3646 Found = true;
3647 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3648 AArch64::FMULv1i32_indexed)) {
3649 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3650 Found = true;
3651 }
3652 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3653 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3654 Found = true;
3655 }
3656 break;
3657 case AArch64::FSUBDrr:
3658 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3659 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3660 Found = true;
3661 }
3662 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3663 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3664 Found = true;
3665 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3666 AArch64::FMULv1i64_indexed)) {
3667 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3668 Found = true;
3669 }
3670 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3671 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3672 Found = true;
3673 }
3674 break;
3675 case AArch64::FSUBv2f32:
3676 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3677 AArch64::FMULv2i32_indexed)) {
3678 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3679 Found = true;
3680 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3681 AArch64::FMULv2f32)) {
3682 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3683 Found = true;
3684 }
3685 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3686 AArch64::FMULv2i32_indexed)) {
3687 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3688 Found = true;
3689 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3690 AArch64::FMULv2f32)) {
3691 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3692 Found = true;
3693 }
3694 break;
3695 case AArch64::FSUBv2f64:
3696 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3697 AArch64::FMULv2i64_indexed)) {
3698 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3699 Found = true;
3700 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3701 AArch64::FMULv2f64)) {
3702 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3703 Found = true;
3704 }
3705 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3706 AArch64::FMULv2i64_indexed)) {
3707 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3708 Found = true;
3709 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3710 AArch64::FMULv2f64)) {
3711 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3712 Found = true;
3713 }
3714 break;
3715 case AArch64::FSUBv4f32:
3716 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3717 AArch64::FMULv4i32_indexed)) {
3718 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3719 Found = true;
3720 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3721 AArch64::FMULv4f32)) {
3722 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3723 Found = true;
3724 }
3725 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3726 AArch64::FMULv4i32_indexed)) {
3727 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3728 Found = true;
3729 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3730 AArch64::FMULv4f32)) {
3731 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
3732 Found = true;
3733 }
3734 break;
3735 }
3736 return Found;
3737 }
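// Example (illustrative, not in the original source): under unsafe-fp-math
// or fp-contract=fast, the sequence
//   %3 = FMULSrr %0, %1
//   %4 = FADDSrr %3, %2
// matches FMULADDS_OP1 and is later fused into
//   %4 = FMADDSrrr %0, %1, %2.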
3739 /// Return true when a code sequence can improve throughput. It
3740 /// should be called only for instructions in loops.
3741 /// \param Pattern - combiner pattern
3742 bool AArch64InstrInfo::isThroughputPattern(
3743 MachineCombinerPattern Pattern) const {
3744 switch (Pattern) {
3745 default:
3746 break;
3747 case MachineCombinerPattern::FMULADDS_OP1:
3748 case MachineCombinerPattern::FMULADDS_OP2:
3749 case MachineCombinerPattern::FMULSUBS_OP1:
3750 case MachineCombinerPattern::FMULSUBS_OP2:
3751 case MachineCombinerPattern::FMULADDD_OP1:
3752 case MachineCombinerPattern::FMULADDD_OP2:
3753 case MachineCombinerPattern::FMULSUBD_OP1:
3754 case MachineCombinerPattern::FMULSUBD_OP2:
3755 case MachineCombinerPattern::FNMULSUBS_OP1:
3756 case MachineCombinerPattern::FNMULSUBD_OP1:
3757 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3758 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3759 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3760 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3761 case MachineCombinerPattern::FMLAv2f32_OP2:
3762 case MachineCombinerPattern::FMLAv2f32_OP1:
3763 case MachineCombinerPattern::FMLAv2f64_OP1:
3764 case MachineCombinerPattern::FMLAv2f64_OP2:
3765 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3766 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3767 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3768 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3769 case MachineCombinerPattern::FMLAv4f32_OP1:
3770 case MachineCombinerPattern::FMLAv4f32_OP2:
3771 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3772 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3773 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3774 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3775 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3776 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3777 case MachineCombinerPattern::FMLSv2f32_OP2:
3778 case MachineCombinerPattern::FMLSv2f64_OP2:
3779 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3780 case MachineCombinerPattern::FMLSv4f32_OP2:
3781 return true;
3782 } // end switch (Pattern)
3783 return false;
3784 }
3785 /// Return true when there is potentially a faster code sequence for an
3786 /// instruction chain ending in \p Root. All potential patterns are listed in
3787 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3788 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3790 bool AArch64InstrInfo::getMachineCombinerPatterns(
3791 MachineInstr &Root,
3792 SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3793 // Integer patterns
3794 if (getMaddPatterns(Root, Patterns))
3795 return true;
3796 // Floating point patterns
3797 if (getFMAPatterns(Root, Patterns))
3798 return true;
3800 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3801 }
3803 enum class FMAInstKind { Default, Indexed, Accumulator };
3804 /// genFusedMultiply - Generate fused multiply instructions.
3805 /// This function supports both integer and floating point instructions.
3806 /// A typical example:
3809 /// ==> F|MADD R,A,B,C
3810 /// \param MF Containing MachineFunction
3811 /// \param MRI Register information
3812 /// \param TII Target information
3813 /// \param Root is the F|ADD instruction
3814 /// \param [out] InsInstrs is a vector of machine instructions and will
3815 /// contain the generated madd instruction
3816 /// \param IdxMulOpd is index of operand in Root that is the result of
3817 /// the F|MUL. In the example above IdxMulOpd is 1.
3818 /// \param MaddOpc the opcode of the f|madd instruction
3819 /// \param RC Register class of operands
3820 /// \param kind of fma instruction (addressing mode) to be generated
3821 /// \param ReplacedAddend is the result register from the instruction
3822 /// replacing the non-combined operand, if any.
3823 static MachineInstr *
3824 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3825 const TargetInstrInfo *TII, MachineInstr &Root,
3826 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3827 unsigned MaddOpc, const TargetRegisterClass *RC,
3828 FMAInstKind kind = FMAInstKind::Default,
3829 const unsigned *ReplacedAddend = nullptr) {
3830 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3832 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3833 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3834 unsigned ResultReg = Root.getOperand(0).getReg();
3835 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3836 bool Src0IsKill = MUL->getOperand(1).isKill();
3837 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3838 bool Src1IsKill = MUL->getOperand(2).isKill();
3840 unsigned SrcReg2;
3841 bool Src2IsKill;
3842 if (ReplacedAddend) {
3843 // If we just generated a new addend, it must be its only use.
3844 SrcReg2 = *ReplacedAddend;
3845 Src2IsKill = true;
3846 } else {
3847 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3848 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3849 }
3851 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3852 MRI.constrainRegClass(ResultReg, RC);
3853 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3854 MRI.constrainRegClass(SrcReg0, RC);
3855 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3856 MRI.constrainRegClass(SrcReg1, RC);
3857 if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3858 MRI.constrainRegClass(SrcReg2, RC);
3860 MachineInstrBuilder MIB;
3861 if (kind == FMAInstKind::Default)
3862 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3863 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3864 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3865 .addReg(SrcReg2, getKillRegState(Src2IsKill));
3866 else if (kind == FMAInstKind::Indexed)
3867 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3868 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3869 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3870 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3871 .addImm(MUL->getOperand(3).getImm());
3872 else if (kind == FMAInstKind::Accumulator)
3873 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3874 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3875 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3876 .addReg(SrcReg1, getKillRegState(Src1IsKill));
3877 else
3878 assert(false && "Invalid FMA instruction kind \n");
3879 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3880 InsInstrs.push_back(MIB);
3881 return MUL;
3882 }
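// Example (illustrative, not in the original source): for MULADDW_OP1 the
// caller passes IdxMulOpd = 1 and MaddOpc = AArch64::MADDWrrr, so
// "ADDWrr R, I, C" with "I = MADDWrrr A, B, $wzr" becomes
// "MADDWrrr R, A, B, C", taking A and B from the multiply and C from the
// remaining add operand.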
3884 /// genMaddR - Generate madd instruction and combine mul and add using
3885 /// an extra virtual register
3886 /// Example - an ADD intermediate needs to be stored in a register:
3889 /// ==> ORR V, ZR, Imm
3890 /// ==> MADD R,A,B,V
3891 /// \param MF Containing MachineFunction
3892 /// \param MRI Register information
3893 /// \param TII Target information
3894 /// \param Root is the ADD instruction
3895 /// \param [out] InsInstrs is a vector of machine instructions and will
3896 /// contain the generated madd instruction
3897 /// \param IdxMulOpd is index of operand in Root that is the result of
3898 /// the MUL. In the example above IdxMulOpd is 1.
3899 /// \param MaddOpc the opcode of the madd instruction
3900 /// \param VR is a virtual register that holds the value of an ADD operand
3901 /// (V in the example above).
3902 /// \param RC Register class of operands
3903 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3904 const TargetInstrInfo *TII, MachineInstr &Root,
3905 SmallVectorImpl<MachineInstr *> &InsInstrs,
3906 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
3907 const TargetRegisterClass *RC) {
3908 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3910 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3911 unsigned ResultReg = Root.getOperand(0).getReg();
3912 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3913 bool Src0IsKill = MUL->getOperand(1).isKill();
3914 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3915 bool Src1IsKill = MUL->getOperand(2).isKill();
3917 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3918 MRI.constrainRegClass(ResultReg, RC);
3919 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3920 MRI.constrainRegClass(SrcReg0, RC);
3921 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3922 MRI.constrainRegClass(SrcReg1, RC);
3923 if (TargetRegisterInfo::isVirtualRegister(VR))
3924 MRI.constrainRegClass(VR, RC);
3926 MachineInstrBuilder MIB =
3927 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3928 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3929 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3930 .addReg(VR);
3931 // Insert the MADD
3932 InsInstrs.push_back(MIB);
3933 return MUL;
3934 }
3936 /// When getMachineCombinerPatterns() finds potential patterns,
3937 /// this function generates the instructions that could replace the
3938 /// original code sequence
3939 void AArch64InstrInfo::genAlternativeCodeSequence(
3940 MachineInstr &Root, MachineCombinerPattern Pattern,
3941 SmallVectorImpl<MachineInstr *> &InsInstrs,
3942 SmallVectorImpl<MachineInstr *> &DelInstrs,
3943 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3944 MachineBasicBlock &MBB = *Root.getParent();
3945 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3946 MachineFunction &MF = *MBB.getParent();
3947 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3949 MachineInstr *MUL;
3950 const TargetRegisterClass *RC;
3951 unsigned Opc;
3952 switch (Pattern) {
3953 default:
3954 // Reassociate instructions.
3955 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3956 DelInstrs, InstrIdxForVirtReg);
3957 return;
3958 case MachineCombinerPattern::MULADDW_OP1:
3959 case MachineCombinerPattern::MULADDX_OP1:
3960 // MUL I=A,B,0
3961 // ADD R,I,C
3962 // ==> MADD R,A,B,C
3963 // --- Create(MADD);
3964 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
3965 Opc = AArch64::MADDWrrr;
3966 RC = &AArch64::GPR32RegClass;
3968 Opc = AArch64::MADDXrrr;
3969 RC = &AArch64::GPR64RegClass;
3971 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3972 break;
3973 case MachineCombinerPattern::MULADDW_OP2:
3974 case MachineCombinerPattern::MULADDX_OP2:
3975 // MUL I=A,B,0
3976 // ADD R,C,I
3977 // ==> MADD R,A,B,C
3978 // --- Create(MADD);
3979 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
3980 Opc = AArch64::MADDWrrr;
3981 RC = &AArch64::GPR32RegClass;
3983 Opc = AArch64::MADDXrrr;
3984 RC = &AArch64::GPR64RegClass;
3986 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
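  // For instance (hypothetical registers): "mul w8, w1, w2; add w0, w8, #16"
  // becomes "orr w9, wzr, #16; madd w0, w1, w2, w9", since #16 is encodable
  // as an ORR logical immediate.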
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I,C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
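  // For instance (hypothetical registers): "mul w8, w1, w2; sub w0, w8, w3"
  // becomes "sub w9, wzr, w3; madd w0, w1, w2, w9", i.e. w0 = w1*w2 - w3.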
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I,Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
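  // For instance (hypothetical registers): "mul w8, w1, w2; sub w0, w8, #4"
  // becomes "orr w9, wzr, #0xfffffffc; madd w0, w1, w2, w9", since -4 is
  // encodable as an ORR logical immediate.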
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
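  // The two FMA kinds used above differ only in the multiplier operand
  // (hypothetical registers):
  //   fmla v0.4s, v1.4s, v2.s[1]   // FMAInstKind::Indexed: by-lane multiply
  //   fmla v0.4s, v1.4s, v2.4s     // FMAInstKind::Accumulator: full vector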
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FNMULSUBS_OP1:
  case MachineCombinerPattern::FNMULSUBD_OP1: {
    // FNMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMADD R,A,B,C // = -A*B - C
    // --- Create(FNMADD);
    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
      Opc = AArch64::FNMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLSv2f32_OP1:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    RC = &AArch64::FPR64RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv4f32_OP1:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
  case MachineCombinerPattern::FMLSv2f64_OP1:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    RC = &AArch64::FPR128RegClass;
    unsigned NewVR = MRI.createVirtualRegister(RC);
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
            .add(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed, &NewVR);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator, &NewVR);
    }
    break;
  }
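  // The FMLS*_OP1 cases above rewrite A*B - C by negating C first
  // (hypothetical registers): "fmul v3.2d, v0.2d, v1.2d; fsub v4.2d, v3.2d,
  // v2.2d" becomes "fneg v5.2d, v2.2d; fmla v5.2d, v0.2d, v1.2d".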
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion. MUL may be left null when an
  // immediate pattern could not be materialized.
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}
/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc  w9, wzr, wzr, <condition code>
///   tbnz   w9, #0, 0x44
///    \endcode
/// to
/// \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz w9, #0, 0x44
///    \endcode
/// to
/// \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
/// \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
/// \endcode
/// to
/// \code
///   tbnz w8, #10, L1
/// \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
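///
/// As a concrete instance of the first rewrite (hypothetical condition code):
/// \code
///   csinc w9, wzr, wzr, eq   // w9 = (eq ? 0 : 1)
///   tbnz  w9, #0, L1         // taken exactly when "ne" holds
/// \endcode
/// becomes
/// \code
///   b.ne L1
/// \endcode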
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find the definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }
  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // The register lives on in the TBZ/TBNZ now.
    MO.setIsKill(false);

    // For an immediate smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow encoding them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
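  // For the doc-comment example above: Mask = 0x400, so Log2_64(Mask) = 10
  // and "and w8, w8, #0x400 ; cbnz w8, L1" becomes "tbnz w8, #10, L1".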
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MOSuppressPair, "aarch64-suppress-pair"},
       {MOStridedAccess, "aarch64-strided-access"}};
  return makeArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                             OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION     I1
/// RET                            I2
///                                RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
enum MachineOutlinerClass {
  MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
  MachineOutlinerTailCall, /// Only emit a branch.
  MachineOutlinerNoLRSave  /// Emit a call and return.
};
bool AArch64InstrInfo::canOutlineWithoutLRSave(
    MachineBasicBlock::iterator &CallInsertionPt) const {
  // Was LR saved in the function containing this basic block?
  MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
  LiveRegUnits LRU(getRegisterInfo());
  LRU.addLiveOuts(MBB);

  // Get liveness information from the end of the block to the end of the
  // prospective outlined region.
  std::for_each(MBB.rbegin(),
                (MachineBasicBlock::reverse_iterator)CallInsertionPt,
                [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });

  // If the link register is available at this point, then we can safely outline
  // the region without saving/restoring LR. Otherwise, we must emit a save and
  // restore.
  return LRU.available(AArch64::LR);
}
AArch64GenInstrInfo::MachineOutlinerInfo
AArch64InstrInfo::getOutlininingCandidateInfo(
    std::vector<
        std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
        &RepeatedSequenceLocs) const {

  unsigned CallID = MachineOutlinerDefault;
  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumInstrsForCall = 3;
  unsigned NumInstrsToCreateFrame = 1;

  auto DoesntNeedLRSave =
      [this](std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>
                 &I) { return canOutlineWithoutLRSave(I.second); };

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].second->isTerminator()) {
    CallID = MachineOutlinerTailCall;
    FrameID = MachineOutlinerTailCall;
    NumInstrsForCall = 1;
    NumInstrsToCreateFrame = 0;
  } else if (std::all_of(RepeatedSequenceLocs.begin(),
                         RepeatedSequenceLocs.end(), DoesntNeedLRSave)) {
    CallID = MachineOutlinerNoLRSave;
    FrameID = MachineOutlinerNoLRSave;
    NumInstrsForCall = 1;
    NumInstrsToCreateFrame = 1;
  }

  // Check if the range contains a call. These require a save + restore of the
  // link register.
  if (std::any_of(RepeatedSequenceLocs[0].first, RepeatedSequenceLocs[0].second,
                  [](const MachineInstr &MI) { return MI.isCall(); }))
    NumInstrsToCreateFrame += 2; // Save + restore the link register.

  // Handle the last instruction separately. If it is a tail call, then the
  // last instruction is a call. We don't want to save + restore in this case.
  // However, it could be possible that the last instruction is a call without
  // it being valid to tail call this sequence. We should consider this as well.
  else if (RepeatedSequenceLocs[0].second->isCall() &&
           FrameID != MachineOutlinerTailCall)
    NumInstrsToCreateFrame += 2;

  return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID,
                             FrameID);
}
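// Worked example of the accounting above (hypothetical candidate): a
// non-terminator sequence in which LR is live gets the default treatment,
// i.e. NumInstrsForCall = 3 (save LR + BL + restore LR) and
// NumInstrsToCreateFrame = 1 (RET); if the sequence also contains a call,
// NumInstrsToCreateFrame grows by 2 for the LR save/restore in the frame.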
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // If F uses a redzone, then don't outline from it because it might mess up
  // the stack.
  if (!F.hasFnAttribute(Attribute::NoRedZone))
    return false;

  // If anyone is using the address of this function, don't outline from it.
  if (F.hasAddressTaken())
    return false;

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  return true;
}
AArch64GenInstrInfo::MachineOutlinerInstrType
AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {

  MachineFunction *MF = MI.getParent()->getParent();
  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();

  // Don't outline LOHs.
  if (FuncInfo->getLOHRelated().count(&MI))
    return MachineOutlinerInstrType::Illegal;

  // Don't allow debug values to impact outlining type.
  if (MI.isDebugValue() || MI.isIndirectDebugValue())
    return MachineOutlinerInstrType::Invisible;

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {

    // Is this the end of a function?
    if (MI.getParent()->succ_empty())
      return MachineOutlinerInstrType::Legal;

    // It's not, so don't outline it.
    return MachineOutlinerInstrType::Illegal;
  }
  // Outline calls without stack parameters or aggregate parameters.
  if (MI.isCall()) {
    const Module *M = MF->getFunction().getParent();
    assert(M && "No module?");

    // Get the function associated with the call. Look at each operand and find
    // the one that represents the callee and get its name.
    Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isSymbol()) {
        Callee = M->getFunction(MOP.getSymbolName());
        break;
      } else if (MOP.isGlobal()) {
        Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier());
        break;
      }
    }

    // Only handle functions that we have information about.
    if (!Callee)
      return MachineOutlinerInstrType::Illegal;
    // We have a function we have information about. Check if it's something
    // we can safely outline.

    // If the callee is vararg, it passes parameters on the stack. Don't touch
    // it.
    // FIXME: Functions like printf are very common and we should be able to
    // outline them.
    if (Callee->isVarArg())
      return MachineOutlinerInstrType::Illegal;

    // Check if any of the arguments are a pointer to a struct. We don't want
    // to outline these since they might be loaded in two instructions.
    for (Argument &Arg : Callee->args()) {
      if (Arg.getType()->isPointerTy() &&
          Arg.getType()->getPointerElementType()->isAggregateType())
        return MachineOutlinerInstrType::Illegal;
    }

    // If the thing we're calling doesn't access memory at all, then we're good
    // to go.
    if (Callee->doesNotAccessMemory())
      return MachineOutlinerInstrType::Legal;
    // It accesses memory. Get the machine function for the callee to see if
    // it's safe to outline.
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return MachineOutlinerInstrType::Illegal;

    // Does it pass anything on the stack? If it does, don't outline it.
    if (CalleeMF->getInfo<AArch64FunctionInfo>()->getBytesInStackArgArea() != 0)
      return MachineOutlinerInstrType::Illegal;

    // It doesn't, so it's safe to outline and we're done.
    return MachineOutlinerInstrType::Legal;
  }
  // Don't outline positions.
  if (MI.isPosition())
    return MachineOutlinerInstrType::Illegal;

  // Don't touch the link register or W30.
  if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
      MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    return MachineOutlinerInstrType::Illegal;

  // Make sure none of the operands are un-outlinable.
  for (const MachineOperand &MOP : MI.operands()) {
    if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
        MOP.isTargetIndex())
      return MachineOutlinerInstrType::Illegal;

    // Don't outline anything that uses the link register.
    if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR))
      return MachineOutlinerInstrType::Illegal;
  }
  // Does this use the stack?
  if (MI.modifiesRegister(AArch64::SP, &RI) ||
      MI.readsRegister(AArch64::SP, &RI)) {

    if (MI.mayLoadOrStore()) {
      unsigned Base;  // Filled with the base register of MI.
      int64_t Offset; // Filled with the offset of MI.
      unsigned DummyWidth;

      // Does it allow us to offset the base register and is the base SP?
      if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
          Base != AArch64::SP)
        return MachineOutlinerInstrType::Illegal;

      // Find the minimum/maximum offset for this instruction and check if
      // fixing it up would be in range.
      int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
      unsigned Scale;               // The scale to multiply the offsets by.
      getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);

      // TODO: We should really test what happens if an instruction overflows.
      // This is tricky to test with IR tests, but when the outliner is moved
      // to a MIR test, it really ought to be checked.
      Offset += 16; // Update the offset to what it would be if we outlined.
      if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
        return MachineOutlinerInstrType::Illegal;

      // It's in range, so we can outline it.
      return MachineOutlinerInstrType::Legal;
    }

    // We can't fix it up, so don't outline it.
    return MachineOutlinerInstrType::Illegal;
  }

  return MachineOutlinerInstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    unsigned Base, Width;
    int64_t Offset;

    // Is this a load or store with an immediate offset with SP as the base?
    if (!MI.mayLoadOrStore() ||
        !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
        Base != AArch64::SP)
      continue;

    // It is, so we have to fix it up.
    unsigned Scale;
    int64_t Dummy1, Dummy2;

    MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    assert(Scale != 0 && "Unexpected opcode!");

    // We've pushed the return address to the stack, so add 16 to the offset.
    // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
    int64_t NewImm = (Offset + 16) / Scale;
    StackOffsetOperand.setImm(NewImm);
  }
}
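// Worked example of the fixup above (hypothetical instruction), assuming
// getMemOpBaseRegImmOfsWidth reports the offset in bytes: "ldr x0, [sp, #8]"
// has Offset = 8 and Scale = 8, so NewImm = (8 + 16) / 8 = 3 and the access
// becomes "ldr x0, [sp, #24]", skipping the 16-byte slot that now holds the
// saved LR.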
void AArch64InstrInfo::insertOutlinerEpilogue(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const MachineOutlinerInfo &MInfo) const {

  // Is there a call in the outlined range?
  if (std::any_of(MBB.instr_begin(), MBB.instr_end(),
                  [](MachineInstr &MI) { return MI.isCall(); })) {
    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    fixupPostOutline(MBB);

    // LR has to be a live in so that we can save it.
    MBB.addLiveIn(AArch64::LR);

    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
      Et = std::prev(MBB.end());

    // Insert a save before the outlined region.
    MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                                .addReg(AArch64::SP, RegState::Define)
                                .addReg(AArch64::LR)
                                .addReg(AArch64::SP)
                                .addImm(-16);
    It = MBB.insert(It, STRXpre);

    // Insert a restore before the terminator for the function.
    MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                                 .addReg(AArch64::SP, RegState::Define)
                                 .addReg(AArch64::LR, RegState::Define)
                                 .addReg(AArch64::SP)
                                 .addImm(16);
    Et = MBB.insert(Et, LDRXpost);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (MInfo.FrameConstructionID == MachineOutlinerTailCall)
    return;

  // It's not a tail call, so we have to insert the return ourselves.
  MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
                          .addReg(AArch64::LR, RegState::Undef);
  MBB.insert(MBB.end(), ret);

  // Did we have to modify the stack by saving the link register?
  if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}
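// With the default frame, the emitted outlined function therefore looks
// roughly like this (hypothetical body; the save/restore pair only appears
// when the outlined range itself contains a call):
//
//   OUTLINED_FUNCTION:
//     str x30, [sp, #-16]!
//     ...                     // outlined instructions
//     ldr x30, [sp], #16
//     ret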
void AArch64InstrInfo::insertOutlinerPrologue(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const MachineOutlinerInfo &MInfo) const {}
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const MachineOutlinerInfo &MInfo) const {

  // Are we tail calling?
  if (MInfo.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // Are we saving the link register?
  if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) {
    // No, so just insert the call.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We have a default call. Save the link register.
  MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
                              .addReg(AArch64::SP, RegState::Define)
                              .addReg(AArch64::LR)
                              .addReg(AArch64::SP)
                              .addImm(-16);
  It = MBB.insert(It, STRXpre);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  It++;

  // Restore the link register.
  MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                               .addReg(AArch64::SP, RegState::Define)
                               .addReg(AArch64::LR, RegState::Define)
                               .addReg(AArch64::SP)
                               .addImm(16);
  It = MBB.insert(It, LDRXpost);

  return It;
}
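// At a call site rewritten with the default case above, the emitted sequence
// is therefore (hypothetical outlined function name):
//
//   str x30, [sp, #-16]!
//   bl  OUTLINED_FUNCTION_0
//   ldr x30, [sp], #16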