1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
12 //===----------------------------------------------------------------------===//
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineMemOperand.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/StackMaps.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Target/TargetMachine.h"
45 #include "llvm/Target/TargetOptions.h"
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
56 static cl::opt<unsigned> TBZDisplacementBits(
57 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60 static cl::opt<unsigned> CBZDisplacementBits(
61 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
64 static cl::opt<unsigned>
65 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
71 RI(STI.getTargetTriple()), Subtarget(STI) {}
73 /// getInstSizeInBytes - Return the number of bytes of code the specified
74 /// instruction may take. This returns the maximum number of bytes.
75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
76 const MachineBasicBlock &MBB = *MI.getParent();
77 const MachineFunction *MF = MBB.getParent();
78 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
80 if (MI.getOpcode() == AArch64::INLINEASM)
81 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
83 // FIXME: We currently only handle pseudoinstructions that don't get expanded
84 // before the assembly printer.
85 unsigned NumBytes = 0;
86 const MCInstrDesc &Desc = MI.getDesc();
87 switch (Desc.getOpcode()) {
89 // Anything not explicitly designated otherwise is a normal 4-byte insn.
92 case TargetOpcode::DBG_VALUE:
93 case TargetOpcode::EH_LABEL:
94 case TargetOpcode::IMPLICIT_DEF:
95 case TargetOpcode::KILL:
98 case TargetOpcode::STACKMAP:
99 // The upper bound for a stackmap intrinsic is the full length of its shadow
100 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
101 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
103 case TargetOpcode::PATCHPOINT:
104 // The size of the patchpoint intrinsic is the number of bytes requested
105 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
106 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
108 case AArch64::TLSDESC_CALLSEQ:
109 // This gets lowered to an instruction sequence which takes 16 bytes
112 case AArch64::JumpTableDest32:
113 case AArch64::JumpTableDest16:
114 case AArch64::JumpTableDest8:
118 NumBytes = MI.getOperand(1).getImm();
125 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
126 SmallVectorImpl<MachineOperand> &Cond) {
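// For reference, a summary of the Cond layouts produced by the cases below
// (reverseBranchCondition() and instantiateCondBranch() rely on this layout):
//   Bcc:        Cond = { CondCode }
//   CBZ/CBNZ:   Cond = { -1, Opcode, Reg }
//   TBZ/TBNZ:   Cond = { -1, Opcode, Reg, BitNumber }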
127 // Block ends with fall-through condbranch.
128 switch (LastInst->getOpcode()) {
130 llvm_unreachable("Unknown branch instruction?");
132 Target = LastInst->getOperand(1).getMBB();
133 Cond.push_back(LastInst->getOperand(0));
139 Target = LastInst->getOperand(1).getMBB();
140 Cond.push_back(MachineOperand::CreateImm(-1));
141 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142 Cond.push_back(LastInst->getOperand(0));
148 Target = LastInst->getOperand(2).getMBB();
149 Cond.push_back(MachineOperand::CreateImm(-1));
150 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
151 Cond.push_back(LastInst->getOperand(0));
152 Cond.push_back(LastInst->getOperand(1));
156 static unsigned getBranchDisplacementBits(unsigned Opc) {
159 llvm_unreachable("unexpected opcode!");
166 return TBZDisplacementBits;
171 return CBZDisplacementBits;
173 return BCCDisplacementBits;
177 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
178 int64_t BrOffset) const {
179 unsigned Bits = getBranchDisplacementBits(BranchOp);
180 assert(Bits >= 3 && "max branch displacement must be enough to jump "
181 "over conditional branch expansion");
182 return isIntN(Bits, BrOffset / 4);
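// For example, with the default option values above: TB[N]Z carries a 14-bit
// signed offset in instruction words, i.e. roughly +/-32 KiB (2^13 * 4 bytes),
// while CB[N]Z and Bcc carry 19 bits, i.e. roughly +/-1 MiB.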
186 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
187 switch (MI.getOpcode()) {
189 llvm_unreachable("unexpected opcode!");
191 return MI.getOperand(0).getMBB();
196 return MI.getOperand(2).getMBB();
202 return MI.getOperand(1).getMBB();
207 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
208 MachineBasicBlock *&TBB,
209 MachineBasicBlock *&FBB,
210 SmallVectorImpl<MachineOperand> &Cond,
211 bool AllowModify) const {
212 // If the block has no terminators, it just falls into the block after it.
213 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
217 if (!isUnpredicatedTerminator(*I))
220 // Get the last instruction in the block.
221 MachineInstr *LastInst = &*I;
223 // If there is only one terminator instruction, process it.
224 unsigned LastOpc = LastInst->getOpcode();
225 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
226 if (isUncondBranchOpcode(LastOpc)) {
227 TBB = LastInst->getOperand(0).getMBB();
230 if (isCondBranchOpcode(LastOpc)) {
231 // Block ends with fall-through condbranch.
232 parseCondBranch(LastInst, TBB, Cond);
235 return true; // Can't handle indirect branch.
238 // Get the instruction before it if it is a terminator.
239 MachineInstr *SecondLastInst = &*I;
240 unsigned SecondLastOpc = SecondLastInst->getOpcode();
242 // If AllowModify is true and the block ends with two or more unconditional
243 // branches, delete all but the first unconditional branch.
244 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
245 while (isUncondBranchOpcode(SecondLastOpc)) {
246 LastInst->eraseFromParent();
247 LastInst = SecondLastInst;
248 LastOpc = LastInst->getOpcode();
249 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
250 // Return now; the only terminator is an unconditional branch.
251 TBB = LastInst->getOperand(0).getMBB();
254 SecondLastInst = &*I;
255 SecondLastOpc = SecondLastInst->getOpcode();
260 // If there are three terminators, we don't know what sort of block this is.
261 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
264 // If the block ends with a B and a Bcc, handle it.
265 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
266 parseCondBranch(SecondLastInst, TBB, Cond);
267 FBB = LastInst->getOperand(0).getMBB();
271 // If the block ends with two unconditional branches, handle it. The second
272 // one is not executed, so remove it.
273 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
274 TBB = SecondLastInst->getOperand(0).getMBB();
277 I->eraseFromParent();
281 // ...likewise if it ends with an indirect branch followed by an unconditional branch.
283 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
286 I->eraseFromParent();
290 // Otherwise, can't handle this.
294 bool AArch64InstrInfo::reverseBranchCondition(
295 SmallVectorImpl<MachineOperand> &Cond) const {
296 if (Cond[0].getImm() != -1) {
298 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
299 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
301 // Folded compare-and-branch
302 switch (Cond[1].getImm()) {
304 llvm_unreachable("Unknown conditional branch!");
306 Cond[1].setImm(AArch64::CBNZW);
309 Cond[1].setImm(AArch64::CBZW);
312 Cond[1].setImm(AArch64::CBNZX);
315 Cond[1].setImm(AArch64::CBZX);
318 Cond[1].setImm(AArch64::TBNZW);
321 Cond[1].setImm(AArch64::TBZW);
324 Cond[1].setImm(AArch64::TBNZX);
327 Cond[1].setImm(AArch64::TBZX);
335 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
336 int *BytesRemoved) const {
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
341 if (!isUncondBranchOpcode(I->getOpcode()) &&
342 !isCondBranchOpcode(I->getOpcode()))
345 // Remove the branch.
346 I->eraseFromParent();
350 if (I == MBB.begin()) {
356 if (!isCondBranchOpcode(I->getOpcode())) {
362 // Remove the branch.
363 I->eraseFromParent();
370 void AArch64InstrInfo::instantiateCondBranch(
371 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
372 ArrayRef<MachineOperand> Cond) const {
373 if (Cond[0].getImm() != -1) {
375 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
377 // Folded compare-and-branch
378 // Note that we use MachineInstrBuilder::add() instead of addReg() to preserve the operand flags.
379 const MachineInstrBuilder MIB =
380 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
382 MIB.addImm(Cond[3].getImm());
387 unsigned AArch64InstrInfo::insertBranch(
388 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
389 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
390 // Shouldn't be a fall through.
391 assert(TBB && "insertBranch must not be told to insert a fallthrough");
394 if (Cond.empty()) // Unconditional branch?
395 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
397 instantiateCondBranch(MBB, DL, TBB, Cond);
405 // Two-way conditional branch.
406 instantiateCondBranch(MBB, DL, TBB, Cond);
407 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
415 // Find the original register that VReg is copied from.
416 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
417 while (TargetRegisterInfo::isVirtualRegister(VReg)) {
418 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
419 if (!DefMI->isFullCopy())
421 VReg = DefMI->getOperand(1).getReg();
426 // Determine if VReg is defined by an instruction that can be folded into a
427 // csel instruction. If so, return the folded opcode, and the replacement register.
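// For example (illustrative virtual registers), a select whose true operand is
// defined by an "add #1" can fold into a csinc:
//   %t = ADDWri %a, 1, 0
//   %d = CSELWr %t, %b, eq   -->   %d = CSINCWr %b, %a, ne
// since CSINC returns its second source operand plus one when the (inverted)
// condition does not hold.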
429 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
430 unsigned *NewVReg = nullptr) {
431 VReg = removeCopies(MRI, VReg);
432 if (!TargetRegisterInfo::isVirtualRegister(VReg))
435 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
436 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
438 unsigned SrcOpNum = 0;
439 switch (DefMI->getOpcode()) {
440 case AArch64::ADDSXri:
441 case AArch64::ADDSWri:
442 // if NZCV is used, do not fold.
443 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
445 // fall-through to ADDXri and ADDWri.
447 case AArch64::ADDXri:
448 case AArch64::ADDWri:
449 // add x, 1 -> csinc.
450 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
451 DefMI->getOperand(3).getImm() != 0)
454 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
457 case AArch64::ORNXrr:
458 case AArch64::ORNWrr: {
459 // not x -> csinv, represented as orn dst, xzr, src.
460 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
461 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
464 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
468 case AArch64::SUBSXrr:
469 case AArch64::SUBSWrr:
470 // if NZCV is used, do not fold.
471 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
473 // fall-through to SUBXrr and SUBWrr.
475 case AArch64::SUBXrr:
476 case AArch64::SUBWrr: {
477 // neg x -> csneg, represented as sub dst, xzr, src.
478 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
479 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
482 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
488 assert(Opc && SrcOpNum && "Missing parameters");
491 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
495 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
496 ArrayRef<MachineOperand> Cond,
497 unsigned TrueReg, unsigned FalseReg,
498 int &CondCycles, int &TrueCycles,
499 int &FalseCycles) const {
500 // Check register classes.
501 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
502 const TargetRegisterClass *RC =
503 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
507 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
508 unsigned ExtraCondLat = Cond.size() != 1;
510 // GPRs are handled by csel.
511 // FIXME: Fold in x+1, -x, and ~x when applicable.
512 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
513 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
514 // Single-cycle csel, csinc, csinv, and csneg.
515 CondCycles = 1 + ExtraCondLat;
516 TrueCycles = FalseCycles = 1;
517 if (canFoldIntoCSel(MRI, TrueReg))
519 else if (canFoldIntoCSel(MRI, FalseReg))
524 // Scalar floating point is handled by fcsel.
525 // FIXME: Form fabs, fmin, and fmax when applicable.
526 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
527 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
528 CondCycles = 5 + ExtraCondLat;
529 TrueCycles = FalseCycles = 2;
537 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
538 MachineBasicBlock::iterator I,
539 const DebugLoc &DL, unsigned DstReg,
540 ArrayRef<MachineOperand> Cond,
541 unsigned TrueReg, unsigned FalseReg) const {
542 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
544 // Parse the condition code, see parseCondBranch() above.
545 AArch64CC::CondCode CC;
546 switch (Cond.size()) {
548 llvm_unreachable("Unknown condition opcode in Cond");
550 CC = AArch64CC::CondCode(Cond[0].getImm());
552 case 3: { // cbz/cbnz
553 // We must insert a compare against 0.
555 switch (Cond[1].getImm()) {
557 llvm_unreachable("Unknown branch opcode in Cond");
575 unsigned SrcReg = Cond[2].getReg();
577 // cmp reg, #0 is actually subs xzr, reg, #0.
578 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
579 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
584 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
585 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
592 case 4: { // tbz/tbnz
593 // We must insert a tst instruction.
594 switch (Cond[1].getImm()) {
596 llvm_unreachable("Unknown branch opcode in Cond");
606 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
607 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
608 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
609 .addReg(Cond[2].getReg())
611 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
613 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
614 .addReg(Cond[2].getReg())
616 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
622 const TargetRegisterClass *RC = nullptr;
623 bool TryFold = false;
624 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
625 RC = &AArch64::GPR64RegClass;
626 Opc = AArch64::CSELXr;
628 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
629 RC = &AArch64::GPR32RegClass;
630 Opc = AArch64::CSELWr;
632 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
633 RC = &AArch64::FPR64RegClass;
634 Opc = AArch64::FCSELDrrr;
635 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
636 RC = &AArch64::FPR32RegClass;
637 Opc = AArch64::FCSELSrrr;
639 assert(RC && "Unsupported regclass");
641 // Try folding simple instructions into the csel.
643 unsigned NewVReg = 0;
644 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
646 // The folded opcodes csinc, csinv and csneg apply the operation to
647 // FalseReg, so we need to invert the condition.
648 CC = AArch64CC::getInvertedCondCode(CC);
651 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
653 // Fold the operation. Leave any dead instructions for DCE to clean up.
657 // This extends the live range of NewVReg.
658 MRI.clearKillFlags(NewVReg);
662 // Pull all virtual registers into the appropriate class.
663 MRI.constrainRegClass(TrueReg, RC);
664 MRI.constrainRegClass(FalseReg, RC);
667 BuildMI(MBB, I, DL, get(Opc), DstReg)
673 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
674 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
675 uint64_t Imm = MI.getOperand(1).getImm();
676 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
678 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
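// For example, MOVi32imm #0x00ff00ff is a valid logical immediate (a repeating
// 16-bit pattern) and can become "orr wN, wzr, #0x00ff00ff", whereas
// #0x12345678 is not and still needs a MOVZ/MOVK sequence.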
681 // FIXME: this implementation should be micro-architecture dependent, so a
682 // micro-architecture target hook should be introduced here in future.
683 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
684 if (!Subtarget.hasCustomCheapAsMoveHandling())
685 return MI.isAsCheapAsAMove();
687 const unsigned Opcode = MI.getOpcode();
689 // Firstly, check cases gated by features.
691 if (Subtarget.hasZeroCycleZeroingFP()) {
692 if (Opcode == AArch64::FMOVH0 ||
693 Opcode == AArch64::FMOVS0 ||
694 Opcode == AArch64::FMOVD0)
698 if (Subtarget.hasZeroCycleZeroingGP()) {
699 if (Opcode == TargetOpcode::COPY &&
700 (MI.getOperand(1).getReg() == AArch64::WZR ||
701 MI.getOperand(1).getReg() == AArch64::XZR))
705 // Secondly, check cases specific to sub-targets.
707 if (Subtarget.hasExynosCheapAsMoveHandling()) {
708 if (isExynosCheapAsMove(MI))
711 return MI.isAsCheapAsAMove();
714 // Finally, check generic cases.
720 // add/sub with immediate, without shift
721 case AArch64::ADDWri:
722 case AArch64::ADDXri:
723 case AArch64::SUBWri:
724 case AArch64::SUBXri:
725 return (MI.getOperand(3).getImm() == 0);
727 // logical ops on immediate
728 case AArch64::ANDWri:
729 case AArch64::ANDXri:
730 case AArch64::EORWri:
731 case AArch64::EORXri:
732 case AArch64::ORRWri:
733 case AArch64::ORRXri:
736 // logical ops on register without shift
737 case AArch64::ANDWrr:
738 case AArch64::ANDXrr:
739 case AArch64::BICWrr:
740 case AArch64::BICXrr:
741 case AArch64::EONWrr:
742 case AArch64::EONXrr:
743 case AArch64::EORWrr:
744 case AArch64::EORXrr:
745 case AArch64::ORNWrr:
746 case AArch64::ORNXrr:
747 case AArch64::ORRWrr:
748 case AArch64::ORRXrr:
751 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
752 // ORRXri, it is as cheap as MOV
753 case AArch64::MOVi32imm:
754 return canBeExpandedToORR(MI, 32);
755 case AArch64::MOVi64imm:
756 return canBeExpandedToORR(MI, 64);
759 llvm_unreachable("Unknown opcode to check as cheap as a move!");
762 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
763 switch (MI.getOpcode()) {
767 case AArch64::ADDWrs:
768 case AArch64::ADDXrs:
769 case AArch64::ADDSWrs:
770 case AArch64::ADDSXrs: {
771 unsigned Imm = MI.getOperand(3).getImm();
772 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
775 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
778 case AArch64::ADDWrx:
779 case AArch64::ADDXrx:
780 case AArch64::ADDXrx64:
781 case AArch64::ADDSWrx:
782 case AArch64::ADDSXrx:
783 case AArch64::ADDSXrx64: {
784 unsigned Imm = MI.getOperand(3).getImm();
785 switch (AArch64_AM::getArithExtendType(Imm)) {
788 case AArch64_AM::UXTB:
789 case AArch64_AM::UXTH:
790 case AArch64_AM::UXTW:
791 case AArch64_AM::UXTX:
792 return AArch64_AM::getArithShiftValue(Imm) <= 4;
796 case AArch64::SUBWrs:
797 case AArch64::SUBSWrs: {
798 unsigned Imm = MI.getOperand(3).getImm();
799 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
800 return ShiftVal == 0 ||
801 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
804 case AArch64::SUBXrs:
805 case AArch64::SUBSXrs: {
806 unsigned Imm = MI.getOperand(3).getImm();
807 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
808 return ShiftVal == 0 ||
809 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
812 case AArch64::SUBWrx:
813 case AArch64::SUBXrx:
814 case AArch64::SUBXrx64:
815 case AArch64::SUBSWrx:
816 case AArch64::SUBSXrx:
817 case AArch64::SUBSXrx64: {
818 unsigned Imm = MI.getOperand(3).getImm();
819 switch (AArch64_AM::getArithExtendType(Imm)) {
822 case AArch64_AM::UXTB:
823 case AArch64_AM::UXTH:
824 case AArch64_AM::UXTW:
825 case AArch64_AM::UXTX:
826 return AArch64_AM::getArithShiftValue(Imm) == 0;
830 case AArch64::LDRBBroW:
831 case AArch64::LDRBBroX:
832 case AArch64::LDRBroW:
833 case AArch64::LDRBroX:
834 case AArch64::LDRDroW:
835 case AArch64::LDRDroX:
836 case AArch64::LDRHHroW:
837 case AArch64::LDRHHroX:
838 case AArch64::LDRHroW:
839 case AArch64::LDRHroX:
840 case AArch64::LDRQroW:
841 case AArch64::LDRQroX:
842 case AArch64::LDRSBWroW:
843 case AArch64::LDRSBWroX:
844 case AArch64::LDRSBXroW:
845 case AArch64::LDRSBXroX:
846 case AArch64::LDRSHWroW:
847 case AArch64::LDRSHWroX:
848 case AArch64::LDRSHXroW:
849 case AArch64::LDRSHXroX:
850 case AArch64::LDRSWroW:
851 case AArch64::LDRSWroX:
852 case AArch64::LDRSroW:
853 case AArch64::LDRSroX:
854 case AArch64::LDRWroW:
855 case AArch64::LDRWroX:
856 case AArch64::LDRXroW:
857 case AArch64::LDRXroX:
858 case AArch64::PRFMroW:
859 case AArch64::PRFMroX:
860 case AArch64::STRBBroW:
861 case AArch64::STRBBroX:
862 case AArch64::STRBroW:
863 case AArch64::STRBroX:
864 case AArch64::STRDroW:
865 case AArch64::STRDroX:
866 case AArch64::STRHHroW:
867 case AArch64::STRHHroX:
868 case AArch64::STRHroW:
869 case AArch64::STRHroX:
870 case AArch64::STRQroW:
871 case AArch64::STRQroX:
872 case AArch64::STRSroW:
873 case AArch64::STRSroX:
874 case AArch64::STRWroW:
875 case AArch64::STRWroX:
876 case AArch64::STRXroW:
877 case AArch64::STRXroX: {
878 unsigned IsSigned = MI.getOperand(3).getImm();
884 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
885 unsigned Opc = MI.getOpcode();
889 case AArch64::SEH_StackAlloc:
890 case AArch64::SEH_SaveFPLR:
891 case AArch64::SEH_SaveFPLR_X:
892 case AArch64::SEH_SaveReg:
893 case AArch64::SEH_SaveReg_X:
894 case AArch64::SEH_SaveRegP:
895 case AArch64::SEH_SaveRegP_X:
896 case AArch64::SEH_SaveFReg:
897 case AArch64::SEH_SaveFReg_X:
898 case AArch64::SEH_SaveFRegP:
899 case AArch64::SEH_SaveFRegP_X:
900 case AArch64::SEH_SetFP:
901 case AArch64::SEH_AddFP:
902 case AArch64::SEH_Nop:
903 case AArch64::SEH_PrologEnd:
904 case AArch64::SEH_EpilogStart:
905 case AArch64::SEH_EpilogEnd:
910 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
911 unsigned &SrcReg, unsigned &DstReg,
912 unsigned &SubIdx) const {
913 switch (MI.getOpcode()) {
916 case AArch64::SBFMXri: // aka sxtw
917 case AArch64::UBFMXri: // aka uxtw
918 // Check for the 32 -> 64 bit extension case; these instructions can do much more.
920 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
922 // This is a signed or unsigned 32 -> 64 bit extension.
923 SrcReg = MI.getOperand(1).getReg();
924 DstReg = MI.getOperand(0).getReg();
925 SubIdx = AArch64::sub_32;
930 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
931 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
932 const TargetRegisterInfo *TRI = &getRegisterInfo();
933 MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
934 int64_t OffsetA = 0, OffsetB = 0;
935 unsigned WidthA = 0, WidthB = 0;
937 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
938 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
940 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
941 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
944 // Retrieve the base, offset from the base and width. Width
945 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
946 // the bases are identical, and the offset of a lower memory access +
947 // the width doesn't overlap the offset of a higher memory access,
948 // then the memory accesses are different.
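// For example, with a common base register x1:
//   ldr x0, [x1, #0]    ; OffsetA = 0,  WidthA = 8
//   ldr x2, [x1, #16]   ; OffsetB = 16, WidthB = 8
// 0 + 8 <= 16, so the two accesses cannot overlap.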
949 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
950 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
951 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
952 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
953 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
954 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
955 if (LowOffset + LowWidth <= HighOffset)
962 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
963 const MachineBasicBlock *MBB,
964 const MachineFunction &MF) const {
965 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
967 switch (MI.getOpcode()) {
969 // CSDB hints are scheduling barriers.
970 if (MI.getOperand(0).getImm() == 0x14)
975 // DSB and ISB also are scheduling barriers.
979 return isSEHInstruction(MI);
982 /// analyzeCompare - For a comparison instruction, return the source registers
983 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
984 /// Return true if the comparison instruction can be analyzed.
985 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
986 unsigned &SrcReg2, int &CmpMask,
987 int &CmpValue) const {
988 // The first operand can be a frame index where we'd normally expect a register.
990 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
991 if (!MI.getOperand(1).isReg())
994 switch (MI.getOpcode()) {
997 case AArch64::SUBSWrr:
998 case AArch64::SUBSWrs:
999 case AArch64::SUBSWrx:
1000 case AArch64::SUBSXrr:
1001 case AArch64::SUBSXrs:
1002 case AArch64::SUBSXrx:
1003 case AArch64::ADDSWrr:
1004 case AArch64::ADDSWrs:
1005 case AArch64::ADDSWrx:
1006 case AArch64::ADDSXrr:
1007 case AArch64::ADDSXrs:
1008 case AArch64::ADDSXrx:
1009 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1010 SrcReg = MI.getOperand(1).getReg();
1011 SrcReg2 = MI.getOperand(2).getReg();
1015 case AArch64::SUBSWri:
1016 case AArch64::ADDSWri:
1017 case AArch64::SUBSXri:
1018 case AArch64::ADDSXri:
1019 SrcReg = MI.getOperand(1).getReg();
1022 // FIXME: In order to convert CmpValue to 0 or 1
1023 CmpValue = MI.getOperand(2).getImm() != 0;
1025 case AArch64::ANDSWri:
1026 case AArch64::ANDSXri:
1027 // ANDS does not use the same encoding scheme as the other xxxS instructions.
1029 SrcReg = MI.getOperand(1).getReg();
1032 // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
1033 // while the type of CmpValue is int. When converting uint64_t to int,
1034 // the high 32 bits of uint64_t will be lost.
1035 // In fact it causes a bug in spec2006-483.xalancbmk.
1036 // CmpValue is only used to compare with zero in optimizeCompareInstr.
1037 CmpValue = AArch64_AM::decodeLogicalImmediate(
1038 MI.getOperand(2).getImm(),
1039 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1046 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1047 MachineBasicBlock *MBB = Instr.getParent();
1048 assert(MBB && "Can't get MachineBasicBlock here");
1049 MachineFunction *MF = MBB->getParent();
1050 assert(MF && "Can't get MachineFunction here");
1051 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1052 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1053 MachineRegisterInfo *MRI = &MF->getRegInfo();
1055 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1057 MachineOperand &MO = Instr.getOperand(OpIdx);
1058 const TargetRegisterClass *OpRegCstraints =
1059 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1061 // If there's no constraint, there's nothing to do.
1062 if (!OpRegCstraints)
1064 // If the operand is a frame index, there's nothing to do here.
1065 // A frame index operand will resolve correctly during PEI.
1069 assert(MO.isReg() &&
1070 "Operand has register constraints without being a register!");
1072 unsigned Reg = MO.getReg();
1073 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1074 if (!OpRegCstraints->contains(Reg))
1076 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1077 !MRI->constrainRegClass(Reg, OpRegCstraints))
1084 /// Return the opcode that does not set flags when possible - otherwise
1085 /// return the original opcode. The caller is responsible to do the actual
1086 /// substitution and legality checking.
1087 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1088 // Don't convert all compare instructions, because for some the zero register
1089 // encoding becomes the sp register.
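// For example, "subs wzr, w1, #1" (i.e. "cmp w1, #1") uses register number 31
// as the zero register in its destination slot; in the non-flag-setting SUBWri
// form the same encoding would mean WSP, so such compares keep the S form.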
1090 bool MIDefinesZeroReg = false;
1091 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1092 MIDefinesZeroReg = true;
1094 switch (MI.getOpcode()) {
1096 return MI.getOpcode();
1097 case AArch64::ADDSWrr:
1098 return AArch64::ADDWrr;
1099 case AArch64::ADDSWri:
1100 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1101 case AArch64::ADDSWrs:
1102 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1103 case AArch64::ADDSWrx:
1104 return AArch64::ADDWrx;
1105 case AArch64::ADDSXrr:
1106 return AArch64::ADDXrr;
1107 case AArch64::ADDSXri:
1108 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1109 case AArch64::ADDSXrs:
1110 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1111 case AArch64::ADDSXrx:
1112 return AArch64::ADDXrx;
1113 case AArch64::SUBSWrr:
1114 return AArch64::SUBWrr;
1115 case AArch64::SUBSWri:
1116 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1117 case AArch64::SUBSWrs:
1118 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1119 case AArch64::SUBSWrx:
1120 return AArch64::SUBWrx;
1121 case AArch64::SUBSXrr:
1122 return AArch64::SUBXrr;
1123 case AArch64::SUBSXri:
1124 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1125 case AArch64::SUBSXrs:
1126 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1127 case AArch64::SUBSXrx:
1128 return AArch64::SUBXrx;
1132 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1134 /// True when condition flags are accessed (either by writing or reading)
1135 /// on the instruction trace starting at From and ending at To.
1137 /// Note: If From and To are from different blocks, it's assumed CC are accessed on the path.
1139 static bool areCFlagsAccessedBetweenInstrs(
1140 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1141 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1142 // Early exit if To is at the beginning of the BB.
1143 if (To == To->getParent()->begin())
1146 // Check whether the instructions are in the same basic block
1147 // If not, assume the condition flags might get modified somewhere.
1148 if (To->getParent() != From->getParent())
1151 // From must be above To.
1152 assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1153 [From](MachineInstr &MI) {
1154 return MI.getIterator() == From;
1155 }) != To->getParent()->rend());
1157 // We iterate backwards, starting at \p To, until we hit \p From.
1158 for (--To; To != From; --To) {
1159 const MachineInstr &Instr = *To;
1161 if (((AccessToCheck & AK_Write) &&
1162 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1163 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1169 /// Try to optimize a compare instruction. A compare instruction is an
1170 /// instruction which produces AArch64::NZCV. It is truly a compare
1172 /// instruction when there are no uses of its destination register.
1174 /// The following steps are tried in order:
1175 /// 1. Convert CmpInstr into an unconditional version.
1176 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1177 /// condition code or an instruction which can be converted into such an instruction.
1179 /// Only comparison with zero is supported.
1180 bool AArch64InstrInfo::optimizeCompareInstr(
1181 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1182 int CmpValue, const MachineRegisterInfo *MRI) const {
1183 assert(CmpInstr.getParent());
1186 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1187 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1188 if (DeadNZCVIdx != -1) {
1189 if (CmpInstr.definesRegister(AArch64::WZR) ||
1190 CmpInstr.definesRegister(AArch64::XZR)) {
1191 CmpInstr.eraseFromParent();
1194 unsigned Opc = CmpInstr.getOpcode();
1195 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1198 const MCInstrDesc &MCID = get(NewOpc);
1199 CmpInstr.setDesc(MCID);
1200 CmpInstr.RemoveOperand(DeadNZCVIdx);
1201 bool succeeded = UpdateOperandRegClass(CmpInstr);
1203 assert(succeeded && "Some operands reg class are incompatible!");
1207 // Continue only if we have a "ri" where immediate is zero.
1208 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1210 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1211 if (CmpValue != 0 || SrcReg2 != 0)
1214 // CmpInstr is a Compare instruction if destination register is not used.
1215 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1218 return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1221 /// Get opcode of S version of Instr.
1222 /// If Instr is S version its opcode is returned.
1223 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1224 /// or we are not interested in it.
1225 static unsigned sForm(MachineInstr &Instr) {
1226 switch (Instr.getOpcode()) {
1228 return AArch64::INSTRUCTION_LIST_END;
1230 case AArch64::ADDSWrr:
1231 case AArch64::ADDSWri:
1232 case AArch64::ADDSXrr:
1233 case AArch64::ADDSXri:
1234 case AArch64::SUBSWrr:
1235 case AArch64::SUBSWri:
1236 case AArch64::SUBSXrr:
1237 case AArch64::SUBSXri:
1238 return Instr.getOpcode();
1240 case AArch64::ADDWrr:
1241 return AArch64::ADDSWrr;
1242 case AArch64::ADDWri:
1243 return AArch64::ADDSWri;
1244 case AArch64::ADDXrr:
1245 return AArch64::ADDSXrr;
1246 case AArch64::ADDXri:
1247 return AArch64::ADDSXri;
1248 case AArch64::ADCWr:
1249 return AArch64::ADCSWr;
1250 case AArch64::ADCXr:
1251 return AArch64::ADCSXr;
1252 case AArch64::SUBWrr:
1253 return AArch64::SUBSWrr;
1254 case AArch64::SUBWri:
1255 return AArch64::SUBSWri;
1256 case AArch64::SUBXrr:
1257 return AArch64::SUBSXrr;
1258 case AArch64::SUBXri:
1259 return AArch64::SUBSXri;
1260 case AArch64::SBCWr:
1261 return AArch64::SBCSWr;
1262 case AArch64::SBCXr:
1263 return AArch64::SBCSXr;
1264 case AArch64::ANDWri:
1265 return AArch64::ANDSWri;
1266 case AArch64::ANDXri:
1267 return AArch64::ANDSXri;
1271 /// Check if AArch64::NZCV should be alive in successors of MBB.
1272 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1273 for (auto *BB : MBB->successors())
1274 if (BB->isLiveIn(AArch64::NZCV))
1287 UsedNZCV() = default;
1289 UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1290 this->N |= UsedFlags.N;
1291 this->Z |= UsedFlags.Z;
1292 this->C |= UsedFlags.C;
1293 this->V |= UsedFlags.V;
1298 } // end anonymous namespace
1300 /// Find a condition code used by the instruction.
1301 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1302 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1303 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1304 switch (Instr.getOpcode()) {
1306 return AArch64CC::Invalid;
1308 case AArch64::Bcc: {
1309 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1311 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1314 case AArch64::CSINVWr:
1315 case AArch64::CSINVXr:
1316 case AArch64::CSINCWr:
1317 case AArch64::CSINCXr:
1318 case AArch64::CSELWr:
1319 case AArch64::CSELXr:
1320 case AArch64::CSNEGWr:
1321 case AArch64::CSNEGXr:
1322 case AArch64::FCSELSrrr:
1323 case AArch64::FCSELDrrr: {
1324 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1326 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1331 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1332 assert(CC != AArch64CC::Invalid);
1338 case AArch64CC::EQ: // Z set
1339 case AArch64CC::NE: // Z clear
1343 case AArch64CC::HI: // Z clear and C set
1344 case AArch64CC::LS: // Z set or C clear
1347 case AArch64CC::HS: // C set
1348 case AArch64CC::LO: // C clear
1352 case AArch64CC::MI: // N set
1353 case AArch64CC::PL: // N clear
1357 case AArch64CC::VS: // V set
1358 case AArch64CC::VC: // V clear
1362 case AArch64CC::GT: // Z clear, N and V the same
1363 case AArch64CC::LE: // Z set, N and V differ
1366 case AArch64CC::GE: // N and V the same
1367 case AArch64CC::LT: // N and V differ
1375 static bool isADDSRegImm(unsigned Opcode) {
1376 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1379 static bool isSUBSRegImm(unsigned Opcode) {
1380 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1383 /// Check if CmpInstr can be substituted by MI.
1385 /// CmpInstr can be substituted:
1386 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1387 /// - and, MI and CmpInstr are from the same MachineBB
1388 /// - and, condition flags are not alive in successors of the CmpInstr parent
1389 /// - and, if MI opcode is the S form there must be no defs of flags between MI and CmpInstr,
1391 /// or if MI opcode is not the S form there must be neither defs of flags
1392 /// nor uses of flags between MI and CmpInstr.
1393 /// - and C/V flags are not used after CmpInstr
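///
/// For example (illustrative MIR), assuming none of the conditions above is
/// violated:
///   %2 = SUBWrr %0, %1
///   %3 = SUBSWri %2, 0, 0, implicit-def $nzcv   ; cmp %2, #0 (%3 unused)
///   Bcc 0, %bb.1, implicit $nzcv                ; b.eq
/// can become
///   %2 = SUBSWrr %0, %1, implicit-def $nzcv
///   Bcc 0, %bb.1, implicit $nzcv
/// because EQ only reads Z, and SUBS %0, %1 sets Z exactly when %0 - %1 is
/// zero, which is what the compare against zero tested.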
1394 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1395 const TargetRegisterInfo *TRI) {
1397 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1400 const unsigned CmpOpcode = CmpInstr->getOpcode();
1401 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1404 if (MI->getParent() != CmpInstr->getParent())
1407 if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1410 AccessKind AccessToCheck = AK_Write;
1411 if (sForm(*MI) != MI->getOpcode())
1412 AccessToCheck = AK_All;
1413 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1416 UsedNZCV NZCVUsedAfterCmp;
1417 for (auto I = std::next(CmpInstr->getIterator()),
1418 E = CmpInstr->getParent()->instr_end();
1420 const MachineInstr &Instr = *I;
1421 if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1422 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1423 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1425 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1428 if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1432 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1435 /// Substitute an instruction comparing to zero with another instruction
1436 /// which produces needed condition flags.
1438 /// Return true on success.
1439 bool AArch64InstrInfo::substituteCmpToZero(
1440 MachineInstr &CmpInstr, unsigned SrcReg,
1441 const MachineRegisterInfo *MRI) const {
1443 // Get the unique definition of SrcReg.
1444 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1448 const TargetRegisterInfo *TRI = &getRegisterInfo();
1450 unsigned NewOpc = sForm(*MI);
1451 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1454 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1457 // Update the instruction to set NZCV.
1458 MI->setDesc(get(NewOpc));
1459 CmpInstr.eraseFromParent();
1460 bool succeeded = UpdateOperandRegClass(*MI);
1462 assert(succeeded && "Some operands reg class are incompatible!");
1463 MI->addRegisterDefined(AArch64::NZCV, TRI);
1467 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1468 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1469 MI.getOpcode() != AArch64::CATCHRET)
1472 MachineBasicBlock &MBB = *MI.getParent();
1473 DebugLoc DL = MI.getDebugLoc();
1475 if (MI.getOpcode() == AArch64::CATCHRET) {
1476 // Skip to the first instruction before the epilog.
1477 const TargetInstrInfo *TII =
1478 MBB.getParent()->getSubtarget().getInstrInfo();
1479 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1480 auto MBBI = MachineBasicBlock::iterator(MI);
1481 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1482 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1483 FirstEpilogSEH != MBB.begin())
1484 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1485 if (FirstEpilogSEH != MBB.begin())
1486 FirstEpilogSEH = std::next(FirstEpilogSEH);
1487 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1488 .addReg(AArch64::X0, RegState::Define)
1490 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1491 .addReg(AArch64::X0, RegState::Define)
1492 .addReg(AArch64::X0)
1498 unsigned Reg = MI.getOperand(0).getReg();
1499 const GlobalValue *GV =
1500 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1501 const TargetMachine &TM = MBB.getParent()->getTarget();
1502 unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1503 const unsigned char MO_NC = AArch64II::MO_NC;
1505 if ((OpFlags & AArch64II::MO_GOT) != 0) {
1506 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1507 .addGlobalAddress(GV, 0, OpFlags);
1508 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1509 .addReg(Reg, RegState::Kill)
1511 .addMemOperand(*MI.memoperands_begin());
1512 } else if (TM.getCodeModel() == CodeModel::Large) {
1513 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1514 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1516 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1517 .addReg(Reg, RegState::Kill)
1518 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1520 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1521 .addReg(Reg, RegState::Kill)
1522 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1524 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1525 .addReg(Reg, RegState::Kill)
1526 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1528 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1529 .addReg(Reg, RegState::Kill)
1531 .addMemOperand(*MI.memoperands_begin());
1532 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1533 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1534 .addGlobalAddress(GV, 0, OpFlags);
1536 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1537 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1538 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1539 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1540 .addReg(Reg, RegState::Kill)
1541 .addGlobalAddress(GV, 0, LoFlags)
1542 .addMemOperand(*MI.memoperands_begin());
1550 // Return true if this instruction simply sets its single destination register
1551 // to zero. This is equivalent to a register rename of the zero-register.
1552 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1553 switch (MI.getOpcode()) {
1556 case AArch64::MOVZWi:
1557 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1558 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1559 assert(MI.getDesc().getNumOperands() == 3 &&
1560 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1564 case AArch64::ANDWri: // and Rd, Rzr, #imm
1565 return MI.getOperand(1).getReg() == AArch64::WZR;
1566 case AArch64::ANDXri:
1567 return MI.getOperand(1).getReg() == AArch64::XZR;
1568 case TargetOpcode::COPY:
1569 return MI.getOperand(1).getReg() == AArch64::WZR;
1574 // Return true if this instruction simply renames a general register without modifying bits.
1576 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1577 switch (MI.getOpcode()) {
1580 case TargetOpcode::COPY: {
1581 // GPR32 copies will be lowered to ORRXrs
1582 unsigned DstReg = MI.getOperand(0).getReg();
1583 return (AArch64::GPR32RegClass.contains(DstReg) ||
1584 AArch64::GPR64RegClass.contains(DstReg));
1586 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1587 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1588 assert(MI.getDesc().getNumOperands() == 4 &&
1589 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1593 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1594 if (MI.getOperand(2).getImm() == 0) {
1595 assert(MI.getDesc().getNumOperands() == 4 &&
1596 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1604 // Return true if this instruction simply renames a general register without modifying bits.
1606 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1607 switch (MI.getOpcode()) {
1610 case TargetOpcode::COPY: {
1611 // FPR64 copies will be lowered to ORR.16b
1612 unsigned DstReg = MI.getOperand(0).getReg();
1613 return (AArch64::FPR64RegClass.contains(DstReg) ||
1614 AArch64::FPR128RegClass.contains(DstReg));
1616 case AArch64::ORRv16i8:
1617 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1618 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1619 "invalid ORRv16i8 operands");
1627 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1628 int &FrameIndex) const {
1629 switch (MI.getOpcode()) {
1632 case AArch64::LDRWui:
1633 case AArch64::LDRXui:
1634 case AArch64::LDRBui:
1635 case AArch64::LDRHui:
1636 case AArch64::LDRSui:
1637 case AArch64::LDRDui:
1638 case AArch64::LDRQui:
1639 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1640 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1641 FrameIndex = MI.getOperand(1).getIndex();
1642 return MI.getOperand(0).getReg();
1650 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1651 int &FrameIndex) const {
1652 switch (MI.getOpcode()) {
1655 case AArch64::STRWui:
1656 case AArch64::STRXui:
1657 case AArch64::STRBui:
1658 case AArch64::STRHui:
1659 case AArch64::STRSui:
1660 case AArch64::STRDui:
1661 case AArch64::STRQui:
1662 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1663 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1664 FrameIndex = MI.getOperand(1).getIndex();
1665 return MI.getOperand(0).getReg();
1672 /// Check all MachineMemOperands for a hint to suppress pairing.
1673 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1674 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1675 return MMO->getFlags() & MOSuppressPair;
1679 /// Set a flag on the first MachineMemOperand to suppress pairing.
1680 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1681 if (MI.memoperands_empty())
1683 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1686 /// Check all MachineMemOperands for a hint that the load/store is strided.
1687 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1688 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1689 return MMO->getFlags() & MOStridedAccess;
1693 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1697 case AArch64::STURSi:
1698 case AArch64::STURDi:
1699 case AArch64::STURQi:
1700 case AArch64::STURBBi:
1701 case AArch64::STURHHi:
1702 case AArch64::STURWi:
1703 case AArch64::STURXi:
1704 case AArch64::LDURSi:
1705 case AArch64::LDURDi:
1706 case AArch64::LDURQi:
1707 case AArch64::LDURWi:
1708 case AArch64::LDURXi:
1709 case AArch64::LDURSWi:
1710 case AArch64::LDURHHi:
1711 case AArch64::LDURBBi:
1712 case AArch64::LDURSBWi:
1713 case AArch64::LDURSHWi:
1718 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1719 switch (MI.getOpcode()) {
1722 // Scaled instructions.
1723 case AArch64::STRSui:
1724 case AArch64::STRDui:
1725 case AArch64::STRQui:
1726 case AArch64::STRXui:
1727 case AArch64::STRWui:
1728 case AArch64::LDRSui:
1729 case AArch64::LDRDui:
1730 case AArch64::LDRQui:
1731 case AArch64::LDRXui:
1732 case AArch64::LDRWui:
1733 case AArch64::LDRSWui:
1734 // Unscaled instructions.
1735 case AArch64::STURSi:
1736 case AArch64::STURDi:
1737 case AArch64::STURQi:
1738 case AArch64::STURWi:
1739 case AArch64::STURXi:
1740 case AArch64::LDURSi:
1741 case AArch64::LDURDi:
1742 case AArch64::LDURQi:
1743 case AArch64::LDURWi:
1744 case AArch64::LDURXi:
1745 case AArch64::LDURSWi:
1750 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1754 llvm_unreachable("Opcode has no flag setting equivalent!");
1756 case AArch64::ADDWri:
1758 return AArch64::ADDSWri;
1759 case AArch64::ADDWrr:
1761 return AArch64::ADDSWrr;
1762 case AArch64::ADDWrs:
1764 return AArch64::ADDSWrs;
1765 case AArch64::ADDWrx:
1767 return AArch64::ADDSWrx;
1768 case AArch64::ANDWri:
1770 return AArch64::ANDSWri;
1771 case AArch64::ANDWrr:
1773 return AArch64::ANDSWrr;
1774 case AArch64::ANDWrs:
1776 return AArch64::ANDSWrs;
1777 case AArch64::BICWrr:
1779 return AArch64::BICSWrr;
1780 case AArch64::BICWrs:
1782 return AArch64::BICSWrs;
1783 case AArch64::SUBWri:
1785 return AArch64::SUBSWri;
1786 case AArch64::SUBWrr:
1788 return AArch64::SUBSWrr;
1789 case AArch64::SUBWrs:
1791 return AArch64::SUBSWrs;
1792 case AArch64::SUBWrx:
1794 return AArch64::SUBSWrx;
1796 case AArch64::ADDXri:
1798 return AArch64::ADDSXri;
1799 case AArch64::ADDXrr:
1801 return AArch64::ADDSXrr;
1802 case AArch64::ADDXrs:
1804 return AArch64::ADDSXrs;
1805 case AArch64::ADDXrx:
1807 return AArch64::ADDSXrx;
1808 case AArch64::ANDXri:
1810 return AArch64::ANDSXri;
1811 case AArch64::ANDXrr:
1813 return AArch64::ANDSXrr;
1814 case AArch64::ANDXrs:
1816 return AArch64::ANDSXrs;
1817 case AArch64::BICXrr:
1819 return AArch64::BICSXrr;
1820 case AArch64::BICXrs:
1822 return AArch64::BICSXrs;
1823 case AArch64::SUBXri:
1825 return AArch64::SUBSXri;
1826 case AArch64::SUBXrr:
1828 return AArch64::SUBSXrr;
1829 case AArch64::SUBXrs:
1831 return AArch64::SUBSXrs;
1832 case AArch64::SUBXrx:
1834 return AArch64::SUBSXrx;
1838 // Is this a candidate for ld/st merging or pairing? For example, we don't
1839 // touch volatiles or load/stores that have a hint to avoid pair formation.
1840 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1841 // If this is a volatile load/store, don't mess with it.
1842 if (MI.hasOrderedMemoryRef())
1845 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1846 assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1847 "Expected a reg or frame index operand.");
1848 if (!MI.getOperand(2).isImm())
1851 // Can't merge/pair if the instruction modifies the base register.
1852 // e.g., ldr x0, [x0]
1853 // This case will never occur with an FI base.
1854 if (MI.getOperand(1).isReg()) {
1855 unsigned BaseReg = MI.getOperand(1).getReg();
1856 const TargetRegisterInfo *TRI = &getRegisterInfo();
1857 if (MI.modifiesRegister(BaseReg, TRI))
1861 // Check if this load/store has a hint to avoid pair formation.
1862 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1863 if (isLdStPairSuppressed(MI))
1866 // On some CPUs quad load/store pairs are slower than two single load/stores.
1867 if (Subtarget.isPaired128Slow()) {
1868 switch (MI.getOpcode()) {
1871 case AArch64::LDURQi:
1872 case AArch64::STURQi:
1873 case AArch64::LDRQui:
1874 case AArch64::STRQui:
1882 bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
1883 MachineOperand *&BaseOp,
1885 const TargetRegisterInfo *TRI) const {
1887 return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1890 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1891 MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
1892 unsigned &Width, const TargetRegisterInfo *TRI) const {
1893 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1894 // Handle only loads/stores with base register followed by immediate offset.
1895 if (LdSt.getNumExplicitOperands() == 3) {
1896 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1897 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
1898 !LdSt.getOperand(2).isImm())
1900 } else if (LdSt.getNumExplicitOperands() == 4) {
1901 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1902 if (!LdSt.getOperand(1).isReg() ||
1903 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
1904 !LdSt.getOperand(3).isImm())
1909 // Get the scaling factor for the instruction and set the width of the memory access.
1912 int64_t Dummy1, Dummy2;
1914 // If this returns false, then it's an instruction we don't want to handle.
1915 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1918 // Compute the offset. Offset is calculated as the immediate operand
1919 // multiplied by the scaling factor. Unscaled instructions have a scaling factor of 1.
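// For example, LDRXui with immediate 3 addresses byte offset 24 (scale 8),
// while LDURXi with immediate 24 addresses the same byte offset (scale 1).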
1921 if (LdSt.getNumExplicitOperands() == 3) {
1922 BaseOp = &LdSt.getOperand(1);
1923 Offset = LdSt.getOperand(2).getImm() * Scale;
1925 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1926 BaseOp = &LdSt.getOperand(2);
1927 Offset = LdSt.getOperand(3).getImm() * Scale;
1930 assert((BaseOp->isReg() || BaseOp->isFI()) &&
1931 "getMemOperandWithOffset only supports base "
1932 "operands of type register or frame index.");
1938 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
1939 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1940 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
1941 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
1945 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1946 unsigned &Width, int64_t &MinOffset,
1947 int64_t &MaxOffset) const {
1949 // Not a memory operation or something we want to handle.
1952 MinOffset = MaxOffset = 0;
1954 case AArch64::STRWpost:
1955 case AArch64::LDRWpost:
1961 case AArch64::LDURQi:
1962 case AArch64::STURQi:
1968 case AArch64::LDURXi:
1969 case AArch64::LDURDi:
1970 case AArch64::STURXi:
1971 case AArch64::STURDi:
1977 case AArch64::LDURWi:
1978 case AArch64::LDURSi:
1979 case AArch64::LDURSWi:
1980 case AArch64::STURWi:
1981 case AArch64::STURSi:
1987 case AArch64::LDURHi:
1988 case AArch64::LDURHHi:
1989 case AArch64::LDURSHXi:
1990 case AArch64::LDURSHWi:
1991 case AArch64::STURHi:
1992 case AArch64::STURHHi:
1998 case AArch64::LDURBi:
1999 case AArch64::LDURBBi:
2000 case AArch64::LDURSBXi:
2001 case AArch64::LDURSBWi:
2002 case AArch64::STURBi:
2003 case AArch64::STURBBi:
2009 case AArch64::LDPQi:
2010 case AArch64::LDNPQi:
2011 case AArch64::STPQi:
2012 case AArch64::STNPQi:
2018 case AArch64::LDRQui:
2019 case AArch64::STRQui:
2024 case AArch64::LDPXi:
2025 case AArch64::LDPDi:
2026 case AArch64::LDNPXi:
2027 case AArch64::LDNPDi:
2028 case AArch64::STPXi:
2029 case AArch64::STPDi:
2030 case AArch64::STNPXi:
2031 case AArch64::STNPDi:
2037 case AArch64::LDRXui:
2038 case AArch64::LDRDui:
2039 case AArch64::STRXui:
2040 case AArch64::STRDui:
2045 case AArch64::LDPWi:
2046 case AArch64::LDPSi:
2047 case AArch64::LDNPWi:
2048 case AArch64::LDNPSi:
2049 case AArch64::STPWi:
2050 case AArch64::STPSi:
2051 case AArch64::STNPWi:
2052 case AArch64::STNPSi:
2058 case AArch64::LDRWui:
2059 case AArch64::LDRSui:
2060 case AArch64::LDRSWui:
2061 case AArch64::STRWui:
2062 case AArch64::STRSui:
2067 case AArch64::LDRHui:
2068 case AArch64::LDRHHui:
2069 case AArch64::STRHui:
2070 case AArch64::STRHHui:
2075 case AArch64::LDRBui:
2076 case AArch64::LDRBBui:
2077 case AArch64::STRBui:
2078 case AArch64::STRBBui:
2088 static unsigned getOffsetStride(unsigned Opc) {
2092 case AArch64::LDURQi:
2093 case AArch64::STURQi:
2095 case AArch64::LDURXi:
2096 case AArch64::LDURDi:
2097 case AArch64::STURXi:
2098 case AArch64::STURDi:
2100 case AArch64::LDURWi:
2101 case AArch64::LDURSi:
2102 case AArch64::LDURSWi:
2103 case AArch64::STURWi:
2104 case AArch64::STURSi:
2109 // Scale the unscaled offsets. Returns false if the unscaled offset can't be scaled.
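// For example, an unscaled STURXi at byte offset 24 (stride 8) scales to
// element offset 3, the offset the equivalent scaled STRXui/STPXi would use;
// byte offset 20 is not a multiple of 8 and cannot be scaled.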
2111 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2112 unsigned OffsetStride = getOffsetStride(Opc);
2113 if (OffsetStride == 0)
2115 // If the byte-offset isn't a multiple of the stride, we can't scale this offset.
2117 if (Offset % OffsetStride != 0)
2120 // Convert the byte-offset used by unscaled into an "element" offset used
2121 // by the scaled pair load/store instructions.
2122 Offset /= OffsetStride;
2126 // Unscale the scaled offsets. Returns false if the scaled offset can't be unscaled.
2128 static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
2129 unsigned OffsetStride = getOffsetStride(Opc);
2130 if (OffsetStride == 0)
2133 // Convert the "element" offset used by scaled pair load/store instructions
2134 // into the byte-offset used by unscaled.
2135 Offset *= OffsetStride;
2139 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2140 if (FirstOpc == SecondOpc)
2142 // We can also pair sign-ext and zero-ext instructions.
2146 case AArch64::LDRWui:
2147 case AArch64::LDURWi:
2148 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2149 case AArch64::LDRSWui:
2150 case AArch64::LDURSWi:
2151 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2153 // These instructions can't be paired based on their opcodes.
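// Decide whether two frame-index based accesses can be clustered. Accesses
// through fixed stack objects may refer to different slots, so for those the
// object offsets plus the instruction offsets must describe adjacent
// elements.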
2157 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2158 int64_t Offset1, unsigned Opcode1, int FI2,
2159 int64_t Offset2, unsigned Opcode2) {
2160 // Accesses through fixed stack object frame indices may access a different
2161 // fixed stack slot. Check that the object offsets + offsets match.
2162 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2163 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2164 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2165 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2166 // Get the byte-offset from the object offset.
2167 if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
2169 ObjectOffset1 += Offset1;
2170 ObjectOffset2 += Offset2;
2171 // Get the "element" index in the object.
2172 if (!scaleOffset(Opcode1, ObjectOffset1) ||
2173 !scaleOffset(Opcode2, ObjectOffset2))
2175 return ObjectOffset1 + 1 == ObjectOffset2;
2181 /// Detect opportunities for ldp/stp formation.
2183 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2184 bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
2185 MachineOperand &BaseOp2,
2186 unsigned NumLoads) const {
2187 MachineInstr &FirstLdSt = *BaseOp1.getParent();
2188 MachineInstr &SecondLdSt = *BaseOp2.getParent();
2189 if (BaseOp1.getType() != BaseOp2.getType())
2192 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2193 "Only base registers and frame indices are supported.");
2195 // Check for both base regs and base FI.
2196 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2199 // Only cluster up to a single pair.
2203 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2206 // Can we pair these instructions based on their opcodes?
2207 unsigned FirstOpc = FirstLdSt.getOpcode();
2208 unsigned SecondOpc = SecondLdSt.getOpcode();
2209 if (!canPairLdStOpc(FirstOpc, SecondOpc))
2212 // Can't merge volatiles or load/stores that have a hint to avoid pair
2213 // formation, for example.
2214 if (!isCandidateToMergeOrPair(FirstLdSt) ||
2215 !isCandidateToMergeOrPair(SecondLdSt))
2218 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2219 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2220 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2223 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2224 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2227 // Pairwise instructions have a 7-bit signed offset field.
2228 if (Offset1 > 63 || Offset1 < -64)
2231 // The caller should already have ordered First/SecondLdSt by offset;
2232 // the exception is accesses with different frame-index bases.
2233 if (BaseOp1.isFI()) {
2234 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
2235 "Caller should have ordered offsets.");
2237 const MachineFrameInfo &MFI =
2238 FirstLdSt.getParent()->getParent()->getFrameInfo();
2239 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2240 BaseOp2.getIndex(), Offset2, SecondOpc);
2243 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2244 "Caller should have ordered offsets.");
2246 return Offset1 + 1 == Offset2;
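// Add Reg to MIB with the given register state. For physical registers the
// SubIdx sub-register is resolved directly; virtual registers carry the
// sub-register index on the operand instead.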
2249 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2250 unsigned Reg, unsigned SubIdx,
2252 const TargetRegisterInfo *TRI) {
2254 return MIB.addReg(Reg, State);
2256 if (TargetRegisterInfo::isPhysicalRegister(Reg))
2257 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2258 return MIB.addReg(Reg, State, SubIdx);
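// Returns true when copying a register tuple front-to-back would overwrite a
// source sub-register before it has been read. For example, with
// DestReg = Q1_Q2 and SrcReg = Q0_Q1, copying q1 first would clobber the
// source's q1; such copies must be done back-to-front instead.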
2261 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2263 // We really want the positive remainder mod 32 here, which happens to be
2264 // easily obtainable with a mask.
2265 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
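// Copy a D/Q register tuple one sub-register at a time using vector ORR,
// iterating backwards when a forward copy would clobber not-yet-read source
// sub-registers.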
2268 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2269 MachineBasicBlock::iterator I,
2270 const DebugLoc &DL, unsigned DestReg,
2271 unsigned SrcReg, bool KillSrc,
2273 ArrayRef<unsigned> Indices) const {
2274 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2275 const TargetRegisterInfo *TRI = &getRegisterInfo();
2276 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2277 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2278 unsigned NumRegs = Indices.size();
2280 int SubReg = 0, End = NumRegs, Incr = 1;
2281 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2282 SubReg = NumRegs - 1;
2287 for (; SubReg != End; SubReg += Incr) {
2288 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2289 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2290 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2291 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2295 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2296 MachineBasicBlock::iterator I,
2297 const DebugLoc &DL, unsigned DestReg,
2298 unsigned SrcReg, bool KillSrc) const {
2299 if (AArch64::GPR32spRegClass.contains(DestReg) &&
2300 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2301 const TargetRegisterInfo *TRI = &getRegisterInfo();
2303 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2304 // If either operand is WSP, expand to ADD #0.
2305 if (Subtarget.hasZeroCycleRegMove()) {
2306 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2307 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2308 &AArch64::GPR64spRegClass);
2309 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2310 &AArch64::GPR64spRegClass);
2311 // This instruction is reading and writing X registers. This may upset
2312 // the register scavenger and machine verifier, so we need to indicate
2313 // that we are reading an undefined value from SrcRegX, but a proper
2314 // value from SrcReg.
2315 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2316 .addReg(SrcRegX, RegState::Undef)
2318 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2319 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2321 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2322 .addReg(SrcReg, getKillRegState(KillSrc))
2324 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2326 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2327 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2329 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2331 if (Subtarget.hasZeroCycleRegMove()) {
2332 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2333 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2334 &AArch64::GPR64spRegClass);
2335 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2336 &AArch64::GPR64spRegClass);
2337 // This instruction is reading and writing X registers. This may upset
2338 // the register scavenger and machine verifier, so we need to indicate
2339 // that we are reading an undefined value from SrcRegX, but a proper
2340 // value from SrcReg.
2341 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2342 .addReg(AArch64::XZR)
2343 .addReg(SrcRegX, RegState::Undef)
2344 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2346 // Otherwise, expand to ORR WZR.
2347 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2348 .addReg(AArch64::WZR)
2349 .addReg(SrcReg, getKillRegState(KillSrc));
2355 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2356 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2357 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2358 // If either operand is SP, expand to ADD #0.
2359 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2360 .addReg(SrcReg, getKillRegState(KillSrc))
2362 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2363 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2364 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2366 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2368 // Otherwise, expand to ORR XZR.
2369 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2370 .addReg(AArch64::XZR)
2371 .addReg(SrcReg, getKillRegState(KillSrc));
2376 // Copy a DDDD register quad by copying the individual sub-registers.
2377 if (AArch64::DDDDRegClass.contains(DestReg) &&
2378 AArch64::DDDDRegClass.contains(SrcReg)) {
2379 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2380 AArch64::dsub2, AArch64::dsub3};
2381 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2386 // Copy a DDD register triple by copying the individual sub-registers.
2387 if (AArch64::DDDRegClass.contains(DestReg) &&
2388 AArch64::DDDRegClass.contains(SrcReg)) {
2389 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2391 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2396 // Copy a DD register pair by copying the individual sub-registers.
2397 if (AArch64::DDRegClass.contains(DestReg) &&
2398 AArch64::DDRegClass.contains(SrcReg)) {
2399 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2400 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2405 // Copy a QQQQ register quad by copying the individual sub-registers.
2406 if (AArch64::QQQQRegClass.contains(DestReg) &&
2407 AArch64::QQQQRegClass.contains(SrcReg)) {
2408 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2409 AArch64::qsub2, AArch64::qsub3};
2410 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2415 // Copy a QQQ register triple by copying the individual sub-registers.
2416 if (AArch64::QQQRegClass.contains(DestReg) &&
2417 AArch64::QQQRegClass.contains(SrcReg)) {
2418 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2420 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2425 // Copy a QQ register pair by copying the individual sub-registers.
2426 if (AArch64::QQRegClass.contains(DestReg) &&
2427 AArch64::QQRegClass.contains(SrcReg)) {
2428 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2429 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2434 if (AArch64::FPR128RegClass.contains(DestReg) &&
2435 AArch64::FPR128RegClass.contains(SrcReg)) {
2436 if (Subtarget.hasNEON()) {
2437 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2439 .addReg(SrcReg, getKillRegState(KillSrc));
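// Without NEON there is no direct 128-bit register-to-register move, so
// bounce the value through the stack: a pre-indexed store of the source
// below SP followed by a reload into the destination that restores SP.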
2441 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2442 .addReg(AArch64::SP, RegState::Define)
2443 .addReg(SrcReg, getKillRegState(KillSrc))
2444 .addReg(AArch64::SP)
2446 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2447 .addReg(AArch64::SP, RegState::Define)
2448 .addReg(DestReg, RegState::Define)
2449 .addReg(AArch64::SP)
2455 if (AArch64::FPR64RegClass.contains(DestReg) &&
2456 AArch64::FPR64RegClass.contains(SrcReg)) {
2457 if (Subtarget.hasNEON()) {
2458 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2459 &AArch64::FPR128RegClass);
2460 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2461 &AArch64::FPR128RegClass);
2462 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2464 .addReg(SrcReg, getKillRegState(KillSrc));
2466 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2467 .addReg(SrcReg, getKillRegState(KillSrc));
2472 if (AArch64::FPR32RegClass.contains(DestReg) &&
2473 AArch64::FPR32RegClass.contains(SrcReg)) {
2474 if (Subtarget.hasNEON()) {
2475 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2476 &AArch64::FPR128RegClass);
2477 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2478 &AArch64::FPR128RegClass);
2479 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2481 .addReg(SrcReg, getKillRegState(KillSrc));
2483 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2484 .addReg(SrcReg, getKillRegState(KillSrc));
2489 if (AArch64::FPR16RegClass.contains(DestReg) &&
2490 AArch64::FPR16RegClass.contains(SrcReg)) {
2491 if (Subtarget.hasNEON()) {
2492 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2493 &AArch64::FPR128RegClass);
2494 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2495 &AArch64::FPR128RegClass);
2496 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2498 .addReg(SrcReg, getKillRegState(KillSrc));
2500 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2501 &AArch64::FPR32RegClass);
2502 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2503 &AArch64::FPR32RegClass);
2504 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2505 .addReg(SrcReg, getKillRegState(KillSrc));
2510 if (AArch64::FPR8RegClass.contains(DestReg) &&
2511 AArch64::FPR8RegClass.contains(SrcReg)) {
2512 if (Subtarget.hasNEON()) {
2513 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2514 &AArch64::FPR128RegClass);
2515 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2516 &AArch64::FPR128RegClass);
2517 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2519 .addReg(SrcReg, getKillRegState(KillSrc));
2521 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2522 &AArch64::FPR32RegClass);
2523 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2524 &AArch64::FPR32RegClass);
2525 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2526 .addReg(SrcReg, getKillRegState(KillSrc));
2531 // Copies between GPR64 and FPR64.
2532 if (AArch64::FPR64RegClass.contains(DestReg) &&
2533 AArch64::GPR64RegClass.contains(SrcReg)) {
2534 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2535 .addReg(SrcReg, getKillRegState(KillSrc));
2538 if (AArch64::GPR64RegClass.contains(DestReg) &&
2539 AArch64::FPR64RegClass.contains(SrcReg)) {
2540 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2541 .addReg(SrcReg, getKillRegState(KillSrc));
2544 // Copies between GPR32 and FPR32.
2545 if (AArch64::FPR32RegClass.contains(DestReg) &&
2546 AArch64::GPR32RegClass.contains(SrcReg)) {
2547 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2548 .addReg(SrcReg, getKillRegState(KillSrc));
2551 if (AArch64::GPR32RegClass.contains(DestReg) &&
2552 AArch64::FPR32RegClass.contains(SrcReg)) {
2553 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2554 .addReg(SrcReg, getKillRegState(KillSrc));
2558 if (DestReg == AArch64::NZCV) {
2559 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2560 BuildMI(MBB, I, DL, get(AArch64::MSR))
2561 .addImm(AArch64SysReg::NZCV)
2562 .addReg(SrcReg, getKillRegState(KillSrc))
2563 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2567 if (SrcReg == AArch64::NZCV) {
2568 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2569 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2570 .addImm(AArch64SysReg::NZCV)
2571 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2575 llvm_unreachable("unimplemented reg-to-reg copy");
2578 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2579 MachineBasicBlock &MBB,
2580 MachineBasicBlock::iterator InsertBefore,
2581 const MCInstrDesc &MCID,
2582 unsigned SrcReg, bool IsKill,
2583 unsigned SubIdx0, unsigned SubIdx1, int FI,
2584 MachineMemOperand *MMO) {
2585 unsigned SrcReg0 = SrcReg;
2586 unsigned SrcReg1 = SrcReg;
2587 if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2588 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2590 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2593 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2594 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2595 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2598 .addMemOperand(MMO);
2601 void AArch64InstrInfo::storeRegToStackSlot(
2602 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2603 bool isKill, int FI, const TargetRegisterClass *RC,
2604 const TargetRegisterInfo *TRI) const {
2605 MachineFunction &MF = *MBB.getParent();
2606 MachineFrameInfo &MFI = MF.getFrameInfo();
2607 unsigned Align = MFI.getObjectAlignment(FI);
2609 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2610 MachineMemOperand *MMO = MF.getMachineMemOperand(
2611 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
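// Choose a store opcode based on the spill size of the register class.
// Register tuples that have no single store instruction are spilled with ST1
// multi-register stores, and sequential GPR pairs with STP.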
2614 switch (TRI->getSpillSize(*RC)) {
2616 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2617 Opc = AArch64::STRBui;
2620 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2621 Opc = AArch64::STRHui;
2624 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2625 Opc = AArch64::STRWui;
2626 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2627 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2629 assert(SrcReg != AArch64::WSP);
2630 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2631 Opc = AArch64::STRSui;
2634 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2635 Opc = AArch64::STRXui;
2636 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2637 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2639 assert(SrcReg != AArch64::SP);
2640 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2641 Opc = AArch64::STRDui;
2642 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2643 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2644 get(AArch64::STPWi), SrcReg, isKill,
2645 AArch64::sube32, AArch64::subo32, FI, MMO);
2650 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2651 Opc = AArch64::STRQui;
2652 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2653 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2654 Opc = AArch64::ST1Twov1d;
2656 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2657 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2658 get(AArch64::STPXi), SrcReg, isKill,
2659 AArch64::sube64, AArch64::subo64, FI, MMO);
2664 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2665 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2666 Opc = AArch64::ST1Threev1d;
2671 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2672 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2673 Opc = AArch64::ST1Fourv1d;
2675 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2676 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2677 Opc = AArch64::ST1Twov2d;
2682 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2683 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2684 Opc = AArch64::ST1Threev2d;
2689 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2690 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2691 Opc = AArch64::ST1Fourv2d;
2696 assert(Opc && "Unknown register class");
2698 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2699 .addReg(SrcReg, getKillRegState(isKill))
2704 MI.addMemOperand(MMO);
2707 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2708 MachineBasicBlock &MBB,
2709 MachineBasicBlock::iterator InsertBefore,
2710 const MCInstrDesc &MCID,
2711 unsigned DestReg, unsigned SubIdx0,
2712 unsigned SubIdx1, int FI,
2713 MachineMemOperand *MMO) {
2714 unsigned DestReg0 = DestReg;
2715 unsigned DestReg1 = DestReg;
2716 bool IsUndef = true;
2717 if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
2718 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2720 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2724 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2725 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2726 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2729 .addMemOperand(MMO);
2732 void AArch64InstrInfo::loadRegFromStackSlot(
2733 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2734 int FI, const TargetRegisterClass *RC,
2735 const TargetRegisterInfo *TRI) const {
2736 MachineFunction &MF = *MBB.getParent();
2737 MachineFrameInfo &MFI = MF.getFrameInfo();
2738 unsigned Align = MFI.getObjectAlignment(FI);
2739 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2740 MachineMemOperand *MMO = MF.getMachineMemOperand(
2741 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
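// Mirror of storeRegToStackSlot: choose a load opcode by spill size, using
// LD1 multi-register loads for D/Q tuples and LDP for sequential GPR pairs.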
2745 switch (TRI->getSpillSize(*RC)) {
2747 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2748 Opc = AArch64::LDRBui;
2751 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2752 Opc = AArch64::LDRHui;
2755 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2756 Opc = AArch64::LDRWui;
2757 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2758 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2760 assert(DestReg != AArch64::WSP);
2761 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2762 Opc = AArch64::LDRSui;
2765 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2766 Opc = AArch64::LDRXui;
2767 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2768 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2770 assert(DestReg != AArch64::SP);
2771 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2772 Opc = AArch64::LDRDui;
2773 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2774 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2775 get(AArch64::LDPWi), DestReg, AArch64::sube32,
2776 AArch64::subo32, FI, MMO);
2781 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2782 Opc = AArch64::LDRQui;
2783 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2784 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2785 Opc = AArch64::LD1Twov1d;
2787 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2788 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2789 get(AArch64::LDPXi), DestReg, AArch64::sube64,
2790 AArch64::subo64, FI, MMO);
2795 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2796 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2797 Opc = AArch64::LD1Threev1d;
2802 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2803 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2804 Opc = AArch64::LD1Fourv1d;
2806 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2807 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2808 Opc = AArch64::LD1Twov2d;
2813 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2814 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2815 Opc = AArch64::LD1Threev2d;
2820 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2821 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2822 Opc = AArch64::LD1Fourv2d;
2827 assert(Opc && "Unknown register class");
2829 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2830 .addReg(DestReg, getDefRegState(true))
2834 MI.addMemOperand(MMO);
2837 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2838 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2839 unsigned DestReg, unsigned SrcReg, int Offset,
2840 const TargetInstrInfo *TII,
2841 MachineInstr::MIFlag Flag, bool SetNZCV,
2843 if (DestReg == SrcReg && Offset == 0)
2846 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2847 "SP increment/decrement not 16-byte aligned");
2849 bool isSub = Offset < 0;
2853 // FIXME: If the offset won't fit in 24 bits, compute the offset into a
2854 // scratch register. If DestReg is a virtual register, use it as the
2855 // scratch register; otherwise, create a new virtual register (to be
2856 // replaced by the scavenger at the end of PEI). That case can be optimized
2857 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2858 // register can be loaded with offset%8 and the add/sub can use an extending
2859 // instruction with LSL#3.
2860 // Currently the function handles any offset but generates a poor sequence of code.
2862 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2866 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2868 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2869 const unsigned MaxEncoding = 0xfff;
2870 const unsigned ShiftSize = 12;
2871 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
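// Materialize the offset in 12-bit chunks: while it does not fit in the
// 12-bit immediate field, peel off up to 0xfff << 12 with a shifted ADD/SUB
// (LSL #12); the final ADD/SUB below handles the remaining low bits.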
2872 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2874 if (((unsigned)Offset) > MaxEncodableValue) {
2875 ThisVal = MaxEncodableValue;
2877 ThisVal = Offset & MaxEncodableValue;
2879 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2880 "Encoding cannot handle value that big");
2881 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2883 .addImm(ThisVal >> ShiftSize)
2884 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2887 if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
2888 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
2897 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2900 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2904 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
2905 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
2907 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
2910 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
2911 addImm(Offset).setMIFlag(Flag);
2912 } else if (DestReg == AArch64::SP) {
2913 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
2914 addImm(Offset).setMIFlag(Flag);
2919 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2920 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2921 MachineBasicBlock::iterator InsertPt, int FrameIndex,
2922 LiveIntervals *LIS) const {
2923 // This is a bit of a hack. Consider this instruction:
2925 // %0 = COPY %sp; GPR64all:%0
2927 // We explicitly chose GPR64all for the virtual register so such a copy might
2928 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2929 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
2930 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2932 // To prevent that, we are going to constrain the %0 register class here.
2934 // <rdar://problem/11522048>
2936 if (MI.isFullCopy()) {
2937 unsigned DstReg = MI.getOperand(0).getReg();
2938 unsigned SrcReg = MI.getOperand(1).getReg();
2939 if (SrcReg == AArch64::SP &&
2940 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2941 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2944 if (DstReg == AArch64::SP &&
2945 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2946 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2951 // Handle the case where a copy is being spilled or filled but the source
2952 // and destination register class don't match. For example:
2954 // %0 = COPY %xzr; GPR64common:%0
2956 // In this case we can still safely fold away the COPY and generate the
2957 // following spill code:
2959 // STRXui %xzr, %stack.0
2961 // This also eliminates spilled cross register class COPYs (e.g. between x and
2962 // d regs) of the same size. For example:
2964 // %0 = COPY %1; GPR64:%0, FPR64:%1
2966 // will be filled as
2968 // LDRDui %0, fi<#0>
2972 // LDRXui %Temp, fi<#0>
2975 if (MI.isCopy() && Ops.size() == 1 &&
2976 // Make sure we're only folding the explicit COPY defs/uses.
2977 (Ops[0] == 0 || Ops[0] == 1)) {
2978 bool IsSpill = Ops[0] == 0;
2979 bool IsFill = !IsSpill;
2980 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
2981 const MachineRegisterInfo &MRI = MF.getRegInfo();
2982 MachineBasicBlock &MBB = *MI.getParent();
2983 const MachineOperand &DstMO = MI.getOperand(0);
2984 const MachineOperand &SrcMO = MI.getOperand(1);
2985 unsigned DstReg = DstMO.getReg();
2986 unsigned SrcReg = SrcMO.getReg();
2987 // This is slightly expensive to compute for physical regs since
2988 // getMinimalPhysRegClass is slow.
2989 auto getRegClass = [&](unsigned Reg) {
2990 return TargetRegisterInfo::isVirtualRegister(Reg)
2991 ? MRI.getRegClass(Reg)
2992 : TRI.getMinimalPhysRegClass(Reg);
2995 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
2996 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
2997 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
2998 "Mismatched register size in non subreg COPY");
3000 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3001 getRegClass(SrcReg), &TRI);
3003 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3004 getRegClass(DstReg), &TRI);
3005 return &*--InsertPt;
3008 // Handle cases like spilling def of:
3010 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3012 // where the physical register source can be widened and stored to the full
3013 // virtual reg destination stack slot, in this case producing:
3015 // STRXui %xzr, %stack.0
3017 if (IsSpill && DstMO.isUndef() &&
3018 TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3019 assert(SrcMO.getSubReg() == 0 &&
3020 "Unexpected subreg on physical register");
3021 const TargetRegisterClass *SpillRC;
3022 unsigned SpillSubreg;
3023 switch (DstMO.getSubReg()) {
3027 case AArch64::sub_32:
3029 if (AArch64::GPR32RegClass.contains(SrcReg)) {
3030 SpillRC = &AArch64::GPR64RegClass;
3031 SpillSubreg = AArch64::sub_32;
3032 } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3033 SpillRC = &AArch64::FPR64RegClass;
3034 SpillSubreg = AArch64::ssub;
3039 if (AArch64::FPR64RegClass.contains(SrcReg)) {
3040 SpillRC = &AArch64::FPR128RegClass;
3041 SpillSubreg = AArch64::dsub;
3048 if (unsigned WidenedSrcReg =
3049 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3050 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3051 FrameIndex, SpillRC, &TRI);
3052 return &*--InsertPt;
3056 // Handle cases like filling use of:
3058 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3060 // where we can load the full virtual reg source stack slot, into the subreg
3061 // destination, in this case producing:
3063 // LDRWui %0:sub_32<def,read-undef>, %stack.0
3065 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3066 const TargetRegisterClass *FillRC;
3067 switch (DstMO.getSubReg()) {
3071 case AArch64::sub_32:
3072 FillRC = &AArch64::GPR32RegClass;
3075 FillRC = &AArch64::FPR32RegClass;
3078 FillRC = &AArch64::FPR64RegClass;
3083 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3084 TRI.getRegSizeInBits(*FillRC) &&
3085 "Mismatched regclass size on folded subreg COPY");
3086 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3087 MachineInstr &LoadMI = *--InsertPt;
3088 MachineOperand &LoadDst = LoadMI.getOperand(0);
3089 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3090 LoadDst.setSubReg(DstMO.getSubReg());
3091 LoadDst.setIsUndef();
3101 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3102 bool *OutUseUnscaledOp,
3103 unsigned *OutUnscaledOp,
3104 int *EmittableOffset) {
3106 bool IsSigned = false;
3107 // The ImmIdx should be changed case by case if it is not 2.
3108 unsigned ImmIdx = 2;
3109 unsigned UnscaledOp = 0;
3110 // Set output values in case of early exit.
3111 if (EmittableOffset)
3112 *EmittableOffset = 0;
3113 if (OutUseUnscaledOp)
3114 *OutUseUnscaledOp = false;
3117 switch (MI.getOpcode()) {
3119 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3120 // Vector spills/fills can't take an immediate offset.
3121 case AArch64::LD1Twov2d:
3122 case AArch64::LD1Threev2d:
3123 case AArch64::LD1Fourv2d:
3124 case AArch64::LD1Twov1d:
3125 case AArch64::LD1Threev1d:
3126 case AArch64::LD1Fourv1d:
3127 case AArch64::ST1Twov2d:
3128 case AArch64::ST1Threev2d:
3129 case AArch64::ST1Fourv2d:
3130 case AArch64::ST1Twov1d:
3131 case AArch64::ST1Threev1d:
3132 case AArch64::ST1Fourv1d:
3133 return AArch64FrameOffsetCannotUpdate;
3134 case AArch64::PRFMui:
3136 UnscaledOp = AArch64::PRFUMi;
3138 case AArch64::LDRXui:
3140 UnscaledOp = AArch64::LDURXi;
3142 case AArch64::LDRWui:
3144 UnscaledOp = AArch64::LDURWi;
3146 case AArch64::LDRBui:
3148 UnscaledOp = AArch64::LDURBi;
3150 case AArch64::LDRHui:
3152 UnscaledOp = AArch64::LDURHi;
3154 case AArch64::LDRSui:
3156 UnscaledOp = AArch64::LDURSi;
3158 case AArch64::LDRDui:
3160 UnscaledOp = AArch64::LDURDi;
3162 case AArch64::LDRQui:
3164 UnscaledOp = AArch64::LDURQi;
3166 case AArch64::LDRBBui:
3168 UnscaledOp = AArch64::LDURBBi;
3170 case AArch64::LDRHHui:
3172 UnscaledOp = AArch64::LDURHHi;
3174 case AArch64::LDRSBXui:
3176 UnscaledOp = AArch64::LDURSBXi;
3178 case AArch64::LDRSBWui:
3180 UnscaledOp = AArch64::LDURSBWi;
3182 case AArch64::LDRSHXui:
3184 UnscaledOp = AArch64::LDURSHXi;
3186 case AArch64::LDRSHWui:
3188 UnscaledOp = AArch64::LDURSHWi;
3190 case AArch64::LDRSWui:
3192 UnscaledOp = AArch64::LDURSWi;
3195 case AArch64::STRXui:
3197 UnscaledOp = AArch64::STURXi;
3199 case AArch64::STRWui:
3201 UnscaledOp = AArch64::STURWi;
3203 case AArch64::STRBui:
3205 UnscaledOp = AArch64::STURBi;
3207 case AArch64::STRHui:
3209 UnscaledOp = AArch64::STURHi;
3211 case AArch64::STRSui:
3213 UnscaledOp = AArch64::STURSi;
3215 case AArch64::STRDui:
3217 UnscaledOp = AArch64::STURDi;
3219 case AArch64::STRQui:
3221 UnscaledOp = AArch64::STURQi;
3223 case AArch64::STRBBui:
3225 UnscaledOp = AArch64::STURBBi;
3227 case AArch64::STRHHui:
3229 UnscaledOp = AArch64::STURHHi;
3232 case AArch64::LDPXi:
3233 case AArch64::LDPDi:
3234 case AArch64::STPXi:
3235 case AArch64::STPDi:
3236 case AArch64::LDNPXi:
3237 case AArch64::LDNPDi:
3238 case AArch64::STNPXi:
3239 case AArch64::STNPDi:
3244 case AArch64::LDPQi:
3245 case AArch64::STPQi:
3246 case AArch64::LDNPQi:
3247 case AArch64::STNPQi:
3252 case AArch64::LDPWi:
3253 case AArch64::LDPSi:
3254 case AArch64::STPWi:
3255 case AArch64::STPSi:
3256 case AArch64::LDNPWi:
3257 case AArch64::LDNPSi:
3258 case AArch64::STNPWi:
3259 case AArch64::STNPSi:
3265 case AArch64::LDURXi:
3266 case AArch64::LDURWi:
3267 case AArch64::LDURBi:
3268 case AArch64::LDURHi:
3269 case AArch64::LDURSi:
3270 case AArch64::LDURDi:
3271 case AArch64::LDURQi:
3272 case AArch64::LDURHHi:
3273 case AArch64::LDURBBi:
3274 case AArch64::LDURSBXi:
3275 case AArch64::LDURSBWi:
3276 case AArch64::LDURSHXi:
3277 case AArch64::LDURSHWi:
3278 case AArch64::LDURSWi:
3279 case AArch64::STURXi:
3280 case AArch64::STURWi:
3281 case AArch64::STURBi:
3282 case AArch64::STURHi:
3283 case AArch64::STURSi:
3284 case AArch64::STURDi:
3285 case AArch64::STURQi:
3286 case AArch64::STURBBi:
3287 case AArch64::STURHHi:
3292 Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3294 bool useUnscaledOp = false;
3295 // If the offset doesn't match the scale, we rewrite the instruction to
3296 // use the unscaled instruction instead. Likewise, if we have a negative
3297 // offset (and have an unscaled op to use).
3298 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3299 useUnscaledOp = true;
3301 // Use an unscaled addressing mode if the instruction has a negative offset
3302 // (or if the instruction is already using an unscaled addressing mode).
3305 // ldp/stp instructions.
3308 } else if (UnscaledOp == 0 || useUnscaledOp) {
3318 // Attempt to fold address computation.
3319 int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3320 int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3321 if (Offset >= MinOff && Offset <= MaxOff) {
3322 if (EmittableOffset)
3323 *EmittableOffset = Offset;
3326 int NewOff = Offset < 0 ? MinOff : MaxOff;
3327 if (EmittableOffset)
3328 *EmittableOffset = NewOff;
3329 Offset = (Offset - NewOff) * Scale;
3331 if (OutUseUnscaledOp)
3332 *OutUseUnscaledOp = useUnscaledOp;
3334 *OutUnscaledOp = UnscaledOp;
3335 return AArch64FrameOffsetCanUpdate |
3336 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
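// Fold a frame index into the instruction at FrameRegIdx, replacing it with
// FrameReg plus whatever part of Offset the instruction can encode.
// ADDXri/ADDSXri are lowered through emitFrameOffset; loads and stores are
// updated in place and may be switched to their unscaled form.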
3339 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3340 unsigned FrameReg, int &Offset,
3341 const AArch64InstrInfo *TII) {
3342 unsigned Opcode = MI.getOpcode();
3343 unsigned ImmIdx = FrameRegIdx + 1;
3345 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3346 Offset += MI.getOperand(ImmIdx).getImm();
3347 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3348 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3349 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3350 MI.eraseFromParent();
3356 unsigned UnscaledOp;
3358 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3359 &UnscaledOp, &NewOffset);
3360 if (Status & AArch64FrameOffsetCanUpdate) {
3361 if (Status & AArch64FrameOffsetIsLegal)
3362 // Replace the FrameIndex with FrameReg.
3363 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3365 MI.setDesc(TII->get(UnscaledOp));
3367 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3374 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
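// HINT #0 is the architectural encoding of NOP.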
3375 NopInst.setOpcode(AArch64::HINT);
3376 NopInst.addOperand(MCOperand::createImm(0));
3379 // AArch64 supports MachineCombiner.
3380 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3382 // True when Opc sets the NZCV flags.
3383 static bool isCombineInstrSettingFlag(unsigned Opc) {
3385 case AArch64::ADDSWrr:
3386 case AArch64::ADDSWri:
3387 case AArch64::ADDSXrr:
3388 case AArch64::ADDSXri:
3389 case AArch64::SUBSWrr:
3390 case AArch64::SUBSXrr:
3391 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3392 case AArch64::SUBSWri:
3393 case AArch64::SUBSXri:
3401 // 32b Opcodes that can be combined with a MUL
3402 static bool isCombineInstrCandidate32(unsigned Opc) {
3404 case AArch64::ADDWrr:
3405 case AArch64::ADDWri:
3406 case AArch64::SUBWrr:
3407 case AArch64::ADDSWrr:
3408 case AArch64::ADDSWri:
3409 case AArch64::SUBSWrr:
3410 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3411 case AArch64::SUBWri:
3412 case AArch64::SUBSWri:
3420 // 64b Opcodes that can be combined with a MUL
3421 static bool isCombineInstrCandidate64(unsigned Opc) {
3423 case AArch64::ADDXrr:
3424 case AArch64::ADDXri:
3425 case AArch64::SUBXrr:
3426 case AArch64::ADDSXrr:
3427 case AArch64::ADDSXri:
3428 case AArch64::SUBSXrr:
3429 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3430 case AArch64::SUBXri:
3431 case AArch64::SUBSXri:
3439 // FP Opcodes that can be combined with a FMUL
3440 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3441 switch (Inst.getOpcode()) {
3444 case AArch64::FADDSrr:
3445 case AArch64::FADDDrr:
3446 case AArch64::FADDv2f32:
3447 case AArch64::FADDv2f64:
3448 case AArch64::FADDv4f32:
3449 case AArch64::FSUBSrr:
3450 case AArch64::FSUBDrr:
3451 case AArch64::FSUBv2f32:
3452 case AArch64::FSUBv2f64:
3453 case AArch64::FSUBv4f32:
3454 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3455 return (Options.UnsafeFPMath ||
3456 Options.AllowFPOpFusion == FPOpFusion::Fast);
3461 // Opcodes that can be combined with a MUL
3462 static bool isCombineInstrCandidate(unsigned Opc) {
3463 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3467 // Utility routine that checks if \param MO is defined by an
3468 // \param CombineOpc instruction in the basic block \param MBB
3469 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3470 unsigned CombineOpc, unsigned ZeroReg = 0,
3471 bool CheckZeroReg = false) {
3472 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3473 MachineInstr *MI = nullptr;
3475 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3476 MI = MRI.getUniqueVRegDef(MO.getReg());
3477 // And it needs to be in the trace (otherwise, it won't have a depth).
3478 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3480 // Must only be used by the user we combine with.
3481 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3485 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3486 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3487 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3488 // The third input reg must be zero.
3489 if (MI->getOperand(3).getReg() != ZeroReg)
3497 // Is \param MO defined by an integer multiply and can be combined?
3498 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3499 unsigned MulOpc, unsigned ZeroReg) {
3500 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3504 // Is \param MO defined by a floating-point multiply and can be combined?
3505 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3507 return canCombine(MBB, MO, MulOpc);
3510 // TODO: There are many more machine instruction opcodes to match:
3511 // 1. Other data types (integer, vectors)
3512 // 2. Other math / logic operations (xor, or)
3513 // 3. Other forms of the same operation (intrinsics and other variants)
3514 bool AArch64InstrInfo::isAssociativeAndCommutative(
3515 const MachineInstr &Inst) const {
3516 switch (Inst.getOpcode()) {
3517 case AArch64::FADDDrr:
3518 case AArch64::FADDSrr:
3519 case AArch64::FADDv2f32:
3520 case AArch64::FADDv2f64:
3521 case AArch64::FADDv4f32:
3522 case AArch64::FMULDrr:
3523 case AArch64::FMULSrr:
3524 case AArch64::FMULX32:
3525 case AArch64::FMULX64:
3526 case AArch64::FMULXv2f32:
3527 case AArch64::FMULXv2f64:
3528 case AArch64::FMULXv4f32:
3529 case AArch64::FMULv2f32:
3530 case AArch64::FMULv2f64:
3531 case AArch64::FMULv4f32:
3532 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3538 /// Find instructions that can be turned into madd.
3539 static bool getMaddPatterns(MachineInstr &Root,
3540 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3541 unsigned Opc = Root.getOpcode();
3542 MachineBasicBlock &MBB = *Root.getParent();
3545 if (!isCombineInstrCandidate(Opc))
3547 if (isCombineInstrSettingFlag(Opc)) {
3548 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3549 // When NZCV is live, bail out.
3552 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3553 // When the opcode can't change, bail out.
3554 // CHECKME: do we miss any cases for opcode conversion?
3563 case AArch64::ADDWrr:
3564 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3565 "ADDWrr does not have register operands");
3566 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3568 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3571 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3573 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3577 case AArch64::ADDXrr:
3578 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3580 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3583 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3585 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3589 case AArch64::SUBWrr:
3590 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3592 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3595 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3597 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3601 case AArch64::SUBXrr:
3602 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3604 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3607 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3609 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3613 case AArch64::ADDWri:
3614 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3616 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3620 case AArch64::ADDXri:
3621 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3623 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3627 case AArch64::SUBWri:
3628 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3630 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3634 case AArch64::SUBXri:
3635 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3637 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3644 /// Floating-Point Support
3646 /// Find instructions that can be turned into madd.
3647 static bool getFMAPatterns(MachineInstr &Root,
3648 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3650 if (!isCombineInstrCandidateFP(Root))
3653 MachineBasicBlock &MBB = *Root.getParent();
3656 switch (Root.getOpcode()) {
3658 assert(false && "Unsupported FP instruction in combiner\n");
3660 case AArch64::FADDSrr:
3661 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3662 "FADDWrr does not have register operands");
3663 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3664 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3666 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3667 AArch64::FMULv1i32_indexed)) {
3668 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3671 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3672 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3674 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3675 AArch64::FMULv1i32_indexed)) {
3676 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3680 case AArch64::FADDDrr:
3681 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3682 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3684 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3685 AArch64::FMULv1i64_indexed)) {
3686 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3689 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3690 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3692 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3693 AArch64::FMULv1i64_indexed)) {
3694 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3698 case AArch64::FADDv2f32:
3699 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3700 AArch64::FMULv2i32_indexed)) {
3701 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3703 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3704 AArch64::FMULv2f32)) {
3705 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3708 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3709 AArch64::FMULv2i32_indexed)) {
3710 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3712 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3713 AArch64::FMULv2f32)) {
3714 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3718 case AArch64::FADDv2f64:
3719 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3720 AArch64::FMULv2i64_indexed)) {
3721 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3723 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3724 AArch64::FMULv2f64)) {
3725 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3728 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3729 AArch64::FMULv2i64_indexed)) {
3730 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3732 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3733 AArch64::FMULv2f64)) {
3734 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3738 case AArch64::FADDv4f32:
3739 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3740 AArch64::FMULv4i32_indexed)) {
3741 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3743 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3744 AArch64::FMULv4f32)) {
3745 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3748 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3749 AArch64::FMULv4i32_indexed)) {
3750 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3752 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3753 AArch64::FMULv4f32)) {
3754 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3759 case AArch64::FSUBSrr:
3760 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3761 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3764 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3765 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3767 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3768 AArch64::FMULv1i32_indexed)) {
3769 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3772 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3773 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3777 case AArch64::FSUBDrr:
3778 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3779 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3782 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3783 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3785 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3786 AArch64::FMULv1i64_indexed)) {
3787 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3790 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3791 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3795 case AArch64::FSUBv2f32:
3796 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3797 AArch64::FMULv2i32_indexed)) {
3798 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3800 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3801 AArch64::FMULv2f32)) {
3802 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3805 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3806 AArch64::FMULv2i32_indexed)) {
3807 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3809 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3810 AArch64::FMULv2f32)) {
3811 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3815 case AArch64::FSUBv2f64:
3816 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3817 AArch64::FMULv2i64_indexed)) {
3818 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3820 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3821 AArch64::FMULv2f64)) {
3822 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3825 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3826 AArch64::FMULv2i64_indexed)) {
3827 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3829 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3830 AArch64::FMULv2f64)) {
3831 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3835 case AArch64::FSUBv4f32:
3836 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3837 AArch64::FMULv4i32_indexed)) {
3838 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3840 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3841 AArch64::FMULv4f32)) {
3842 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3845 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3846 AArch64::FMULv4i32_indexed)) {
3847 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3849 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3850 AArch64::FMULv4f32)) {
3851 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
3859 /// Return true when a code sequence can improve throughput. It
3860 /// should be called only for instructions in loops.
3861 /// \param Pattern - combiner pattern
3862 bool AArch64InstrInfo::isThroughputPattern(
3863 MachineCombinerPattern Pattern) const {
3867 case MachineCombinerPattern::FMULADDS_OP1:
3868 case MachineCombinerPattern::FMULADDS_OP2:
3869 case MachineCombinerPattern::FMULSUBS_OP1:
3870 case MachineCombinerPattern::FMULSUBS_OP2:
3871 case MachineCombinerPattern::FMULADDD_OP1:
3872 case MachineCombinerPattern::FMULADDD_OP2:
3873 case MachineCombinerPattern::FMULSUBD_OP1:
3874 case MachineCombinerPattern::FMULSUBD_OP2:
3875 case MachineCombinerPattern::FNMULSUBS_OP1:
3876 case MachineCombinerPattern::FNMULSUBD_OP1:
3877 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3878 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3879 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3880 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3881 case MachineCombinerPattern::FMLAv2f32_OP2:
3882 case MachineCombinerPattern::FMLAv2f32_OP1:
3883 case MachineCombinerPattern::FMLAv2f64_OP1:
3884 case MachineCombinerPattern::FMLAv2f64_OP2:
3885 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3886 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3887 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3888 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3889 case MachineCombinerPattern::FMLAv4f32_OP1:
3890 case MachineCombinerPattern::FMLAv4f32_OP2:
3891 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3892 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3893 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3894 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3895 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3896 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3897 case MachineCombinerPattern::FMLSv2f32_OP2:
3898 case MachineCombinerPattern::FMLSv2f64_OP2:
3899 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3900 case MachineCombinerPattern::FMLSv4f32_OP2:
3902 } // end switch (Pattern)
3905 /// Return true when there is potentially a faster code sequence for an
3906 /// instruction chain ending in \p Root. All potential patterns are listed in
3907 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3908 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3910 bool AArch64InstrInfo::getMachineCombinerPatterns(
3912 SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3914 if (getMaddPatterns(Root, Patterns))
3916 // Floating point patterns
3917 if (getFMAPatterns(Root, Patterns))
3920 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3923 enum class FMAInstKind { Default, Indexed, Accumulator };
3924 /// genFusedMultiply - Generate fused multiply instructions.
3925 /// This function supports both integer and floating point instructions.
3926 /// A typical example:
3927 ///  F|MUL I=A,B,0
3928 ///  F|ADD R,I,C
3929 /// ==> F|MADD R,A,B,C
3930 /// \param MF Containing MachineFunction
3931 /// \param MRI Register information
3932 /// \param TII Target information
3933 /// \param Root is the F|ADD instruction
3934 /// \param [out] InsInstrs is a vector of machine instructions and will
3935 /// contain the generated madd instruction
3936 /// \param IdxMulOpd is index of operand in Root that is the result of
3937 /// the F|MUL. In the example above IdxMulOpd is 1.
3938 /// \param MaddOpc the opcode of the f|madd instruction
3939 /// \param RC Register class of operands
3940 /// \param kind the kind of FMA instruction (addressing mode) to be generated
3941 /// \param ReplacedAddend is the result register from the instruction
3942 /// replacing the non-combined operand, if any.
3943 static MachineInstr *
3944 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3945 const TargetInstrInfo *TII, MachineInstr &Root,
3946 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3947 unsigned MaddOpc, const TargetRegisterClass *RC,
3948 FMAInstKind kind = FMAInstKind::Default,
3949 const unsigned *ReplacedAddend = nullptr) {
3950 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3952 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3953 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3954 unsigned ResultReg = Root.getOperand(0).getReg();
3955 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3956 bool Src0IsKill = MUL->getOperand(1).isKill();
3957 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3958 bool Src1IsKill = MUL->getOperand(2).isKill();
3962 if (ReplacedAddend) {
3963 // If we just generated a new addend, we must be its only use.
3964 SrcReg2 = *ReplacedAddend;
3967 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3968 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3971 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3972 MRI.constrainRegClass(ResultReg, RC);
3973 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3974 MRI.constrainRegClass(SrcReg0, RC);
3975 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3976 MRI.constrainRegClass(SrcReg1, RC);
3977 if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3978 MRI.constrainRegClass(SrcReg2, RC);
3980 MachineInstrBuilder MIB;
3981 if (kind == FMAInstKind::Default)
3982 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3983 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3984 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3985 .addReg(SrcReg2, getKillRegState(Src2IsKill));
3986 else if (kind == FMAInstKind::Indexed)
3987 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3988 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3989 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3990 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3991 .addImm(MUL->getOperand(3).getImm());
3992 else if (kind == FMAInstKind::Accumulator)
3993 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3994 .addReg(SrcReg2, getKillRegState(Src2IsKill))
3995 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3996 .addReg(SrcReg1, getKillRegState(Src1IsKill));
3998 assert(false && "Invalid FMA instruction kind \n");
3999 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4000 InsInstrs.push_back(MIB);
4004 /// genMaddR - Generate madd instruction and combine mul and add using
4005 /// an extra virtual register
4006 /// Example - an ADD intermediate needs to be stored in a register:
4007 ///   MUL I=A,B,0
4008 ///   ADD R,I,Imm
4009 /// ==> ORR V, ZR, Imm
4010 /// ==> MADD R,A,B,V
4011 /// \param MF Containing MachineFunction
4012 /// \param MRI Register information
4013 /// \param TII Target information
4014 /// \param Root is the ADD instruction
4015 /// \param [out] InsInstrs is a vector of machine instructions and will
4016 /// contain the generated madd instruction
4017 /// \param IdxMulOpd is the index of the operand in Root that is the result of
4018 /// the MUL. In the example above IdxMulOpd is 1.
4019 /// \param MaddOpc the opcode of the madd instruction
4020 /// \param VR is a virtual register that holds the value of an ADD operand
4021 /// (V in the example above).
4022 /// \param RC Register class of operands
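/// A concrete (illustrative) instance of the example above, assuming the
/// immediate is encodable as an ORR logical immediate:
///   mul  w8, w0, w1
///   add  w0, w8, #255
///   ==> orr  w9, wzr, #0xff
///   ==> madd w0, w0, w1, w9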
4023 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4024 const TargetInstrInfo *TII, MachineInstr &Root,
4025 SmallVectorImpl<MachineInstr *> &InsInstrs,
4026 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4027 const TargetRegisterClass *RC) {
4028 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4030 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4031 unsigned ResultReg = Root.getOperand(0).getReg();
4032 unsigned SrcReg0 = MUL->getOperand(1).getReg();
4033 bool Src0IsKill = MUL->getOperand(1).isKill();
4034 unsigned SrcReg1 = MUL->getOperand(2).getReg();
4035 bool Src1IsKill = MUL->getOperand(2).isKill();
4037 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4038 MRI.constrainRegClass(ResultReg, RC);
4039 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4040 MRI.constrainRegClass(SrcReg0, RC);
4041 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4042 MRI.constrainRegClass(SrcReg1, RC);
4043 if (TargetRegisterInfo::isVirtualRegister(VR))
4044 MRI.constrainRegClass(VR, RC);
4046 MachineInstrBuilder MIB =
4047 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4048 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4049 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4050           .addReg(VR);
4051   // Insert the MADD
4052   InsInstrs.push_back(MIB);
4053   return MUL;
4054 }
4056 /// When getMachineCombinerPatterns() finds potential patterns,
4057 /// this function generates the instructions that could replace the
4058 /// original code sequence
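/// For example (illustrative), the MULADDW_OP1 pattern rewrites
///   mul  w8, w0, w1
///   add  w9, w8, w2
/// into
///   madd w9, w0, w1, w2
/// and records the original MUL and ADD in DelInstrs for removal.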
4059 void AArch64InstrInfo::genAlternativeCodeSequence(
4060 MachineInstr &Root, MachineCombinerPattern Pattern,
4061 SmallVectorImpl<MachineInstr *> &InsInstrs,
4062 SmallVectorImpl<MachineInstr *> &DelInstrs,
4063 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4064 MachineBasicBlock &MBB = *Root.getParent();
4065 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4066 MachineFunction &MF = *MBB.getParent();
4067 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4069   MachineInstr *MUL;
4070   const TargetRegisterClass *RC;
4071   unsigned Opc;
4072   switch (Pattern) {
4073   default:
4074     // Reassociate instructions.
4075     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4076                                                 DelInstrs, InstrIdxForVirtReg);
4077     return;
4078 case MachineCombinerPattern::MULADDW_OP1:
4079 case MachineCombinerPattern::MULADDX_OP1:
4083 // --- Create(MADD);
4084 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4085 Opc = AArch64::MADDWrrr;
4086 RC = &AArch64::GPR32RegClass;
4088 Opc = AArch64::MADDXrrr;
4089 RC = &AArch64::GPR64RegClass;
4091 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4093 case MachineCombinerPattern::MULADDW_OP2:
4094 case MachineCombinerPattern::MULADDX_OP2:
4098 // --- Create(MADD);
4099 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4100 Opc = AArch64::MADDWrrr;
4101 RC = &AArch64::GPR32RegClass;
4103 Opc = AArch64::MADDXrrr;
4104 RC = &AArch64::GPR64RegClass;
4106 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4108 case MachineCombinerPattern::MULADDWI_OP1:
4109 case MachineCombinerPattern::MULADDXI_OP1: {
4112 // ==> ORR V, ZR, Imm
4114 // --- Create(MADD);
4115 const TargetRegisterClass *OrrRC;
4116 unsigned BitSize, OrrOpc, ZeroReg;
4117 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4118 OrrOpc = AArch64::ORRWri;
4119 OrrRC = &AArch64::GPR32spRegClass;
4121 ZeroReg = AArch64::WZR;
4122 Opc = AArch64::MADDWrrr;
4123 RC = &AArch64::GPR32RegClass;
4125 OrrOpc = AArch64::ORRXri;
4126 OrrRC = &AArch64::GPR64spRegClass;
4128 ZeroReg = AArch64::XZR;
4129 Opc = AArch64::MADDXrrr;
4130 RC = &AArch64::GPR64RegClass;
4132 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4133 uint64_t Imm = Root.getOperand(2).getImm();
4135 if (Root.getOperand(3).isImm()) {
4136 unsigned Val = Root.getOperand(3).getImm();
4139 uint64_t UImm = SignExtend64(Imm, BitSize);
4141 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4142 MachineInstrBuilder MIB1 =
4143 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4146 InsInstrs.push_back(MIB1);
4147 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4148 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4152 case MachineCombinerPattern::MULSUBW_OP1:
4153 case MachineCombinerPattern::MULSUBX_OP1: {
4157 // ==> MADD R,A,B,V // = -C + A*B
4158 // --- Create(MADD);
4159 const TargetRegisterClass *SubRC;
4160 unsigned SubOpc, ZeroReg;
4161 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4162 SubOpc = AArch64::SUBWrr;
4163 SubRC = &AArch64::GPR32spRegClass;
4164 ZeroReg = AArch64::WZR;
4165 Opc = AArch64::MADDWrrr;
4166 RC = &AArch64::GPR32RegClass;
4168 SubOpc = AArch64::SUBXrr;
4169 SubRC = &AArch64::GPR64spRegClass;
4170 ZeroReg = AArch64::XZR;
4171 Opc = AArch64::MADDXrrr;
4172 RC = &AArch64::GPR64RegClass;
4174 unsigned NewVR = MRI.createVirtualRegister(SubRC);
4176 MachineInstrBuilder MIB1 =
4177 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4179 .add(Root.getOperand(2));
4180 InsInstrs.push_back(MIB1);
4181 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4182 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4185 case MachineCombinerPattern::MULSUBW_OP2:
4186 case MachineCombinerPattern::MULSUBX_OP2:
4189 // ==> MSUB R,A,B,C (computes C - A*B)
4190 // --- Create(MSUB);
4191 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4192 Opc = AArch64::MSUBWrrr;
4193 RC = &AArch64::GPR32RegClass;
4195 Opc = AArch64::MSUBXrrr;
4196 RC = &AArch64::GPR64RegClass;
4198 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4200 case MachineCombinerPattern::MULSUBWI_OP1:
4201 case MachineCombinerPattern::MULSUBXI_OP1: {
4204 // ==> ORR V, ZR, -Imm
4205 // ==> MADD R,A,B,V // = -Imm + A*B
4206 // --- Create(MADD);
4207 const TargetRegisterClass *OrrRC;
4208 unsigned BitSize, OrrOpc, ZeroReg;
4209 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4210 OrrOpc = AArch64::ORRWri;
4211 OrrRC = &AArch64::GPR32spRegClass;
4213 ZeroReg = AArch64::WZR;
4214 Opc = AArch64::MADDWrrr;
4215 RC = &AArch64::GPR32RegClass;
4217 OrrOpc = AArch64::ORRXri;
4218 OrrRC = &AArch64::GPR64spRegClass;
4220 ZeroReg = AArch64::XZR;
4221 Opc = AArch64::MADDXrrr;
4222 RC = &AArch64::GPR64RegClass;
4224 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4225 uint64_t Imm = Root.getOperand(2).getImm();
4226 if (Root.getOperand(3).isImm()) {
4227 unsigned Val = Root.getOperand(3).getImm();
4230 uint64_t UImm = SignExtend64(-Imm, BitSize);
4232 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4233 MachineInstrBuilder MIB1 =
4234 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4237 InsInstrs.push_back(MIB1);
4238 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4239 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4243 // Floating Point Support
4244 case MachineCombinerPattern::FMULADDS_OP1:
4245 case MachineCombinerPattern::FMULADDD_OP1:
4249 // --- Create(MADD);
4250 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4251 Opc = AArch64::FMADDSrrr;
4252 RC = &AArch64::FPR32RegClass;
4254 Opc = AArch64::FMADDDrrr;
4255 RC = &AArch64::FPR64RegClass;
4257 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4259 case MachineCombinerPattern::FMULADDS_OP2:
4260 case MachineCombinerPattern::FMULADDD_OP2:
4263 // ==> FMADD R,A,B,C
4264 // --- Create(FMADD);
4265 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4266 Opc = AArch64::FMADDSrrr;
4267 RC = &AArch64::FPR32RegClass;
4269 Opc = AArch64::FMADDDrrr;
4270 RC = &AArch64::FPR64RegClass;
4272 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4275 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4276 Opc = AArch64::FMLAv1i32_indexed;
4277 RC = &AArch64::FPR32RegClass;
4278 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4279 FMAInstKind::Indexed);
4281 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4282 Opc = AArch64::FMLAv1i32_indexed;
4283 RC = &AArch64::FPR32RegClass;
4284 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4285 FMAInstKind::Indexed);
4288 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4289 Opc = AArch64::FMLAv1i64_indexed;
4290 RC = &AArch64::FPR64RegClass;
4291 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4292 FMAInstKind::Indexed);
4294 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4295 Opc = AArch64::FMLAv1i64_indexed;
4296 RC = &AArch64::FPR64RegClass;
4297 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4298 FMAInstKind::Indexed);
4301 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4302 case MachineCombinerPattern::FMLAv2f32_OP1:
4303 RC = &AArch64::FPR64RegClass;
4304 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4305 Opc = AArch64::FMLAv2i32_indexed;
4306 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4307 FMAInstKind::Indexed);
4309 Opc = AArch64::FMLAv2f32;
4310 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4311 FMAInstKind::Accumulator);
4314 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4315 case MachineCombinerPattern::FMLAv2f32_OP2:
4316 RC = &AArch64::FPR64RegClass;
4317 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4318 Opc = AArch64::FMLAv2i32_indexed;
4319 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4320 FMAInstKind::Indexed);
4322 Opc = AArch64::FMLAv2f32;
4323 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4324 FMAInstKind::Accumulator);
4328 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4329 case MachineCombinerPattern::FMLAv2f64_OP1:
4330 RC = &AArch64::FPR128RegClass;
4331 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4332 Opc = AArch64::FMLAv2i64_indexed;
4333 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4334 FMAInstKind::Indexed);
4336 Opc = AArch64::FMLAv2f64;
4337 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4338 FMAInstKind::Accumulator);
4341 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4342 case MachineCombinerPattern::FMLAv2f64_OP2:
4343 RC = &AArch64::FPR128RegClass;
4344 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4345 Opc = AArch64::FMLAv2i64_indexed;
4346 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4347 FMAInstKind::Indexed);
4349 Opc = AArch64::FMLAv2f64;
4350 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4351 FMAInstKind::Accumulator);
4355 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4356 case MachineCombinerPattern::FMLAv4f32_OP1:
4357 RC = &AArch64::FPR128RegClass;
4358 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4359 Opc = AArch64::FMLAv4i32_indexed;
4360 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4361 FMAInstKind::Indexed);
4363 Opc = AArch64::FMLAv4f32;
4364 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4365 FMAInstKind::Accumulator);
4369 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4370 case MachineCombinerPattern::FMLAv4f32_OP2:
4371 RC = &AArch64::FPR128RegClass;
4372 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4373 Opc = AArch64::FMLAv4i32_indexed;
4374 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4375 FMAInstKind::Indexed);
4377 Opc = AArch64::FMLAv4f32;
4378 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4379 FMAInstKind::Accumulator);
4383 case MachineCombinerPattern::FMULSUBS_OP1:
4384 case MachineCombinerPattern::FMULSUBD_OP1: {
4387 // ==> FNMSUB R,A,B,C // = -C + A*B
4388 // --- Create(FNMSUB);
4389 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4390 Opc = AArch64::FNMSUBSrrr;
4391 RC = &AArch64::FPR32RegClass;
4393 Opc = AArch64::FNMSUBDrrr;
4394 RC = &AArch64::FPR64RegClass;
4396 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4400 case MachineCombinerPattern::FNMULSUBS_OP1:
4401 case MachineCombinerPattern::FNMULSUBD_OP1: {
4404 // ==> FNMADD R,A,B,C // = -A*B - C
4405 // --- Create(FNMADD);
4406 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4407 Opc = AArch64::FNMADDSrrr;
4408 RC = &AArch64::FPR32RegClass;
4410 Opc = AArch64::FNMADDDrrr;
4411 RC = &AArch64::FPR64RegClass;
4413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4417 case MachineCombinerPattern::FMULSUBS_OP2:
4418 case MachineCombinerPattern::FMULSUBD_OP2: {
4421 // ==> FMSUB R,A,B,C (computes C - A*B)
4422 // --- Create(FMSUB);
4423 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4424 Opc = AArch64::FMSUBSrrr;
4425 RC = &AArch64::FPR32RegClass;
4427 Opc = AArch64::FMSUBDrrr;
4428 RC = &AArch64::FPR64RegClass;
4430 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4434 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4435 Opc = AArch64::FMLSv1i32_indexed;
4436 RC = &AArch64::FPR32RegClass;
4437 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4438 FMAInstKind::Indexed);
4441 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4442 Opc = AArch64::FMLSv1i64_indexed;
4443 RC = &AArch64::FPR64RegClass;
4444 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4445 FMAInstKind::Indexed);
4448 case MachineCombinerPattern::FMLSv2f32_OP2:
4449 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4450 RC = &AArch64::FPR64RegClass;
4451 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4452 Opc = AArch64::FMLSv2i32_indexed;
4453 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4454 FMAInstKind::Indexed);
4456 Opc = AArch64::FMLSv2f32;
4457 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4458 FMAInstKind::Accumulator);
4462 case MachineCombinerPattern::FMLSv2f64_OP2:
4463 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4464 RC = &AArch64::FPR128RegClass;
4465 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4466 Opc = AArch64::FMLSv2i64_indexed;
4467 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4468 FMAInstKind::Indexed);
4470 Opc = AArch64::FMLSv2f64;
4471 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4472 FMAInstKind::Accumulator);
4476 case MachineCombinerPattern::FMLSv4f32_OP2:
4477 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4478 RC = &AArch64::FPR128RegClass;
4479 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4480 Opc = AArch64::FMLSv4i32_indexed;
4481 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4482 FMAInstKind::Indexed);
4484 Opc = AArch64::FMLSv4f32;
4485 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4486 FMAInstKind::Accumulator);
4489 case MachineCombinerPattern::FMLSv2f32_OP1:
4490 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4491 RC = &AArch64::FPR64RegClass;
4492 unsigned NewVR = MRI.createVirtualRegister(RC);
4493 MachineInstrBuilder MIB1 =
4494 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4495 .add(Root.getOperand(2));
4496 InsInstrs.push_back(MIB1);
4497 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4498 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4499 Opc = AArch64::FMLAv2i32_indexed;
4500 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4501 FMAInstKind::Indexed, &NewVR);
4503 Opc = AArch64::FMLAv2f32;
4504 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4505 FMAInstKind::Accumulator, &NewVR);
4509 case MachineCombinerPattern::FMLSv4f32_OP1:
4510 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4511 RC = &AArch64::FPR128RegClass;
4512 unsigned NewVR = MRI.createVirtualRegister(RC);
4513 MachineInstrBuilder MIB1 =
4514 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4515 .add(Root.getOperand(2));
4516 InsInstrs.push_back(MIB1);
4517 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4518 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4519 Opc = AArch64::FMLAv4i32_indexed;
4520 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4521 FMAInstKind::Indexed, &NewVR);
4523 Opc = AArch64::FMLAv4f32;
4524 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4525 FMAInstKind::Accumulator, &NewVR);
4529 case MachineCombinerPattern::FMLSv2f64_OP1:
4530 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4531 RC = &AArch64::FPR128RegClass;
4532 unsigned NewVR = MRI.createVirtualRegister(RC);
4533 MachineInstrBuilder MIB1 =
4534 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4535 .add(Root.getOperand(2));
4536 InsInstrs.push_back(MIB1);
4537 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4538 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4539 Opc = AArch64::FMLAv2i64_indexed;
4540 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4541 FMAInstKind::Indexed, &NewVR);
4543 Opc = AArch64::FMLAv2f64;
4544 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4545 FMAInstKind::Accumulator, &NewVR);
4549 } // end switch (Pattern)
4550 // Record MUL and ADD/SUB for deletion
4551 DelInstrs.push_back(MUL);
4552 DelInstrs.push_back(&Root);
4555 /// Replace csinc-branch sequence by simple conditional branch
4556 ///
4557 /// Examples:
4558 /// 1. \code
4559 ///   csinc w9, wzr, wzr, <condition code>
4560 ///   tbnz  w9, #0, 0x44
4561 ///    \endcode
4562 /// to
4563 ///    \code
4564 ///   b.<inverted condition code>
4565 ///    \endcode
4566 ///
4567 /// 2. \code
4568 ///   csinc w9, wzr, wzr, <condition code>
4569 ///   tbz   w9, #0, 0x44
4570 ///    \endcode
4571 /// to
4572 ///    \code
4573 ///   b.<condition code>
4574 ///    \endcode
4575 ///
4576 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4577 /// compare's constant operand is power of 2.
4578 ///
4579 /// Examples:
4580 ///    \code
4581 ///   and  w8, w8, #0x400
4582 ///   cbnz w8, L1
4583 ///    \endcode
4584 /// to
4585 ///    \code
4586 ///   tbnz w8, #10, L1
4587 ///    \endcode
4588 ///
4589 /// \param  MI Conditional Branch
4590 /// \return True when the simple conditional branch is generated
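/// Note (informal): in the AND-mask case above, the TB(N)Z bit index is the
/// base-2 log of the single-bit mask, so #0x400 tests bit 10.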
4592 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4593 bool IsNegativeBranch = false;
4594 bool IsTestAndBranch = false;
4595 unsigned TargetBBInMI = 0;
4596   switch (MI.getOpcode()) {
4597   default:
4598     llvm_unreachable("Unknown branch instruction?");
4599   case AArch64::Bcc:
4600     return false;
4601   case AArch64::CBZW:
4602   case AArch64::CBZX:
4603     TargetBBInMI = 1;
4604     break;
4605   case AArch64::CBNZW:
4606   case AArch64::CBNZX:
4607     TargetBBInMI = 1;
4608     IsNegativeBranch = true;
4609     break;
4610   case AArch64::TBZW:
4611   case AArch64::TBZX:
4612     TargetBBInMI = 2;
4613     IsTestAndBranch = true;
4614     break;
4615   case AArch64::TBNZW:
4616   case AArch64::TBNZX:
4617     TargetBBInMI = 2;
4618     IsNegativeBranch = true;
4619     IsTestAndBranch = true;
4620     break;
4621   }
4622 // So we increment a zero register and test for bits other
4623 // than bit 0? Conservatively bail out in case the verifier
4624 // missed this case.
4625   if (IsTestAndBranch && MI.getOperand(1).getImm())
4626     return false;
4629   assert(MI.getParent() && "Incomplete machine instruction\n");
4630 MachineBasicBlock *MBB = MI.getParent();
4631 MachineFunction *MF = MBB->getParent();
4632 MachineRegisterInfo *MRI = &MF->getRegInfo();
4633 unsigned VReg = MI.getOperand(0).getReg();
4634   if (!TargetRegisterInfo::isVirtualRegister(VReg))
4635     return false;
4637 MachineInstr *DefMI = MRI->getVRegDef(VReg);
4639 // Look through COPY instructions to find definition.
4640 while (DefMI->isCopy()) {
4641 unsigned CopyVReg = DefMI->getOperand(1).getReg();
4642     if (!MRI->hasOneNonDBGUse(CopyVReg))
4643       return false;
4644     if (!MRI->hasOneDef(CopyVReg))
4645       return false;
4646     DefMI = MRI->getVRegDef(CopyVReg);
4647   }
4649   switch (DefMI->getOpcode()) {
4650   default:
4651     return false;
4652   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4653 case AArch64::ANDWri:
4654 case AArch64::ANDXri: {
4655     if (IsTestAndBranch)
4656       return false;
4657     if (DefMI->getParent() != MBB)
4658       return false;
4659     if (!MRI->hasOneNonDBGUse(VReg))
4660       return false;
4662 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4663 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4664 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4665     if (!isPowerOf2_64(Mask))
4666       return false;
4668 MachineOperand &MO = DefMI->getOperand(1);
4669 unsigned NewReg = MO.getReg();
4670     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4671       return false;
4673 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4675 MachineBasicBlock &RefToMBB = *MBB;
4676 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4677 DebugLoc DL = MI.getDebugLoc();
4678 unsigned Imm = Log2_64(Mask);
4679 unsigned Opc = (Imm < 32)
4680 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4681 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4682     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4683                               .addReg(NewReg)
4684                               .addImm(Imm)
4685                               .addMBB(TBB);
4686 // Register lives on to the CBZ now.
4687 MO.setIsKill(false);
4689 // For immediate smaller than 32, we need to use the 32-bit
4690 // variant (W) in all cases. Indeed the 64-bit variant does not
4691 // allow to encode them.
4692     // Therefore, if the input register is 64-bit, we need to take the
4693     // 32-bit sub-register.
4694 if (!Is32Bit && Imm < 32)
4695 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4696     MI.eraseFromParent();
4697     return true;
4698   }
4700 case AArch64::CSINCWr:
4701 case AArch64::CSINCXr: {
4702 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4703 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4704 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4705           DefMI->getOperand(2).getReg() == AArch64::XZR))
4706       return false;
4708     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4709       return false;
4711 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4712 // Convert only when the condition code is not modified between
4713 // the CSINC and the branch. The CC may be used by other
4714 // instructions in between.
4715     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4716       return false;
4717 MachineBasicBlock &RefToMBB = *MBB;
4718 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4719 DebugLoc DL = MI.getDebugLoc();
4720 if (IsNegativeBranch)
4721 CC = AArch64CC::getInvertedCondCode(CC);
4722 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4723     MI.eraseFromParent();
4724     return true;
4725   }
4726   }
4727 }
4729 std::pair<unsigned, unsigned>
4730 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4731 const unsigned Mask = AArch64II::MO_FRAGMENT;
4732 return std::make_pair(TF & Mask, TF & ~Mask);
4735 ArrayRef<std::pair<unsigned, const char *>>
4736 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4737 using namespace AArch64II;
4739 static const std::pair<unsigned, const char *> TargetFlags[] = {
4740 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4741 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4742 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4743 {MO_HI12, "aarch64-hi12"}};
4744 return makeArrayRef(TargetFlags);
4747 ArrayRef<std::pair<unsigned, const char *>>
4748 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4749 using namespace AArch64II;
4751 static const std::pair<unsigned, const char *> TargetFlags[] = {
4752 {MO_COFFSTUB, "aarch64-coffstub"},
4753 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
4754 {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
4755 {MO_DLLIMPORT, "aarch64-dllimport"}};
4756 return makeArrayRef(TargetFlags);
4759 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4760 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4761 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4762 {{MOSuppressPair, "aarch64-suppress-pair"},
4763 {MOStridedAccess, "aarch64-strided-access"}};
4764 return makeArrayRef(TargetFlags);
4767 /// Constants defining how certain sequences should be outlined.
4768 /// This encompasses how an outlined function should be called, and what kind of
4769 /// frame should be emitted for that outlined function.
4771 /// \p MachineOutlinerDefault implies that the function should be called with
4772 /// a save and restore of LR to the stack.
4776 /// I1 Save LR OUTLINED_FUNCTION:
4777 /// I2 --> BL OUTLINED_FUNCTION I1
4778 /// I3 Restore LR I2
4782 /// * Call construction overhead: 3 (save + BL + restore)
4783 /// * Frame construction overhead: 1 (ret)
4784 /// * Requires stack fixups? Yes
4786 /// \p MachineOutlinerTailCall implies that the function is being created from
4787 /// a sequence of instructions ending in a return.
4791 /// I1 OUTLINED_FUNCTION:
4792 /// I2 --> B OUTLINED_FUNCTION I1
4796 /// * Call construction overhead: 1 (B)
4797 /// * Frame construction overhead: 0 (Return included in sequence)
4798 /// * Requires stack fixups? No
4800 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4801 /// a BL instruction, but doesn't require LR to be saved and restored. This
4802 /// happens when LR is known to be dead.
4806 /// I1 OUTLINED_FUNCTION:
4807 /// I2 --> BL OUTLINED_FUNCTION I1
4812 /// * Call construction overhead: 1 (BL)
4813 /// * Frame construction overhead: 1 (RET)
4814 /// * Requires stack fixups? No
4816 /// \p MachineOutlinerThunk implies that the function is being created from
4817 /// a sequence of instructions ending in a call. The outlined function is
4818 /// called with a BL instruction, and the outlined function tail-calls the
4819 /// original call destination.
4823 /// I1 OUTLINED_FUNCTION:
4824 /// I2 --> BL OUTLINED_FUNCTION I1
4827 /// * Call construction overhead: 1 (BL)
4828 /// * Frame construction overhead: 0
4829 /// * Requires stack fixups? No
4831 /// \p MachineOutlinerRegSave implies that the function should be called with a
4832 /// save and restore of LR to an available register. This allows us to avoid
4833 /// stack fixups. Note that this outlining variant is compatible with the
4834 /// NoLRSave case.
4838 /// I1 Save LR OUTLINED_FUNCTION:
4839 /// I2 --> BL OUTLINED_FUNCTION I1
4840 /// I3 Restore LR I2
4844 /// * Call construction overhead: 3 (save + BL + restore)
4845 /// * Frame construction overhead: 1 (ret)
4846 /// * Requires stack fixups? No
4847 enum MachineOutlinerClass {
4848 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4849 MachineOutlinerTailCall, /// Only emit a branch.
4850 MachineOutlinerNoLRSave, /// Emit a call and return.
4851 MachineOutlinerThunk, /// Emit a call and tail-call.
4852   MachineOutlinerRegSave  /// Same as default, but save to a register.
4853 };
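// Rough per-call code-size cost implied by the descriptions above, at 4 bytes
// per instruction: TailCall, NoLRSave and Thunk need a single B/BL (4 bytes),
// while RegSave and Default need save + BL + restore (12 bytes).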
4855 enum MachineOutlinerMBBFlags {
4856   LRUnavailableSomewhere = 0x2,
4857   HasCalls = 0x4,
4858   UnsafeRegsDead = 0x8
4859 };
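/// Find a free GPR64 register that LR can be saved to around an outlined call,
/// or 0 if none is available. Reserved registers, LR itself, and X16/X17
/// (which are not guaranteed to be preserved across a call) are skipped.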
4861 unsigned
4862 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4863 assert(C.LRUWasSet && "LRU wasn't set?");
4864 MachineFunction *MF = C.getMF();
4865 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4866 MF->getSubtarget().getRegisterInfo());
4868 // Check if there is an available register across the sequence that we can
4870 for (unsigned Reg : AArch64::GPR64RegClass) {
4871 if (!ARI->isReservedReg(*MF, Reg) &&
4872 Reg != AArch64::LR && // LR is not reserved, but don't use it.
4873 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4874 Reg != AArch64::X17 && // Ditto for X17.
4875         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
4876       return Reg;
4877   }
4879   // No suitable register. Return 0.
4880   return 0u;
4881 }
4883 outliner::OutlinedFunction
4884 AArch64InstrInfo::getOutliningCandidateInfo(
4885 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4886 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
4887 unsigned SequenceSize =
4888 std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4889 [this](unsigned Sum, const MachineInstr &MI) {
4890                       return Sum + getInstSizeInBytes(MI);
4891                     });
4893 // Properties about candidate MBBs that hold for all of them.
4894 unsigned FlagsSetInAll = 0xF;
4896 // Compute liveness information for each candidate, and set FlagsSetInAll.
4897 const TargetRegisterInfo &TRI = getRegisterInfo();
4898 std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4899 [&FlagsSetInAll](outliner::Candidate &C) {
4900                   FlagsSetInAll &= C.Flags;
4901                 });
4903 // According to the AArch64 Procedure Call Standard, the following are
4904 // undefined on entry/exit from a function call:
4906 // * Registers x16, x17, (and thus w16, w17)
4907 // * Condition codes (and thus the NZCV register)
4909   // Because of this, we can't outline any sequence of instructions where one
4911 // of these registers is live into/across it. Thus, we need to delete
4914 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4915 // If the unsafe registers in this block are all dead, then we don't need
4916 // to compute liveness here.
4917     if (C.Flags & UnsafeRegsDead)
4918       return false;
4920 LiveRegUnits LRU = C.LRU;
4921 return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4922             !LRU.available(AArch64::NZCV));
4923   };
4925 // Are there any candidates where those registers are live?
4926 if (!(FlagsSetInAll & UnsafeRegsDead)) {
4927 // Erase every candidate that violates the restrictions above. (It could be
4928 // true that we have viable candidates, so it's not worth bailing out in
4929     // the case that, say, 1 out of 20 candidates violate the restrictions.)
4930 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4931 RepeatedSequenceLocs.end(),
4932 CantGuaranteeValueAcrossCall),
4933 RepeatedSequenceLocs.end());
4935 // If the sequence doesn't have enough candidates left, then we're done.
4936 if (RepeatedSequenceLocs.size() < 2)
4937 return outliner::OutlinedFunction();
4940 // At this point, we have only "safe" candidates to outline. Figure out
4941 // frame + call instruction information.
4943 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4945 // Helper lambda which sets call information for every candidate.
4946 auto SetCandidateCallInfo =
4947 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4948 for (outliner::Candidate &C : RepeatedSequenceLocs)
4949 C.setCallInfo(CallID, NumBytesForCall);
4952 unsigned FrameID = MachineOutlinerDefault;
4953 unsigned NumBytesToCreateFrame = 4;
4955 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4956     return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4957   });
4959 // Returns true if an instructions is safe to fix up, false otherwise.
4960 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
4964 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
4965 !MI.readsRegister(AArch64::SP, &TRI))
4968 // Any modification of SP will break our code to save/restore LR.
4969 // FIXME: We could handle some instructions which add a constant
4970 // offset to SP, with a bit more work.
4971     if (MI.modifiesRegister(AArch64::SP, &TRI))
4972       return false;
4974 // At this point, we have a stack instruction that we might need to
4975 // fix up. We'll handle it if it's a load or store.
4976 if (MI.mayLoadOrStore()) {
4977 MachineOperand *Base; // Filled with the base operand of MI.
4978 int64_t Offset; // Filled with the offset of MI.
4980 // Does it allow us to offset the base operand and is the base the
4982 if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
4983 Base->getReg() != AArch64::SP)
4986 // Find the minimum/maximum offset for this instruction and check
4987 // if fixing it up would be in range.
4989 MaxOffset; // Unscaled offsets for the instruction.
4990 unsigned Scale; // The scale to multiply the offsets by.
4991 unsigned DummyWidth;
4992 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
4994 Offset += 16; // Update the offset to what it would be if we outlined.
4995       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
4996         return false;
4998 // It's in range, so we can outline it.
5002 // FIXME: Add handling for instructions like "add x0, sp, #8".
5004 // We can't fix it up, so don't outline it.
5008 // True if it's possible to fix up each stack instruction in this sequence.
5009 // Important for frames/call variants that modify the stack.
5010 bool AllStackInstrsSafe = std::all_of(
5011 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5013 // If the last instruction in any candidate is a terminator, then we should
5014 // tail call all of the candidates.
5015 if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5016 FrameID = MachineOutlinerTailCall;
5017 NumBytesToCreateFrame = 0;
5018 SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5021 else if (LastInstrOpcode == AArch64::BL ||
5022 (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5023 // FIXME: Do we need to check if the code after this uses the value of LR?
5024 FrameID = MachineOutlinerThunk;
5025 NumBytesToCreateFrame = 0;
5026 SetCandidateCallInfo(MachineOutlinerThunk, 4);
5030 // We need to decide how to emit calls + frames. We can always emit the same
5031 // frame if we don't need to save to the stack. If we have to save to the
5032 // stack, then we need a different frame.
5033 unsigned NumBytesNoStackCalls = 0;
5034 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5036 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5039 // Is LR available? If so, we don't need a save.
5040 if (C.LRU.available(AArch64::LR)) {
5041 NumBytesNoStackCalls += 4;
5042 C.setCallInfo(MachineOutlinerNoLRSave, 4);
5043 CandidatesWithoutStackFixups.push_back(C);
5046 // Is an unused register available? If so, we won't modify the stack, so
5047 // we can outline with the same frame type as those that don't save LR.
5048 else if (findRegisterToSaveLRTo(C)) {
5049 NumBytesNoStackCalls += 12;
5050 C.setCallInfo(MachineOutlinerRegSave, 12);
5051 CandidatesWithoutStackFixups.push_back(C);
5054 // Is SP used in the sequence at all? If not, we don't have to modify
5055 // the stack, so we are guaranteed to get the same frame.
5056 else if (C.UsedInSequence.available(AArch64::SP)) {
5057 NumBytesNoStackCalls += 12;
5058 C.setCallInfo(MachineOutlinerDefault, 12);
5059 CandidatesWithoutStackFixups.push_back(C);
5062 // If we outline this, we need to modify the stack. Pretend we don't
5063 // outline this by saving all of its bytes.
5065 NumBytesNoStackCalls += SequenceSize;
5069 // If there are no places where we have to save LR, then note that we
5070 // don't have to update the stack. Otherwise, give every candidate the
5071 // default call type, as long as it's safe to do so.
5072 if (!AllStackInstrsSafe ||
5073 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5074 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5075 FrameID = MachineOutlinerNoLRSave;
5077 SetCandidateCallInfo(MachineOutlinerDefault, 12);
5080 // If we dropped all of the candidates, bail out here.
5081 if (RepeatedSequenceLocs.size() < 2) {
5082 RepeatedSequenceLocs.clear();
5083 return outliner::OutlinedFunction();
5087 // Does every candidate's MBB contain a call? If so, then we might have a call
5089 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5090     // Check if the range contains a call. These require a save + restore of the
5091     // link register.
5092 bool ModStackToSaveLR = false;
5093 if (std::any_of(FirstCand.front(), FirstCand.back(),
5094 [](const MachineInstr &MI) { return MI.isCall(); }))
5095 ModStackToSaveLR = true;
5097 // Handle the last instruction separately. If this is a tail call, then the
5098 // last instruction is a call. We don't want to save + restore in this case.
5099 // However, it could be possible that the last instruction is a call without
5100 // it being valid to tail call this sequence. We should consider this as
5102 else if (FrameID != MachineOutlinerThunk &&
5103 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5104 ModStackToSaveLR = true;
5106 if (ModStackToSaveLR) {
5107 // We can't fix up the stack. Bail out.
5108 if (!AllStackInstrsSafe) {
5109 RepeatedSequenceLocs.clear();
5110         return outliner::OutlinedFunction();
5111       }
5113 // Save + restore LR.
5114       NumBytesToCreateFrame += 8;
5115     }
5116   }
5118 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5119                                     NumBytesToCreateFrame, FrameID);
5120 }
5122 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5123 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5124 const Function &F = MF.getFunction();
5126 // Can F be deduplicated by the linker? If it can, don't outline from it.
5127   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5128     return false;
5130   // Don't outline from functions with section markings; the program could
5131   // expect that all the code is in the named section.
5132   // FIXME: Allow outlining from multiple functions with the same section
5133   // marking.
5134   if (F.hasSection())
5135     return false;
5137   // Outlining from functions with redzones is unsafe since the outliner may
5138   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5139   // outline from it.
5140   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5141   if (!AFI || AFI->hasRedZone().getValueOr(true))
5142     return false;
5144   // It's safe to outline from MF.
5145   return true;
5146 }
5148 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5149 unsigned &Flags) const {
5150 // Check if LR is available through all of the MBB. If it's not, then set
5152 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5153 "Suitable Machine Function for outlining must track liveness");
5154 LiveRegUnits LRU(getRegisterInfo());
5156 std::for_each(MBB.rbegin(), MBB.rend(),
5157 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5159 // Check if each of the unsafe registers are available...
5160 bool W16AvailableInBlock = LRU.available(AArch64::W16);
5161 bool W17AvailableInBlock = LRU.available(AArch64::W17);
5162 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
5164 // If all of these are dead (and not live out), we know we don't have to check
5166 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
5167 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
5169 // Now, add the live outs to the set.
5170 LRU.addLiveOuts(MBB);
5172 // If any of these registers is available in the MBB, but also a live out of
5173 // the block, then we know outlining is unsafe.
5174   if (W16AvailableInBlock && !LRU.available(AArch64::W16))
5175     return false;
5176   if (W17AvailableInBlock && !LRU.available(AArch64::W17))
5177     return false;
5178   if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
5179     return false;
5181 // Check if there's a call inside this MachineBasicBlock. If there is, then
5183 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
5184 Flags |= MachineOutlinerMBBFlags::HasCalls;
5186 MachineFunction *MF = MBB.getParent();
5188 // In the event that we outline, we may have to save LR. If there is an
5189 // available register in the MBB, then we'll always save LR there. Check if
5191 bool CanSaveLR = false;
5192 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5193 MF->getSubtarget().getRegisterInfo());
5195 // Check if there is an available register across the sequence that we can
5197 for (unsigned Reg : AArch64::GPR64RegClass) {
5198 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
5199         Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
5200       CanSaveLR = true;
5201       break;
5202     }
5203   }
5205 // Check if we have a register we can save LR to, and if LR was used
5206 // somewhere. If both of those things are true, then we need to evaluate the
5207 // safety of outlining stack instructions later.
5208 if (!CanSaveLR && !LRU.available(AArch64::LR))
5209     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5211   return true;
5212 }
5214 outliner::InstrType
5215 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5216 unsigned Flags) const {
5217 MachineInstr &MI = *MIT;
5218 MachineBasicBlock *MBB = MI.getParent();
5219 MachineFunction *MF = MBB->getParent();
5220 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5222 // Don't outline LOHs.
5223 if (FuncInfo->getLOHRelated().count(&MI))
5224 return outliner::InstrType::Illegal;
5226 // Don't allow debug values to impact outlining type.
5227 if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5228 return outliner::InstrType::Invisible;
5230 // At this point, KILL instructions don't really tell us much so we can go
5231   // ahead and skip over them.
5232   if (MI.isKill())
5233     return outliner::InstrType::Invisible;
5235 // Is this a terminator for a basic block?
5236 if (MI.isTerminator()) {
5238 // Is this the end of a function?
5239 if (MI.getParent()->succ_empty())
5240 return outliner::InstrType::Legal;
5242 // It's not, so don't outline it.
5243 return outliner::InstrType::Illegal;
5246 // Make sure none of the operands are un-outlinable.
5247 for (const MachineOperand &MOP : MI.operands()) {
5248 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5249 MOP.isTargetIndex())
5250 return outliner::InstrType::Illegal;
5252 // If it uses LR or W30 explicitly, then don't touch it.
5253 if (MOP.isReg() && !MOP.isImplicit() &&
5254 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5255 return outliner::InstrType::Illegal;
5258 // Special cases for instructions that can always be outlined, but will fail
5259   // the later tests, e.g. ADRPs, which are PC-relative and use LR, but can
5260   // always be outlined because they don't require a *specific* value to be in LR.
5261 if (MI.getOpcode() == AArch64::ADRP)
5262 return outliner::InstrType::Legal;
5264 // If MI is a call we might be able to outline it. We don't want to outline
5265 // any calls that rely on the position of items on the stack. When we outline
5266 // something containing a call, we have to emit a save and restore of LR in
5267 // the outlined function. Currently, this always happens by saving LR to the
5268 // stack. Thus, if we outline, say, half the parameters for a function call
5269   // plus the call, then we'll break the callee's expectations for the layout
5270   // of the stack.
5272 // FIXME: Allow calls to functions which construct a stack frame, as long
5273 // as they don't access arguments on the stack.
5274 // FIXME: Figure out some way to analyze functions defined in other modules.
5275 // We should be able to compute the memory usage based on the IR calling
5276 // convention, even if we can't see the definition.
5277   if (MI.isCall()) {
5278     // Get the function associated with the call. Look at each operand and find
5279 // the one that represents the callee and get its name.
5280 const Function *Callee = nullptr;
5281 for (const MachineOperand &MOP : MI.operands()) {
5282 if (MOP.isGlobal()) {
5283         Callee = dyn_cast<Function>(MOP.getGlobal());
5284         break;
5285       }
5286     }
5288 // Never outline calls to mcount. There isn't any rule that would require
5289 // this, but the Linux kernel's "ftrace" feature depends on it.
5290 if (Callee && Callee->getName() == "\01_mcount")
5291 return outliner::InstrType::Illegal;
5293 // If we don't know anything about the callee, assume it depends on the
5294 // stack layout of the caller. In that case, it's only legal to outline
5295 // as a tail-call. Whitelist the call instructions we know about so we
5296 // don't get unexpected results with call pseudo-instructions.
5297 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5298 if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5299 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5301     if (!Callee)
5302       return UnknownCallOutlineType;
5304     // We have a function we have information about. Check if it's something we
5305     // can safely outline.
5306 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5308 // We don't know what's going on with the callee at all. Don't touch it.
5309     if (!CalleeMF)
5310       return UnknownCallOutlineType;
5312 // Check if we know anything about the callee saves on the function. If we
5313 // don't, then don't touch it, since that implies that we haven't
5314 // computed anything about its stack frame yet.
5315 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5316 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5317 MFI.getNumObjects() > 0)
5318 return UnknownCallOutlineType;
5320 // At this point, we can say that CalleeMF ought to not pass anything on the
5321 // stack. Therefore, we can outline it.
5322     return outliner::InstrType::Legal;
5323   }
5325 // Don't outline positions.
5326 if (MI.isPosition())
5327 return outliner::InstrType::Illegal;
5329 // Don't touch the link register or W30.
5330 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5331 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5332 return outliner::InstrType::Illegal;
5334   return outliner::InstrType::Legal;
5335 }
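/// Illustrative effect of the fixup below (assuming the 16-byte LR spill the
/// outlined frame inserts): an SP-relative access in the outlined body such as
///   ldr x0, [sp, #8]
/// has to become
///   ldr x0, [sp, #24]
/// because the saved return address now sits between SP and the original data.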
5337 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5338 for (MachineInstr &MI : MBB) {
5339 MachineOperand *Base;
5343 // Is this a load or store with an immediate offset with SP as the base?
5344 if (!MI.mayLoadOrStore() ||
5345 !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
5346 (Base->isReg() && Base->getReg() != AArch64::SP))
5349 // It is, so we have to fix it up.
5351 int64_t Dummy1, Dummy2;
5353 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5354 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5355 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5356 assert(Scale != 0 && "Unexpected opcode!");
5358 // We've pushed the return address to the stack, so add 16 to the offset.
5359 // This is safe, since we already checked if it would overflow when we
5360 // checked if this instruction was legal to outline.
5361 int64_t NewImm = (Offset + 16) / Scale;
5362     StackOffsetOperand.setImm(NewImm);
5363   }
5364 }
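/// A minimal sketch (for illustration only) of the default frame built below
/// around the outlined body:
///   str x30, [sp, #-16]!   // spill LR; CFI: def_cfa_offset 16, LR at CFA-16
///   ...                    // outlined instructions
///   ldr x30, [sp], #16     // reload LR
///   ret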
5366 void AArch64InstrInfo::buildOutlinedFrame(
5367 MachineBasicBlock &MBB, MachineFunction &MF,
5368 const outliner::OutlinedFunction &OF) const {
5369 // For thunk outlining, rewrite the last instruction from a call to a
5371 if (OF.FrameConstructionID == MachineOutlinerThunk) {
5372 MachineInstr *Call = &*--MBB.instr_end();
5373 unsigned TailOpcode;
5374 if (Call->getOpcode() == AArch64::BL) {
5375 TailOpcode = AArch64::TCRETURNdi;
5376     } else {
5377       assert(Call->getOpcode() == AArch64::BLR);
5378       TailOpcode = AArch64::TCRETURNriALL;
5379     }
5380 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5381                            .add(Call->getOperand(0))
5382                            .addImm(0);
5383 MBB.insert(MBB.end(), TC);
5384     Call->eraseFromParent();
5385   }
5387 // Is there a call in the outlined range?
5388 auto IsNonTailCall = [](MachineInstr &MI) {
5389 return MI.isCall() && !MI.isReturn();
5391 if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5392 // Fix up the instructions in the range, since we're going to modify the
5394 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
5395 "Can only fix up stack references once");
5396 fixupPostOutline(MBB);
5398 // LR has to be a live in so that we can save it.
5399 MBB.addLiveIn(AArch64::LR);
5401 MachineBasicBlock::iterator It = MBB.begin();
5402 MachineBasicBlock::iterator Et = MBB.end();
5404 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5405 OF.FrameConstructionID == MachineOutlinerThunk)
5406 Et = std::prev(MBB.end());
5408 // Insert a save before the outlined region
5409 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5410 .addReg(AArch64::SP, RegState::Define)
5411 .addReg(AArch64::LR)
5412                                .addReg(AArch64::SP)
5413                                .addImm(-16);
5414 It = MBB.insert(It, STRXpre);
5416 const TargetSubtargetInfo &STI = MF.getSubtarget();
5417 const MCRegisterInfo *MRI = STI.getRegisterInfo();
5418 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5420 // Add a CFI saying the stack was moved 16 B down.
5421 int64_t StackPosEntry =
5422 MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5423 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5424 .addCFIIndex(StackPosEntry)
5425 .setMIFlags(MachineInstr::FrameSetup);
5427 // Add a CFI saying that the LR that we want to find is now 16 B higher than
5429 int64_t LRPosEntry =
5430 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5431 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5432 .addCFIIndex(LRPosEntry)
5433 .setMIFlags(MachineInstr::FrameSetup);
5435 // Insert a restore before the terminator for the function.
5436 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5437 .addReg(AArch64::SP, RegState::Define)
5438 .addReg(AArch64::LR, RegState::Define)
5439                                 .addReg(AArch64::SP)
5440                                 .addImm(16);
5441 Et = MBB.insert(Et, LDRXpost);
5444 // If this is a tail call outlined function, then there's already a return.
5445 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5446 OF.FrameConstructionID == MachineOutlinerThunk)
5449 // It's not a tail call, so we have to insert the return ourselves.
5450 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5451 .addReg(AArch64::LR, RegState::Undef);
5452 MBB.insert(MBB.end(), ret);
5454 // Did we have to modify the stack by saving the link register?
5455   if (OF.FrameConstructionID != MachineOutlinerDefault)
5456     return;
5458 // We modified the stack.
5459 // Walk over the basic block and fix up all the stack accesses.
5460   fixupPostOutline(MBB);
5461 }
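// Illustrative call sequences (informal summary of the cases handled below):
//   TailCall:        b   OUTLINED_FUNCTION
//   NoLRSave/Thunk:  bl  OUTLINED_FUNCTION
//   RegSave:         mov x<reg>, x30 ; bl OUTLINED_FUNCTION ; mov x30, x<reg>
//   Default:         str x30, [sp, #-16]! ; bl OUTLINED_FUNCTION ; ldr x30, [sp], #16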
5463 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5464 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5465 MachineFunction &MF, const outliner::Candidate &C) const {
5467 // Are we tail calling?
5468 if (C.CallConstructionID == MachineOutlinerTailCall) {
5469 // If yes, then we can just branch to the label.
5470 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5471                             .addGlobalAddress(M.getNamedValue(MF.getName()))
5472                             .addImm(0));
5473     return It;
5474   }
5476 // Are we saving the link register?
5477 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
5478 C.CallConstructionID == MachineOutlinerThunk) {
5479 // No, so just insert the call.
5480 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5481                           .addGlobalAddress(M.getNamedValue(MF.getName())));
5482     return It;
5483   }
5485 // We want to return the spot where we inserted the call.
5486 MachineBasicBlock::iterator CallPt;
5488 // Instructions for saving and restoring LR around the call instruction we're
5489   // going to insert.
5490   MachineInstr *Save;
5491   MachineInstr *Restore;
5492 // Can we save to a register?
5493 if (C.CallConstructionID == MachineOutlinerRegSave) {
5494 // FIXME: This logic should be sunk into a target-specific interface so that
5495 // we don't have to recompute the register.
5496 unsigned Reg = findRegisterToSaveLRTo(C);
5497 assert(Reg != 0 && "No callee-saved register available?");
5499 // Save and restore LR from that register.
5500 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5501 .addReg(AArch64::XZR)
5502                .addReg(AArch64::LR)
5503                .addImm(0);
5504 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5505                   .addReg(AArch64::XZR)
5506                   .addReg(Reg)
5507                   .addImm(0);
5508   } else {
5509 // We have the default case. Save and restore from SP.
5510 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5511 .addReg(AArch64::SP, RegState::Define)
5512 .addReg(AArch64::LR)
5513                .addReg(AArch64::SP)
5514                .addImm(-16);
5515 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5516 .addReg(AArch64::SP, RegState::Define)
5517 .addReg(AArch64::LR, RegState::Define)
5518                   .addReg(AArch64::SP)
5519                   .addImm(16);
5520   }
5522   It = MBB.insert(It, Save);
5523   ++It;
5526 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5527                           .addGlobalAddress(M.getNamedValue(MF.getName())));
5528   CallPt = It;
5529   ++It;
5531   It = MBB.insert(It, Restore);
5532   return CallPt;
5533 }
5535 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5536 MachineFunction &MF) const {
5537   return MF.getFunction().optForMinSize();
5538 }
5540 #define GET_INSTRINFO_HELPERS
5541 #include "AArch64GenInstrInfo.inc"