//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    return 16;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}
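
// Branch analysis: examine the terminators at the end of MBB and, when they
// are understood, report the target blocks and branch condition so generic
// branch folding can redirect or remove them.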
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
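
/// Invert the branch condition produced by analyzeBranch: flip the condition
/// code of a plain Bcc, or swap the opcode of a folded compare-and-branch
/// (CBZ<->CBNZ, TBZ<->TBNZ). Returns false to indicate success.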
bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:  Cond[1].setImm(AArch64::CBNZW); break;
    case AArch64::CBNZW: Cond[1].setImm(AArch64::CBZW);  break;
    case AArch64::CBZX:  Cond[1].setImm(AArch64::CBNZX); break;
    case AArch64::CBNZX: Cond[1].setImm(AArch64::CBZX);  break;
    case AArch64::TBZW:  Cond[1].setImm(AArch64::TBNZW); break;
    case AArch64::TBNZW: Cond[1].setImm(AArch64::TBZW);  break;
    case AArch64::TBZX:  Cond[1].setImm(AArch64::TBNZX); break;
    case AArch64::TBNZX: Cond[1].setImm(AArch64::TBZX);  break;
    }
  }

  return false;
}
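
/// Delete the branch instructions at the end of MBB (at most one conditional
/// plus one unconditional branch) and return how many were removed.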
unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}
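
/// Insert branch code at the end of MBB: an unconditional branch, a lone
/// conditional branch, or both for a two-way conditional branch. Returns the
/// number of branch instructions inserted.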
unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}
// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
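
/// Decide whether a select of TrueReg/FalseReg under Cond can be lowered to
/// csel/fcsel, and report latency estimates for the condition and operands.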
bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}
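
/// Lower a select into a compare (materialized from Cond when it encodes
/// cbz/cbnz or tbz/tbnz) followed by a csel/csinc/csinv/csneg or fcsel.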
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:  Is64Bit = false; CC = AArch64CC::EQ; break;
    case AArch64::CBZX:  Is64Bit = true;  CC = AArch64CC::EQ; break;
    case AArch64::CBNZW: Is64Bit = false; CC = AArch64CC::NE; break;
    case AArch64::CBNZX: Is64Bit = true;  CC = AArch64CC::NE; break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm = 0;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV.
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has the
  // ZeroCycleZeroing feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}
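
// Conservatively returns true only when both accesses are reg+imm off the
// same base register and their [offset, offset + width) ranges provably do
// not overlap.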
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: In order to convert CmpValue to 0 or 1
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}
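
/// Re-constrain the register classes of Instr's register operands after an
/// in-place opcode change; returns false if some operand cannot satisfy the
/// constraints of the new opcode.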
static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;

    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}
/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}
enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed CC are accessed
///       on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
                      To->getParent()->rend(), [From](MachineInstr &MI) {
                        return MachineBasicBlock::iterator(MI) == From;
                      }) != To->getParent()->rend());

  // We iterate backwards, starting at \p To, until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is only truly a compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if above there is an instruction producing a needed
///    condition code or an instruction which can be converted into such an
///    instruction. Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}
/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:    return AArch64::ADDSWrr;
  case AArch64::ADDWri:    return AArch64::ADDSWri;
  case AArch64::ADDXrr:    return AArch64::ADDSXrr;
  case AArch64::ADDXri:    return AArch64::ADDSXri;
  case AArch64::ADCWr:     return AArch64::ADCSWr;
  case AArch64::ADCXr:     return AArch64::ADCSXr;
  case AArch64::SUBWrr:    return AArch64::SUBSWrr;
  case AArch64::SUBWri:    return AArch64::SUBSWri;
  case AArch64::SUBXrr:    return AArch64::SUBSXrr;
  case AArch64::SUBXri:    return AArch64::SUBSXri;
  case AArch64::SBCWr:     return AArch64::SBCSWr;
  case AArch64::SBCXr:     return AArch64::SBCSXr;
  case AArch64::ANDWri:    return AArch64::ANDSWri;
  case AArch64::ANDXri:    return AArch64::ANDSXri;
  }
}
/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;

  return false;
}
struct UsedNZCV {
  bool N, Z, C, V;

  UsedNZCV() : N(false), Z(false), C(false), V(false) {}

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set   or  C clear
    UsedFlags.Z = true;
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set,   N and V differ
    UsedFlags.Z = true;
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///   or if MI opcode is not the S form there must be neither defs of flags
///   nor uses of flags between MI and CmpInstr.
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}
/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
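
// Expand LOAD_STACK_GUARD into an address computation plus load, picking the
// GOT, large-code-model MOVZ/MOVK, or ADRP+LDR form as appropriate.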
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
/// Return true if this instruction has a shifted register operand with a
/// non-zero shift amount.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift immediate.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI.getOperand(3).isImm()) {
      unsigned val = MI.getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 3 &&
             MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI.getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI.getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI.getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI.getOperand(1).getReg() == AArch64::XZR) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI.getOperand(2).getImm() == 0) {
      assert(MI.getDesc().getNumOperands() == 4 &&
             MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI.getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
      assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
        MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}
/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI.getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
}
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
  return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    return MMO->getFlags() & MOSuppressPair;
  });
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
  if (MI.memoperands_empty())
    return;
  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
    return true;
  }
}

bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
  return isUnscaledLdSt(MI.getOpcode());
}
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
  // If this is a volatile load/store, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
  if (!MI.getOperand(2).isImm())
    return false;

  // Can't merge/pair if the instruction modifies the base register.
  // e.g., ldr x0, [x0]
  unsigned BaseReg = MI.getOperand(1).getReg();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (MI.modifiesRegister(BaseReg, TRI))
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (isLdStPairSuppressed(MI))
    return false;

  // On some CPUs quad load/store pairs are slower than two single load/stores.
  if (Subtarget.avoidQuadLdStPairs()) {
    switch (MI.getOpcode()) {
    default:
      break;
    case AArch64::LDURQi:
    case AArch64::STURQi:
    case AArch64::LDRQui:
    case AArch64::STRQui:
      return false;
    }
  }

  return true;
}
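
/// Return the base register and scaled immediate offset of a load/store with
/// a recognized reg+imm addressing form.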
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    const TargetRegisterInfo *TRI) const {
  switch (LdSt.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRSWui:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
    unsigned Width;
    return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
  }
}
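
/// As getMemOpBaseRegImmOfs, but also return the width (in bytes) of the
/// memory access; handles both single and paired loads/stores.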
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt.getNumExplicitOperands() == 3) {
    // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
      return false;
  } else if (LdSt.getNumExplicitOperands() == 4) {
    // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
        !LdSt.getOperand(3).isImm())
      return false;
  } else
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
  int Scale = 0;
  switch (LdSt.getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDPQi:
  case AArch64::LDNPQi:
  case AArch64::STPQi:
  case AArch64::STNPQi:
    Scale = 16;
    Width = 32;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    Scale = 8;
    Width = 16;
    break;
  case AArch64::LDRXui:
  case AArch64::LDRDui:
  case AArch64::STRXui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    Scale = 4;
    Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::LDRSui:
  case AArch64::LDRSWui:
  case AArch64::STRWui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  }

  if (LdSt.getNumExplicitOperands() == 3) {
    BaseReg = LdSt.getOperand(1).getReg();
    Offset = LdSt.getOperand(2).getImm() * Scale;
  } else {
    assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    BaseReg = LdSt.getOperand(2).getReg();
    Offset = LdSt.getOperand(3).getImm() * Scale;
  }
  return true;
}
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
  unsigned OffsetStride = 1;
  switch (Opc) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    OffsetStride = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    OffsetStride = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    OffsetStride = 4;
    break;
  }
  // If the byte-offset isn't a multiple of the stride, we can't scale this
  // offset.
  if (Offset % OffsetStride != 0)
    return false;

  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  Offset /= OffsetStride;
  return true;
}
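
// Returns true when the two opcodes may form a load/store pair: identical
// opcodes always can, and a 32-bit zero-extending load can pair with the
// corresponding sign-extending load.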
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
  if (FirstOpc == SecondOpc)
    return true;
  // We can also pair sign-ext and zero-ext instructions.
  switch (FirstOpc) {
  default:
    return false;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
  }
  // These instructions can't be paired based on their opcodes.
  return false;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
                                           MachineInstr &SecondLdSt,
                                           unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;

  // Can we pair these instructions based on their opcodes?
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  if (!canPairLdStOpc(FirstOpc, SecondOpc))
    return false;

  // Can't merge volatiles or load/stores that have a hint to avoid pair
  // formation, for example.
  if (!isCandidateToMergeOrPair(FirstLdSt) ||
      !isCandidateToMergeOrPair(SecondLdSt))
    return false;

  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    return false;

  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
  if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    return false;

  // Pairwise instructions have a 7-bit signed offset field.
  if (Offset1 > 63 || Offset1 < -64)
    return false;

  // The caller should already have ordered First/SecondLdSt by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + 1 == Offset2;
}
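
// Returns true when First and Second should be scheduled back to back because
// the subtarget can macro-fuse them (flag-setting ALU op + Bcc or CBZ/CBNZ).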
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
                                              MachineInstr &Second) const {
  if (Subtarget.hasMacroOpFusion()) {
    // Fuse CMN, CMP, TST followed by Bcc.
    unsigned SecondOpcode = Second.getOpcode();
    if (SecondOpcode == AArch64::Bcc) {
      switch (First.getOpcode()) {
      default:
        return false;
      case AArch64::SUBSWri:
      case AArch64::ADDSWri:
      case AArch64::ANDSWri:
      case AArch64::SUBSXri:
      case AArch64::ADDSXri:
      case AArch64::ANDSXri:
        return true;
      }
    }
    // Fuse ALU operations followed by CBZ/CBNZ.
    if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
        SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
      switch (First.getOpcode()) {
      default:
        return false;
      case AArch64::ADDWri:
      case AArch64::ADDXri:
      case AArch64::ANDWri:
      case AArch64::ANDXri:
      case AArch64::EORWri:
      case AArch64::EORXri:
      case AArch64::ORRWri:
      case AArch64::ORRXri:
      case AArch64::SUBWri:
      case AArch64::SUBXri:
        return true;
      }
    }
  }
  return false;
}
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
    MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
    const MDNode *Expr, const DebugLoc &DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(Var)
                                .addMetadata(Expr);
  return &*MIB;
}
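
// Append Reg, narrowed to sub-register SubIdx when SubIdx is non-zero, as an
// operand of MIB with the given register state.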
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here, that happens to be
  // easily obtainable with a mask.
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
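
// Copy a D- or Q-register tuple one sub-register at a time, iterating in
// reverse when a forward copy would overwrite not-yet-copied source registers.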
void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    llvm::ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}
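
// Lower COPY into the cheapest legal sequence for each register class: ORR
// for GPR/vector moves, ADD #0 when SP is involved, MOVZ for zeroing, and a
// store/reload through the stack for FPR128 copies without NEON.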
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
                                   unsigned SrcReg, bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }
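
  // For illustration (not emitted verbatim by the code above): a plain
  // 32-bit copy "$w0 = COPY $w1" expands to
  //   ORR W0, WZR, W1    on generic cores, or
  //   ORR X0, XZR, X1    on cores with zero-cycle 64-bit register moves,
  // while a copy to or from the stack pointer becomes "ADD W0, WSP, #0"
  // (or the X-register form for 64-bit copies).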

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2, AArch64::dsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2, AArch64::qsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }
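
  // For illustration: a QQ pair copy such as "$q0_q1 = COPY $q2_q3" is split
  // by copyPhysRegTuple into one vector-ORR move per 128-bit sub-register:
  //   ORR V0.16B, V2.16B, V2.16B
  //   ORR V1.16B, V3.16B, V3.16B
  // The sub-registers are visited in reverse order when the source and
  // destination tuples overlap, so no lane is clobbered before it is read.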

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}

void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}
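
// For illustration: spilling a 64-bit GPR virtual register to a frame slot
// produces roughly "STRXui %vreg, <fi#N>, 0", whereas a QQ register pair is
// stored with ST1Twov2d through the frame-index base alone; the ST1 forms
// take no immediate offset, which is why Offset is cleared for the vector
// cases above (and in the matching load path below).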

void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d;
      Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d;
      Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d;
      Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d;
      Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d;
      Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() &&
             "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d;
      Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
         "SP increment/decrement not 16-byte aligned");

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
  // register can be loaded with offset%8 and the add/sub can use an extending
  // instruction with LSL#3.
  // Currently the function handles any offsets but generates a poor sequence
  // of code.
  //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}
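
// For illustration: with SrcReg == DestReg == X0 and Offset == 0x2345, the
// loop above peels off the shifted 12-bit chunk first and then emits the
// remainder, roughly:
//   ADD X0, X0, #2, lsl #12   ; adds 0x2000
//   ADD X0, X0, #0x345        ; adds the remaining 0x345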

MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex,
    LiveIntervals *LIS) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %vreg0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI.isCopy()) {
    unsigned DstReg = MI.getOperand(0).getReg();
    unsigned SrcReg = MI.getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Cannot fold.
  return nullptr;
}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
  case AArch64::LDNPXi:
  case AArch64::LDNPDi:
  case AArch64::STNPXi:
  case AArch64::STNPDi:
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::LDNPQi:
  case AArch64::STNPQi:
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
  case AArch64::LDNPWi:
  case AArch64::LDNPSi:
  case AArch64::STNPWi:
  case AArch64::STNPSi:
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}
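
// For illustration: an LDRXui (Scale == 8) whose total offset is 4 cannot
// use the scaled form, so the routine above switches to the unscaled LDURXi;
// 4 fits the signed 9-bit unscaled range [-256, 255], so the offset is fully
// emittable and the result is
// AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal.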

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
  return true;
}

// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    break;
  }
  return false;
}

// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // Must only be used by the user we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}
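
// For illustration: "%mul = MADDWrrr %a, %b, %wzr" (a plain multiply, since
// the accumulator is the zero register) feeding a single ADDWrr use in the
// same block satisfies canCombineWithMUL, so the pair can later be folded
// into one MADD.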

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(Root);
    // When opcode can't change bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {

  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool
AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}

/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.

bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind \n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
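
// For illustration of the operand orders above: with FMAInstKind::Default the
// multiply operands come first ("FMADD Sd, Sn, Sm, Sa" computes Sn*Sm + Sa),
// while the Indexed and Accumulator kinds take the accumulator first, e.g.
// "FMLA Vd.2S, Vn.2S, Vm.S[lane]", where Vd is both a source and the
// destination.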

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///   MUL I=A,B,0
///   ADD R,I,Imm
///   ==> ORR  V, ZR, Imm
///   ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
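
// For illustration: for "%r = ADDWri %mul, 5" where %mul multiplies %a and
// %b, the MULADDWI_OP1 handling below materializes the immediate and then
// calls genMaddR, producing roughly:
//   ORR  %v, WZR, #5
//   MADD %r, %a, %b, %v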

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *MUL;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    int Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
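
  // For illustration: MULSUBWI_OP1 rewrites "%r = SUBWri %mul, 16" (with
  // %mul = %a * %b) as, roughly:
  //   ORR  %v, WZR, #0xfffffff0   ; %v = -16
  //   MADD %r, %a, %b, %v         ; %r = %a * %b - 16
  // provided the negated immediate is encodable as a logical immediate
  // (0xfffffff0 is).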
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // FMUL I=A,B,0
    // FADD R,I,C
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}

/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
///   csinc  w9, wzr, wzr, <condition code>
///   tbnz   w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<inverted condition code>
///    \endcode
///
/// 2. \code
///   csinc w9, wzr, wzr, <condition code>
///   tbz w9, #0, 0x44
///    \endcode
/// to
///    \code
///   b.<condition code>
///    \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
///    \code
///   and  w8, w8, #0x400
///   cbnz w8, L1
///    \endcode
/// to
///    \code
///   tbnz w8, #10, L1
///    \endcode
///
/// \param  MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // Register lives on to the TBZ/TBNZ now.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases; the 64-bit variant cannot encode them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-register.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC.
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}
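
// For illustration of the CSINC case: "csinc w9, wzr, wzr, eq" makes w9 == 1
// exactly when the condition is NE, so a following "cbnz w9, <bb>" is
// replaced above by "b.ne <bb>" (the CBNZ path inverts the CSINC condition
// code before emitting the Bcc).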

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}