//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace {
  /// Post- register allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;

    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;

    bool isThumb1, isThumb2;
    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}
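
/// Map a single load/store opcode and an addressing submode to the
/// corresponding load/store-multiple opcode; for example
/// (ARM::LDRi12, ARM_AM::ia) yields ARM::LDMIA. Returns 0 when no
/// non-writeback form exists for the requested submode.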
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}
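
/// Number of immediate-offset units per word for a Thumb1 load/store opcode:
/// tLDRi/tSTRi immediates count words (scale 1), tLDRHi/tSTRHi halfwords
/// (scale 2) and tLDRBi/tSTRBi bytes (scale 4), so "WordOffset * scale"
/// converts a word delta into the instruction's own offset units.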
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
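/// For example, once a merged Thumb1 LDM/STM with writeback has advanced the
/// base by WordOffset words, a later "tLDRi rD, [base, #N]" is rewritten to
/// use #N - WordOffset (tLDRi immediates count words); if that offset would
/// become negative, a "SUBS base, #WordOffset*4" is inserted instead.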
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
/// Return the first register of class \p RegClass that is not currently live.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}
static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}
/// Create and insert an LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if merging is not possible.
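/// For example (ARM mode, Offset == 0):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]   =>   ldmia r0, {r1, r2}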
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
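
/// Create and insert a Thumb2 LDRD/STRD covering two adjacent word accesses,
/// e.g.:
///   ldr r0, [r2]
///   ldr r1, [r2, #4]   =>   ldrd r0, r1, [r2]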
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}
/// Merge the instructions in \p Cand into a single load/store-multiple or
/// load/store-double instruction; returns the new instruction, or nullptr if
/// the merge could not be performed.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (KilledRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
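  // For example, offsets 0, 4, ..., 1020 (and their negated values) are
  // accepted; 1024 is out of range and 6 is not word-aligned.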
  return (Value % 4) == 0 && Value < 1024;
}
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
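/// For example, word loads from [r0], [r0, #4], [r0, #8] in ascending offset
/// order become one candidate with CanMergeToLSMulti set; on Thumb2 the first
/// two alone would additionally allow CanMergeToLSDouble.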
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm starting with an odd register number, as that
    // needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
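
/// Return true if \p MI is a SUB that subtracts \p Bytes from \p Base under
/// the same predicate (and, for the flag-setting forms, without a live CPSR
/// def), i.e. an update that an adjacent LDM/STM/VLDM/VSTM can absorb as base
/// writeback.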
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(NextMBBI);
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
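/// For example:
///   ldr r1, [r0]
///   add r0, r0, #4   =>   ldr r1, [r0], #4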
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isLoadSingle(Opcode);
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
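
/// Rewrite an LDRD/STRD whose register pair cannot be encoded (non-ascending
/// or non-consecutive registers, or the Cortex-M3 errata 602117 case) into an
/// equivalent LDM/STM or a pair of single loads/stores, e.g.:
///   ldrd r1, r3, [r0]   =>   ldr r1, [r0]
///                            ldr r3, [r0, #4]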
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
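/// For example:
///   ldr r1, [r0]
///   ldr r2, [r0, #4]   =>   ldmia r0, {r1, r2, r3}
///   ldr r3, [r0, #8]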
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue())
      continue;

    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();

  return Changed;
}
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restores the value of LR
/// into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}
namespace {
  /// Pre- register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate a new array of memory references and populate it with ones from
  // the original instructions.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
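
  // Worked example (illustrative): for t2LDRDi8 / t2STRDi8, Scale is 4, so
  // Limit is (1 << 8) * 4 = 1024; an offset of 1020 passes (a multiple of 4
  // below the limit), while 1024 (too large) and 1022 (misaligned) fail.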
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
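  //
  // For example (illustrative), 4-byte loads at offsets #0, #4 and #8 form
  // one consecutive run that can be moved together, whereas offsets #0, #4
  // and #12 stop the run at the gap before #12.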
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
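        // For example (illustrative), given a suitably aligned base:
        //   ldr r1, [r0]
        //   ldr r2, [r0, #4]
        // can be combined into
        //   ldrd r1, r2, [r0]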
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers; it is not safe to move a memory operation across
        // a call or a branch.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// Returns an instance of the load / store optimization pass: the
/// pre-register-allocation rescheduler if \p PreAlloc is true, otherwise the
/// post-allocation ldm / stm formation pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
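
// Usage sketch (an assumption about the surrounding wiring, not part of this
// file; see ARMPassConfig in ARMTargetMachine.cpp for the authoritative code):
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // pre-RA
//   addPass(createARMLoadStoreOptimizationPass());                  // post-RA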