1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
// Debug type string used by DEBUG() output and the STATISTIC counters below.
49 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics (printed with -stats): counts of multi-register
// load/store instructions this pass created or rewrote.
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
64 /// Post-register-allocation pass that combines load / store instructions to
65 /// form ldm / stm instructions.
// NOTE(review): several lines of this class (e.g. 'static char ID;', the
// closing brace, some member declarations) appear elided in this view —
// verify against the upstream file.
66 struct ARMLoadStoreOpt : public MachineFunctionPass {
68 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function target/subtarget accessors, set up before the pass runs.
70 const MachineFunction *MF;
71 const TargetInstrInfo *TII;
72 const TargetRegisterInfo *TRI;
73 const MachineRegisterInfo *MRI;
74 const ARMSubtarget *STI;
75 const TargetLowering *TL;
// Backwards liveness tracking state used by moveLiveRegsBefore()/findFreeReg().
77 LivePhysRegs LiveRegs;
78 RegisterClassInfo RegClassInfo;
// Position LiveRegs currently corresponds to (queries must move backwards).
79 MachineBasicBlock::const_iterator LiveRegPos;
// Lazily-initialized flag for RegClassInfo (see findFreeReg).
81 bool RegClassInfoValid;
82 bool isThumb1, isThumb2;
84 bool runOnMachineFunction(MachineFunction &Fn) override;
86 const char *getPassName() const override {
87 return "ARM load / store optimization pass";
91 /// A set of load/store MachineInstrs with same base register sorted by
91 /// offset.
93 struct MemOpQueueEntry {
95 int Offset; ///< Load/Store offset.
96 unsigned Position; ///< Position as counted from end of basic block.
97 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
98 : MI(MI), Offset(Offset), Position(Position) {}
100 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
102 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
103 /// merged into a LDM/STM.
104 struct MergeCandidate {
105 /// List of instructions ordered by load/store offset.
106 SmallVector<MachineInstr*, 4> Instrs;
107 /// Index in Instrs of the instruction being latest in the schedule.
108 unsigned LatestMIIdx;
109 /// Index in Instrs of the instruction being earliest in the schedule.
110 unsigned EarliestMIIdx;
111 /// Index into the basic block where the merged instruction will be
112 /// inserted. (See MemOpQueueEntry.Position)
114 /// Whether the instructions can be merged into a ldm/stm instruction.
115 bool CanMergeToLSMulti;
116 /// Whether the instructions can be merged into a ldrd/strd instruction.
117 bool CanMergeToLSDouble;
// Arena for MergeCandidate objects plus the lists of candidates found while
// scanning a block; consumed by MergeOpsUpdate / MergeBaseUpdate* below.
119 SpecificBumpPtrAllocator<MergeCandidate> Allocator;
120 SmallVector<const MergeCandidate*,4> Candidates;
121 SmallVector<MachineInstr*,4> MergeBaseCandidates;
// Internal helpers; see the out-of-line definitions for documentation.
123 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
124 MachineBasicBlock::const_iterator Before);
125 unsigned findFreeReg(const TargetRegisterClass &RegClass);
126 void UpdateBaseRegUses(MachineBasicBlock &MBB,
127 MachineBasicBlock::iterator MBBI,
128 DebugLoc DL, unsigned Base, unsigned WordOffset,
129 ARMCC::CondCodes Pred, unsigned PredReg);
130 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
132 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
133 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
134 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
135 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
136 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
137 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
138 void FormCandidates(const MemOpQueue &MemOps);
139 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
140 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
141 MachineBasicBlock::iterator &MBBI);
142 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
143 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
144 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
145 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
146 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification: address of ID is the unique pass identifier.
148 char ARMLoadStoreOpt::ID = 0;
/// Returns true if \p MI has a live (non-dead) definition of the CPSR flags.
/// Such an instruction cannot be merged/folded, because the flag result is
/// observed by a later reader.
// NOTE(review): the return statements and closing braces appear elided in
// this view — verify against upstream.
151 static bool definesCPSR(const MachineInstr *MI) {
152 for (const auto &MO : MI->operands()) {
155 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
156 // If the instruction has live CPSR def, then it's not safe to fold it
157 // into load / store.
/// Decode the signed byte offset of a single load/store instruction from its
/// immediate operand (third-from-last operand).  Thumb2/ARM i12/i8 forms use
/// the immediate directly, Thumb1 immediates are scaled by 4, and the
/// remaining forms are AM3 (LDRD/STRD) or AM5 (VLDR/VSTR, scaled by 4) with
/// an explicit add/sub flag.
// NOTE(review): the early 'return' lines of the first two branches appear
// elided in this view — verify against upstream.
164 static int getMemoryOpOffset(const MachineInstr *MI) {
165 unsigned Opcode = MI->getOpcode();
166 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
167 unsigned NumOperands = MI->getDesc().getNumOperands();
// Offset immediate is the last operand before the predicate operands.
168 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
170 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
171 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
172 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
173 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
176 // Thumb1 immediate offsets are scaled by 4
177 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
178 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
// AM3 encodes a raw byte offset; AM5 encodes a word count, hence the *4.
181 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
182 : ARM_AM::getAM5Offset(OffField) * 4;
183 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
184 : ARM_AM::getAM5Op(OffField);
// Subtractive addressing: negate the decoded magnitude.
186 if (Op == ARM_AM::sub)
192 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
193 return MI.getOperand(1);
196 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
197 return MI.getOperand(0);
/// Map a single load/store opcode plus an addressing submode (ia/ib/da/db)
/// to the corresponding load/store-multiple opcode, or 0 when no
/// non-writeback form exists for that combination.
// NOTE(review): the outer switch-on-opcode case labels and closing braces
// appear elided in this view — verify against upstream.
200 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
202 default: llvm_unreachable("Unhandled opcode!");
206 default: llvm_unreachable("Unhandled submode!");
207 case ARM_AM::ia: return ARM::LDMIA;
208 case ARM_AM::da: return ARM::LDMDA;
209 case ARM_AM::db: return ARM::LDMDB;
210 case ARM_AM::ib: return ARM::LDMIB;
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::STMIA;
217 case ARM_AM::da: return ARM::STMDA;
218 case ARM_AM::db: return ARM::STMDB;
219 case ARM_AM::ib: return ARM::STMIB;
223 // tLDMIA is writeback-only - unless the base register is in the input
227 default: llvm_unreachable("Unhandled submode!");
228 case ARM_AM::ia: return ARM::tLDMIA;
232 // There is no non-writeback tSTMIA either.
235 default: llvm_unreachable("Unhandled submode!");
236 case ARM_AM::ia: return ARM::tSTMIA_UPD;
// Thumb2 only supports IA and DB submodes.
242 default: llvm_unreachable("Unhandled submode!");
243 case ARM_AM::ia: return ARM::t2LDMIA;
244 case ARM_AM::db: return ARM::t2LDMDB;
250 default: llvm_unreachable("Unhandled submode!");
251 case ARM_AM::ia: return ARM::t2STMIA;
252 case ARM_AM::db: return ARM::t2STMDB;
// VFP multiples: DB is only available with base-register writeback, so the
// non-writeback query returns 0 for it.
257 default: llvm_unreachable("Unhandled submode!");
258 case ARM_AM::ia: return ARM::VLDMSIA;
259 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
264 default: llvm_unreachable("Unhandled submode!");
265 case ARM_AM::ia: return ARM::VSTMSIA;
266 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
271 default: llvm_unreachable("Unhandled submode!");
272 case ARM_AM::ia: return ARM::VLDMDIA;
273 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
278 default: llvm_unreachable("Unhandled submode!");
279 case ARM_AM::ia: return ARM::VSTMDIA;
280 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// Inverse of getLoadStoreMultipleOpcode: recover the addressing submode
/// (ia/ib/da/db) from a load/store-multiple opcode.
// NOTE(review): the 'return ARM_AM::ia/db/...' lines and many case labels
// appear elided in this view — verify against upstream.
285 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
287 default: llvm_unreachable("Unhandled opcode!");
// IA-mode opcodes (including writeback/return variants).
294 case ARM::tLDMIA_UPD:
295 case ARM::tSTMIA_UPD:
296 case ARM::t2LDMIA_RET:
298 case ARM::t2LDMIA_UPD:
300 case ARM::t2STMIA_UPD:
302 case ARM::VLDMSIA_UPD:
304 case ARM::VSTMSIA_UPD:
306 case ARM::VLDMDIA_UPD:
308 case ARM::VSTMDIA_UPD:
// DB-mode opcodes (writeback variants).
322 case ARM::t2LDMDB_UPD:
324 case ARM::t2STMDB_UPD:
325 case ARM::VLDMSDB_UPD:
326 case ARM::VSTMSDB_UPD:
327 case ARM::VLDMDDB_UPD:
328 case ARM::VSTMDDB_UPD:
339 static bool isT1i32Load(unsigned Opc) {
340 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
343 static bool isT2i32Load(unsigned Opc) {
344 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
347 static bool isi32Load(unsigned Opc) {
348 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
351 static bool isT1i32Store(unsigned Opc) {
352 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
355 static bool isT2i32Store(unsigned Opc) {
356 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
359 static bool isi32Store(unsigned Opc) {
360 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
363 static bool isLoadSingle(unsigned Opc) {
364 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
/// Scale factor (in bytes) applied to the immediate of a Thumb1 load/store
/// opcode; used by UpdateBaseRegUses to convert word offsets to immediates.
// NOTE(review): the non-default case labels/returns appear elided in this
// view — verify against upstream.
367 static unsigned getImmScale(unsigned Opc) {
369 default: llvm_unreachable("Unhandled opcode!");
/// Number of bytes a load/store(-multiple) instruction transfers.  For the
/// multiple forms the register-list length is the operand count beyond the
/// fixed operands; each register is 4 bytes (GPR/S-reg) or 8 bytes (D-reg).
// NOTE(review): most case labels and the single-load/store returns appear
// elided in this view — verify against upstream.
384 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
385 switch (MI->getOpcode()) {
412 case ARM::tLDMIA_UPD:
413 case ARM::tSTMIA_UPD:
420 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
423 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
427 /// Update future uses of the base register with the offset introduced
428 /// due to writeback. This function only works on Thumb1.
/// Walks forward from \p MBBI folding the writeback amount
/// (\p WordOffset words) into immediate-offset loads/stores and ADDS/SUBS
/// that use \p Base; if an instruction cannot be updated, a compensating
/// tSUBi8 is inserted before it instead.
// NOTE(review): control-flow lines (braces, 'continue'/'return'/'break',
// some declarations such as 'int Offset;' and the MachineOperand bindings)
// appear elided in this view — verify against upstream.
430 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
431 MachineBasicBlock::iterator MBBI,
432 DebugLoc DL, unsigned Base,
434 ARMCC::CondCodes Pred, unsigned PredReg) {
435 assert(isThumb1 && "Can only update base register uses for Thumb1!");
436 // Start updating any instructions with immediate offsets. Insert a SUB before
437 // the first non-updateable instruction (if any).
438 for (; MBBI != MBB.end(); ++MBBI) {
439 bool InsertSub = false;
440 unsigned Opc = MBBI->getOpcode();
442 if (MBBI->readsRegister(Base)) {
445 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
447 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
449 if (IsLoad || IsStore) {
450 // Loads and stores with immediate offsets can be updated, but only if
451 // the new offset isn't negative.
452 // The MachineOperand containing the offset immediate is the last one
453 // before predicates.
455 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
456 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
457 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
459 // If storing the base register, it needs to be reset first.
460 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
462 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
467 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
468 !definesCPSR(MBBI)) {
469 // SUBS/ADDS using this register, with a dead def of the CPSR.
470 // Merge it with the update; if the merged offset is too large,
471 // insert a new sub instead.
473 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
474 Offset = (Opc == ARM::tSUBi8) ?
475 MO.getImm() + WordOffset * 4 :
476 MO.getImm() - WordOffset * 4 ;
477 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
478 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
481 // The base register has now been reset, so exit early.
488 // Can't update the instruction.
492 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
493 // Since SUBS sets the condition flags, we can't place the base reset
494 // after an instruction that has a live CPSR def.
495 // The base register might also contain an argument for a function call.
500 // An instruction above couldn't be updated, so insert a sub.
501 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
502 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
506 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
507 // Register got killed. Stop updating.
511 // End of block was reached.
512 if (MBB.succ_size() > 0) {
513 // FIXME: Because of a bug, live registers are sometimes missing from
514 // the successor blocks' live-in sets. This means we can't trust that
515 // information and *always* have to reset at the end of a block.
517 if (MBBI != MBB.end()) --MBBI;
519 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
520 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
524 /// Return the first allocatable register of class \p RegClass that is not
524 /// currently live (per the LiveRegs set maintained by moveLiveRegsBefore).
// NOTE(review): the 'return Reg;' / 'return 0;' lines and closing braces
// appear elided in this view — verify against upstream.
525 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
// Lazily compute the allocation order the first time we need it.
526 if (!RegClassInfoValid) {
527 RegClassInfo.runOnMachineFunction(*MF);
528 RegClassInfoValid = true;
531 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
532 if (!LiveRegs.contains(Reg))
537 /// Compute live registers just before instruction \p Before (in normal schedule
538 /// direction). Computes backwards so multiple queries in the same block must
539 /// come in reverse order.
// NOTE(review): the LiveRegsValid member and the '--LiveRegPos' step of the
// while loop appear elided in this view — verify against upstream.
540 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
541 MachineBasicBlock::const_iterator Before) {
542 // Initialize if we never queried in this block.
543 if (!LiveRegsValid) {
// Seed with the block's live-outs, then step backwards instruction by
// instruction until LiveRegPos reaches Before.
545 LiveRegs.addLiveOuts(&MBB, true);
546 LiveRegPos = MBB.end();
547 LiveRegsValid = true;
549 // Move backward just before the "Before" position.
550 while (LiveRegPos != Before) {
552 LiveRegs.stepBackward(*LiveRegPos);
/// Returns true if register \p Reg appears as the first element of any pair
/// in \p Regs.
// NOTE(review): the second parameter declaration and the comparison/return
// lines appear elided in this view — verify against upstream.
556 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
558 for (const std::pair<unsigned, bool> &R : Regs)
564 /// Create and insert a LDM or STM with Base as base register and registers in
565 /// Regs as the register operands that would be loaded / stored.
566 /// Returns the newly created instruction, or nullptr when the merge is not
566 /// possible (contrary to an older comment, this does not return bool).
// NOTE(review): many structural lines (braces, 'return nullptr;', the
// 'unsigned NewBase'/'BaseOpc' declarations, else-branches) appear elided in
// this view — verify against upstream.
567 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
568 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
569 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
570 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
571 unsigned NumRegs = Regs.size();
574 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
575 // Compute liveness information for that register to make the decision.
576 bool SafeToClobberCPSR = !isThumb1 ||
577 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
578 MachineBasicBlock::LQR_Dead);
580 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
582 // Exception: If the base register is in the input reglist, Thumb1 LDM is
584 // It's also not possible to merge an STR of the base register in Thumb1.
585 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
586 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
587 if (Opcode == ARM::tLDRi) {
589 } else if (Opcode == ARM::tSTRi) {
// Select the addressing submode from the starting offset.
594 ARM_AM::AMSubMode Mode = ARM_AM::ia;
595 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
596 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
597 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
599 if (Offset == 4 && haveIBAndDA) {
601 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
603 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
604 // VLDM/VSTM do not support DB mode without also updating the base reg.
606 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
607 // Check if this is a supported opcode before inserting instructions to
608 // calculate a new base register.
609 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
611 // If starting offset isn't zero, insert a MI to materialize a new base.
612 // But only do so if it is cost effective, i.e. merging more than two
617 // On Thumb1, it's not worth materializing a new base register without
618 // clobbering the CPSR (i.e. not using ADDS/SUBS).
619 if (!SafeToClobberCPSR)
623 if (isi32Load(Opcode)) {
624 // If it is a load, then just use one of the destination register to
625 // use as the new base.
626 NewBase = Regs[NumRegs-1].first;
628 // Find a free register that we can use as scratch register.
629 moveLiveRegsBefore(MBB, InsertBefore);
630 // The merged instruction does not exist yet but will use several Regs if
632 if (!isLoadSingle(Opcode))
633 for (const std::pair<unsigned, bool> &R : Regs)
634 LiveRegs.addReg(R.first);
636 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
// Choose the add/sub opcode appropriate for the target mode and offset size.
642 isThumb2 ? ARM::t2ADDri :
643 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
644 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
645 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
650 isThumb2 ? ARM::t2SUBri :
651 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
652 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
655 if (!TL->isLegalAddImmediate(Offset))
656 // FIXME: Try add with register operand?
657 return nullptr; // Probably not worth it then.
659 // We can only append a kill flag to the add/sub input if the value is not
660 // used in the register list of the stm as well.
661 bool KillOldBase = BaseKill &&
662 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
665 // Thumb1: depending on immediate size, use either
666 // ADDS NewBase, Base, #imm3
669 // ADDS NewBase, #imm8.
670 if (Base != NewBase &&
671 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
672 // Need to insert a MOV to the new base first.
673 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
675 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
676 if (Pred != ARMCC::AL)
678 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
679 .addReg(Base, getKillRegState(KillOldBase));
681 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
682 .addReg(Base, getKillRegState(KillOldBase))
683 .addImm(Pred).addReg(PredReg);
685 // The following ADDS/SUBS becomes an update.
689 if (BaseOpc == ARM::tADDrSPi) {
690 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
691 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
692 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
693 .addImm(Pred).addReg(PredReg);
696 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
697 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
698 .addImm(Pred).addReg(PredReg);
700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
702 .addImm(Pred).addReg(PredReg).addReg(0);
705 BaseKill = true; // New base is always killed straight away.
708 bool isDef = isLoadSingle(Opcode);
710 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
711 // base register writeback.
712 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
716 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
717 // - There is no writeback (LDM of base register),
718 // - the base register is killed by the merged instruction,
719 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
720 // to reset the base register.
721 // Otherwise, don't merge.
722 // It's safe to return here since the code to materialize a new base register
723 // above is also conditional on SafeToClobberCPSR.
724 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
727 MachineInstrBuilder MIB;
730 if (Opcode == ARM::tLDMIA)
731 // Update tLDMIA with writeback if necessary.
732 Opcode = ARM::tLDMIA_UPD;
734 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
736 // Thumb1: we might need to set base writeback when building the MI.
737 MIB.addReg(Base, getDefRegState(true))
738 .addReg(Base, getKillRegState(BaseKill));
740 // The base isn't dead after a merged instruction with writeback.
741 // Insert a sub instruction after the newly formed instruction to reset.
743 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
746 // No writeback, simply build the MachineInstr.
747 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
748 MIB.addReg(Base, getKillRegState(BaseKill));
751 MIB.addImm(Pred).addReg(PredReg);
// Finally append the register list (defs for loads, uses for stores).
753 for (const std::pair<unsigned, bool> &R : Regs)
754 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
756 return MIB.getInstr();
/// Create and insert a t2LDRD/t2STRD for exactly two registers in \p Regs,
/// addressed as [Base, #Offset].  Returns the new instruction.
// NOTE(review): the if(IsLoad)/else braces around the two addReg pairs appear
// elided in this view — as written the load path would add the registers
// twice; verify against upstream.
759 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
760 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
761 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
762 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
763 bool IsLoad = isi32Load(Opcode);
764 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
765 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
767 assert(Regs.size() == 2);
768 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
769 TII->get(LoadStoreOpcode));
// Loads: the two registers are defs; stores: uses (with kill state).
771 MIB.addReg(Regs[0].first, RegState::Define)
772 .addReg(Regs[1].first, RegState::Define);
774 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
775 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
777 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
778 return MIB.getInstr();
781 /// Try to materialize the merged instruction for \p Cand: collect the
781 /// register list and implicit super-reg defs, emit a LDRD/STRD or LDM/STM
781 /// via the Create* helpers, erase the originals, and fix up kill/undef
781 /// flags in the affected range.  Returns the merged instruction or nullptr.
// NOTE(review): braces, 'continue' statements and several early-exit lines
// appear elided in this view — verify against upstream.
782 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
783 const MachineInstr *First = Cand.Instrs.front();
784 unsigned Opcode = First->getOpcode();
785 bool IsLoad = isLoadSingle(Opcode);
786 SmallVector<std::pair<unsigned, bool>, 8> Regs;
787 SmallVector<unsigned, 4> ImpDefs;
788 DenseSet<unsigned> KilledRegs;
789 // Determine list of registers and list of implicit super-register defs.
790 for (const MachineInstr *MI : Cand.Instrs) {
791 const MachineOperand &MO = getLoadStoreRegOp(*MI);
792 unsigned Reg = MO.getReg();
793 bool IsKill = MO.isKill();
795 KilledRegs.insert(Reg);
796 Regs.push_back(std::make_pair(Reg, IsKill));
799 // Collect any implicit defs of super-registers, after merging we can't
800 // be sure anymore that we properly preserved these live ranges and must
801 // removed these implicit operands.
802 for (const MachineOperand &MO : MI->implicit_operands()) {
803 if (!MO.isReg() || !MO.isDef() || MO.isDead())
805 assert(MO.isImplicit());
806 unsigned DefReg = MO.getReg();
808 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
810 // We can ignore cases where the super-reg is read and written.
811 if (MI->readsRegister(DefReg))
813 ImpDefs.push_back(DefReg);
818 // Attempt the merge.
819 typedef MachineBasicBlock::iterator iterator;
820 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
// Insert the merged instruction just after the latest of the originals.
821 iterator InsertBefore = std::next(iterator(LatestMI));
822 MachineBasicBlock &MBB = *LatestMI->getParent();
// NOTE(review): getMemoryOpOffset returns a signed int; storing it in an
// 'unsigned' would wrap for negative offsets — verify against upstream
// (which declares this as 'int Offset').
823 unsigned Offset = getMemoryOpOffset(First);
824 unsigned Base = getLoadStoreBaseOp(*First).getReg();
825 bool BaseKill = LatestMI->killsRegister(Base);
826 unsigned PredReg = 0;
827 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
828 DebugLoc DL = First->getDebugLoc();
829 MachineInstr *Merged = nullptr;
// Prefer LDRD/STRD when possible, fall back to LDM/STM.
830 if (Cand.CanMergeToLSDouble)
831 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
832 Opcode, Pred, PredReg, DL, Regs);
833 if (!Merged && Cand.CanMergeToLSMulti)
834 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
835 Opcode, Pred, PredReg, DL, Regs);
839 // Determine earliest instruction that will get removed. We then keep an
840 // iterator just above it so the following erases don't invalidated it.
841 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
842 bool EarliestAtBegin = false;
843 if (EarliestI == MBB.begin()) {
844 EarliestAtBegin = true;
846 EarliestI = std::prev(EarliestI);
849 // Remove instructions which have been merged.
850 for (MachineInstr *MI : Cand.Instrs)
853 // Determine range between the earliest removed instruction and the new one.
855 EarliestI = MBB.begin();
857 EarliestI = std::next(EarliestI);
858 auto FixupRange = make_range(EarliestI, iterator(Merged));
860 if (isLoadSingle(Opcode)) {
861 // If the previous loads defined a super-reg, then we have to mark earlier
862 // operands undef; Replicate the super-reg def on the merged instruction.
863 for (MachineInstr &MI : FixupRange) {
864 for (unsigned &ImpDefReg : ImpDefs) {
865 for (MachineOperand &MO : MI.implicit_operands()) {
866 if (!MO.isReg() || MO.getReg() != ImpDefReg)
876 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
877 for (unsigned ImpDef : ImpDefs)
878 MIB.addReg(ImpDef, RegState::ImplicitDefine);
880 // Remove kill flags: We are possibly storing the values later now.
881 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
882 for (MachineInstr &MI : FixupRange) {
883 for (MachineOperand &MO : MI.uses()) {
884 if (!MO.isReg() || !MO.isKill())
886 if (KilledRegs.count(MO.getReg()))
// Stores never carry implicit super-reg defs to replicate.
890 assert(ImpDefs.empty());
/// Return true if \p Offset is a legal immediate for t2LDRDi8/t2STRDi8.
/// The instruction encodes an 8-bit immediate which is internally scaled
/// by 4, so the byte offset must be a multiple of 4 with magnitude < 1024.
static bool isValidLSDoubleOffset(int Offset) {
  const unsigned Magnitude = abs(Offset);
  if (Magnitude % 4 != 0)
    return false;
  return Magnitude < 1024;
}
903 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// Greedily grows runs of same-base ops with contiguous offsets, tracking
/// whether each run can become an LDM/STM and/or an LDRD/STRD, and appends
/// one MergeCandidate per run to Candidates.
// NOTE(review): the enclosing do-loop header, 'unsigned SIndex/Count'
// declarations, several 'break'/assignment lines and the guard around
// resetting the CanMergeTo* flags (likely 'if (Count == 1)') appear elided
// in this view — verify against upstream.
904 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
905 const MachineInstr *FirstMI = MemOps[0].MI;
906 unsigned Opcode = FirstMI->getOpcode();
907 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
908 unsigned Size = getLSMultipleTransferSize(FirstMI);
911 unsigned EIndex = MemOps.size();
913 // Look at the first instruction.
914 const MachineInstr *MI = MemOps[SIndex].MI;
915 int Offset = MemOps[SIndex].Offset;
916 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
917 unsigned PReg = PMO.getReg();
918 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
919 unsigned Latest = SIndex;
920 unsigned Earliest = SIndex;
922 bool CanMergeToLSDouble =
923 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
924 // ARM errata 602117: LDRD with base in list may result in incorrect base
925 // register when interrupted or faulted.
926 if (STI->isCortexM3() && isi32Load(Opcode) &&
927 PReg == getLoadStoreBaseOp(*MI).getReg())
928 CanMergeToLSDouble = false;
930 bool CanMergeToLSMulti = true;
931 // On swift vldm/vstm starting with an odd register number as that needs
932 // more uops than single vldrs.
933 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
934 CanMergeToLSMulti = false;
936 // Merge following instructions where possible.
937 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
// Offsets must be strictly contiguous (previous + transfer size).
938 int NewOffset = MemOps[I].Offset;
939 if (NewOffset != Offset + (int)Size)
941 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
942 unsigned Reg = MO.getReg();
943 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
945 // See if the current load/store may be part of a multi load/store.
946 bool PartOfLSMulti = CanMergeToLSMulti;
948 // Cannot load from SP
950 PartOfLSMulti = false;
951 // Register numbers must be in ascending order.
952 else if (RegNum <= PRegNum)
953 PartOfLSMulti = false;
954 // For VFP / NEON load/store multiples, the registers must be
955 // consecutive and within the limit on the number of registers per
957 else if (!isNotVFP && RegNum != PRegNum+1)
958 PartOfLSMulti = false;
960 // See if the current load/store may be part of a double load/store.
// LDRD/STRD handles exactly two registers, hence Count <= 1 here.
961 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
963 if (!PartOfLSMulti && !PartOfLSDouble)
965 CanMergeToLSMulti &= PartOfLSMulti;
966 CanMergeToLSDouble &= PartOfLSDouble;
967 // Track MemOp with latest and earliest position (Positions are
968 // counted in reverse).
969 unsigned Position = MemOps[I].Position;
970 if (Position < MemOps[Latest].Position)
972 else if (Position > MemOps[Earliest].Position)
974 // Prepare for next MemOp.
979 // Form a candidate from the Ops collected so far.
980 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
981 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
982 Candidate->Instrs.push_back(MemOps[C].MI);
983 Candidate->LatestMIIdx = Latest - SIndex;
984 Candidate->EarliestMIIdx = Earliest - SIndex;
985 Candidate->InsertPos = MemOps[Latest].Position;
987 CanMergeToLSMulti = CanMergeToLSDouble = false;
988 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
989 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
990 Candidates.push_back(Candidate);
991 // Continue after the chain.
993 } while (SIndex < EIndex);
/// Map a load/store-multiple opcode plus submode to its base-register
/// writeback (_UPD) variant.
// NOTE(review): the outer switch-on-opcode case labels and braces appear
// elided in this view — verify against upstream.
996 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
997 ARM_AM::AMSubMode Mode) {
999 default: llvm_unreachable("Unhandled opcode!");
1005 default: llvm_unreachable("Unhandled submode!");
1006 case ARM_AM::ia: return ARM::LDMIA_UPD;
1007 case ARM_AM::ib: return ARM::LDMIB_UPD;
1008 case ARM_AM::da: return ARM::LDMDA_UPD;
1009 case ARM_AM::db: return ARM::LDMDB_UPD;
1016 default: llvm_unreachable("Unhandled submode!");
1017 case ARM_AM::ia: return ARM::STMIA_UPD;
1018 case ARM_AM::ib: return ARM::STMIB_UPD;
1019 case ARM_AM::da: return ARM::STMDA_UPD;
1020 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 and VFP forms only support IA and DB.
1025 default: llvm_unreachable("Unhandled submode!");
1026 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1027 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1032 default: llvm_unreachable("Unhandled submode!");
1033 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1034 case ARM_AM::db: return ARM::t2STMDB_UPD;
1038 default: llvm_unreachable("Unhandled submode!");
1039 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1040 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1044 default: llvm_unreachable("Unhandled submode!");
1045 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1046 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1050 default: llvm_unreachable("Unhandled submode!");
1051 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1052 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1056 default: llvm_unreachable("Unhandled submode!");
1057 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1058 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1063 /// Check if the given instruction increments or decrements a register and
1064 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1065 /// generated by the instruction are possibly read as well.
// NOTE(review): the 'int Scale;'/'bool CheckCPSRDef;'/'unsigned MIPredReg;'
// declarations, the default case and several 'return 0;' lines appear elided
// in this view — verify against upstream.
1066 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1067 ARMCC::CondCodes Pred, unsigned PredReg) {
// Scale converts the instruction's immediate to a signed byte delta;
// CheckCPSRDef marks flag-setting forms whose live CPSR def blocks folding.
1070 switch (MI.getOpcode()) {
1071 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1072 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1074 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1076 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1077 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1078 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
// Must be a same-register update (Rd == Rn == Reg) under the same predicate.
1083 if (MI.getOperand(0).getReg() != Reg ||
1084 MI.getOperand(1).getReg() != Reg ||
1085 getInstrPredicate(&MI, MIPredReg) != Pred ||
1086 MIPredReg != PredReg)
1089 if (CheckCPSRDef && definesCPSR(&MI))
1091 return MI.getOperand(2).getImm() * Scale;
1094 /// Searches for an increment or decrement of \p Reg before \p MBBI.
1095 /// On success \p Offset receives the signed byte amount and the iterator to
1095 /// the add/sub instruction is returned; otherwise MBB.end() is returned.
1095 static MachineBasicBlock::iterator
1096 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1097 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
// Nothing can precede the first instruction of the block.
1099 MachineBasicBlock &MBB = *MBBI->getParent();
1100 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1101 MachineBasicBlock::iterator EndMBBI = MBB.end();
1102 if (MBBI == BeginMBBI)
1105 // Skip debug values.
1106 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1107 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
// Offset == 0 means "no mergeable inc/dec"; signal that with MBB.end().
1110 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1111 return Offset == 0 ? EndMBBI : PrevMBBI;
1114 /// Searches for an increment or decrement of \p Reg after \p MBBI.
1115 /// On success \p Offset receives the signed byte amount and the iterator to
1115 /// the add/sub instruction is returned; otherwise MBB.end() is returned.
1115 static MachineBasicBlock::iterator
1116 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1117 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1119 MachineBasicBlock &MBB = *MBBI->getParent();
1120 MachineBasicBlock::iterator EndMBBI = MBB.end();
1121 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1122 // Skip debug values.
1123 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1125 if (NextMBBI == EndMBBI)
// Offset == 0 means "no mergeable inc/dec"; signal that with MBB.end().
1128 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1129 return Offset == 0 ? EndMBBI : NextMBBI;
1132 /// Fold proceeding/trailing inc/dec of base register into the
1133 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1135 /// stmia rn, <ra, rb, rc>
1136 /// rn := rn + 4 * 3;
1138 /// stmia rn!, <ra, rb, rc>
1140 /// rn := rn - 4 * 3;
1141 /// ldmia rn, <ra, rb, rc>
1143 /// ldmdb rn!, <ra, rb, rc>
1144 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1145 // Thumb1 is already using updating loads/stores.
1146 if (isThumb1) return false;
1148 const MachineOperand &BaseOP = MI->getOperand(0);
1149 unsigned Base = BaseOP.getReg();
1150 bool BaseKill = BaseOP.isKill();
1151 unsigned PredReg = 0;
1152 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1153 unsigned Opcode = MI->getOpcode();
1154 DebugLoc DL = MI->getDebugLoc();
1156 // Can't use an updating ld/st if the base register is also a dest
1157 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1158 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1159 if (MI->getOperand(i).getReg() == Base)
1162 int Bytes = getLSMultipleTransferSize(MI);
1163 MachineBasicBlock &MBB = *MI->getParent();
1164 MachineBasicBlock::iterator MBBI(MI);
// First look for a base update *before* the multiple; a decrement matching
// the transfer size can be folded as a pre-update of the addressing mode.
1166 MachineBasicBlock::iterator MergeInstr
1167 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1168 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1169 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1171 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise look *after* the multiple: the trailing add/sub must match the
// transfer size and direction (ia/ib advance upward, da/db downward).
1174 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1175 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1176 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// The matched add/sub is subsumed by the writeback form; delete it.
1179 MBB.erase(MergeInstr);
// Rebuild as the _UPD opcode: operand 0 is the written-back base (def),
// operand 1 the base source, then predicate, then the register list.
1181 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1182 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1183 .addReg(Base, getDefRegState(true)) // WB base register
1184 .addReg(Base, getKillRegState(BaseKill))
1185 .addImm(Pred).addReg(PredReg);
1187 // Transfer the rest of operands.
1188 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1189 MIB.addOperand(MI->getOperand(OpNum));
1191 // Transfer memoperands.
1192 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1198 /// Map a plain load/store opcode to its pre-indexed (writeback) form.
1198 /// VLDR/VSTR have no pre-indexed encodings, so the updating
1198 /// load/store-multiple opcodes are returned for them instead; Mode picks
1198 /// the increment (IA) or decrement (DB) variant.
1198 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1199 ARM_AM::AddrOpc Mode) {
1202 return ARM::LDR_PRE_IMM;
1204 return ARM::STR_PRE_IMM;
1206 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1208 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1210 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1212 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1215 return ARM::t2LDR_PRE;
1218 return ARM::t2STR_PRE;
1219 default: llvm_unreachable("Unhandled opcode!");
1223 /// Map a plain load/store opcode to its post-indexed (writeback) form.
1223 /// As with the pre-indexed mapping, VLDR/VSTR fall back to the updating
1223 /// load/store-multiple opcodes; Mode picks the IA or DB variant.
1223 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1224 ARM_AM::AddrOpc Mode) {
1227 return ARM::LDR_POST_IMM;
1229 return ARM::STR_POST_IMM;
1231 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1233 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1235 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1237 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1240 return ARM::t2LDR_POST;
1243 return ARM::t2STR_POST;
1244 default: llvm_unreachable("Unhandled opcode!");
1248 /// Fold proceeding/trailing inc/dec of base register into the
1249 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1250 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1251 // Thumb1 doesn't have updating LDR/STR.
1252 // FIXME: Use LDM/STM with single register instead.
1253 if (isThumb1) return false;
1255 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1256 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1257 unsigned Opcode = MI->getOpcode();
1258 DebugLoc DL = MI->getDebugLoc();
// isAM5: VFP single/double load/store (addrmode5); isAM2: integer LDR/STR
// with a 12-bit immediate (addrmode2).
1259 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1260 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1261 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a base update: the folded add/sub
// becomes the whole offset.
1262 if (isi32Load(Opcode) || isi32Store(Opcode))
1263 if (MI->getOperand(2).getImm() != 0)
1265 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1268 // Can't do the merge if the destination register is the same as the would-be
1269 // writeback register.
1270 if (MI->getOperand(0).getReg() == Base)
1273 unsigned PredReg = 0;
1274 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1275 int Bytes = getLSMultipleTransferSize(MI);
1276 MachineBasicBlock &MBB = *MI->getParent();
1277 MachineBasicBlock::iterator MBBI(MI);
// A matching add/sub before the access becomes a pre-indexed form; one after
// it becomes a post-indexed form. AM5 (VFP) has no pre-increment /
// post-decrement writeback multiples, hence the !isAM5 guards.
1279 MachineBasicBlock::iterator MergeInstr
1280 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1282 if (!isAM5 && Offset == Bytes) {
1283 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1284 } else if (Offset == -Bytes) {
1285 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1287 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1288 if (Offset == Bytes) {
1289 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1290 } else if (!isAM5 && Offset == -Bytes) {
1291 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The matched add/sub is now encoded in the access itself; delete it.
1295 MBB.erase(MergeInstr);
1297 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1299 bool isLd = isLoadSingle(Opcode);
1301 // VLDM[SD]_UPD, VSTM[SD]_UPD
1302 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1303 // updating load/store-multiple instructions can be used with only one
1305 MachineOperand &MO = MI->getOperand(0);
1306 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1307 .addReg(Base, getDefRegState(true)) // WB base register
1308 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1309 .addImm(Pred).addReg(PredReg)
1310 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1311 getKillRegState(MO.isKill())));
1314 // LDR_PRE, LDR_POST
1315 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1316 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1317 .addReg(Base, RegState::Define)
1318 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// AM2 post-indexed forms take the AM2-encoded immediate (addsub + amount)
// plus a vestigial zero offset register.
1320 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1321 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1322 .addReg(Base, RegState::Define)
1323 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1326 // t2LDR_PRE, t2LDR_POST
1327 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1328 .addReg(Base, RegState::Define)
1329 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1332 MachineOperand &MO = MI->getOperand(0);
1333 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1334 // the vestigal zero-reg offset register. When that's fixed, this clause
1335 // can be removed entirely.
1336 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1337 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1338 // STR_PRE, STR_POST
1339 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1340 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1341 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1343 // t2STR_PRE, t2STR_POST
1344 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1345 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1346 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1354 /// Fold a preceding/trailing add/sub of the base register into a t2LDRDi8 /
1354 /// t2STRDi8, producing the pre- or post-indexed t2LDRD/t2STRD form.
1354 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1355 unsigned Opcode = MI.getOpcode();
1356 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1357 "Must have t2STRDi8 or t2LDRDi8");
// Like the single-register case, only a zero-offset access can absorb the
// base update.
1358 if (MI.getOperand(3).getImm() != 0)
1361 // Behaviour for writeback is undefined if base register is the same as one
1363 const MachineOperand &BaseOp = MI.getOperand(2);
1364 unsigned Base = BaseOp.getReg();
1365 const MachineOperand &Reg0Op = MI.getOperand(0);
1366 const MachineOperand &Reg1Op = MI.getOperand(1);
1367 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1371 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1372 MachineBasicBlock::iterator MBBI(MI);
1373 MachineBasicBlock &MBB = *MI.getParent();
// A +/-8 update (the size of the two transferred words) before the access
// yields the _PRE form; one after it yields the _POST form.
1375 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1378 if (Offset == 8 || Offset == -8) {
1379 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1381 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1382 if (Offset == 8 || Offset == -8) {
1383 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
// Delete the now-folded add/sub.
1387 MBB.erase(MergeInstr);
1389 DebugLoc DL = MI.getDebugLoc();
1390 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the two data regs then the written-back base; stores define
// the base first and then read the two data regs.
1391 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1392 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1393 .addReg(BaseOp.getReg(), RegState::Define);
1395 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1396 MIB.addReg(BaseOp.getReg(), RegState::Define)
1397 .addOperand(Reg0Op).addOperand(Reg1Op);
1399 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1400 .addImm(Offset).addImm(Pred).addReg(PredReg);
1401 assert(TII->get(Opcode).getNumOperands() == 6 &&
1402 TII->get(NewOpc).getNumOperands() == 7 &&
1403 "Unexpected number of operands in Opcode specification.");
1405 // Transfer implicit operands.
1406 for (const MachineOperand &MO : MI.implicit_operands())
1408 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1414 /// Returns true if instruction is a memory operation that this pass is capable
1415 /// of operating on.
1416 static bool isMemoryOp(const MachineInstr *MI) {
1417 // When no memory operands are present, conservatively assume unaligned,
1418 // volatile, unfoldable.
1419 if (!MI->hasOneMemOperand())
1422 const MachineMemOperand *MMO = *MI->memoperands_begin();
1424 // Don't touch volatile memory accesses - we may be changing their order.
1425 if (MMO->isVolatile())
1428 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1430 if (MMO->getAlignment() < 4)
1433 // str <undef> could probably be eliminated entirely, but for now we just want
1434 // to avoid making a mess of it.
1435 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1436 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1437 MI->getOperand(0).isUndef())
1440 // Likewise don't mess with references to undefined addresses.
1441 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1442 MI->getOperand(1).isUndef())
// Finally, only register-base addressing is handled (no frame indices etc.).
1445 unsigned Opcode = MI->getOpcode();
1450 return MI->getOperand(1).isReg();
1453 return MI->getOperand(1).isReg();
1464 return MI->getOperand(1).isReg();
1469 /// Emit a single immediate-offset load (isDef) or store (!isDef) before
1469 /// \p MBBI with the given register/base/offset and kill/undef flags.
1469 /// Used by FixInvalidRegPairOp to split an LDRD/STRD into two accesses.
1469 static void InsertLDR_STR(MachineBasicBlock &MBB,
1470 MachineBasicBlock::iterator &MBBI,
1471 int Offset, bool isDef,
1472 DebugLoc DL, unsigned NewOpc,
1473 unsigned Reg, bool RegDeadKill, bool RegUndef,
1474 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1475 bool OffKill, bool OffUndef,
1476 ARMCC::CondCodes Pred, unsigned PredReg,
1477 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (dead flag from RegDeadKill).
1479 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1481 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1482 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1483 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is a use (kill flag from RegDeadKill).
1485 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1487 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1488 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1489 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1493 /// Rewrite an LDRD/STRD whose register pair is illegal (non-consecutive or
1493 /// hitting Cortex-M3 errata 602117) into an equivalent LDM/STM or a pair of
1493 /// single loads/stores. Returns true if \p MBBI was replaced.
1493 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1494 MachineBasicBlock::iterator &MBBI) {
1495 MachineInstr *MI = &*MBBI;
1496 unsigned Opcode = MI->getOpcode();
1497 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1500 const MachineOperand &BaseOp = MI->getOperand(2);
1501 unsigned BaseReg = BaseOp.getReg();
1502 unsigned EvenReg = MI->getOperand(0).getReg();
1503 unsigned OddReg = MI->getOperand(1).getReg();
1504 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1505 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1507 // ARM errata 602117: LDRD with base in list may result in incorrect base
1508 // register when interrupted or faulted.
1509 bool Errata602117 = EvenReg == BaseReg &&
1510 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1511 // ARM LDRD/STRD needs consecutive registers.
1512 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1513 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// Nothing to fix: the pair is legal on this subtarget.
1515 if (!Errata602117 && !NonConsecutiveRegs)
1518 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1519 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the pair regs are defs (dead flag); for stores they are uses
// (kill flag).
1520 bool EvenDeadKill = isLd ?
1521 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1522 bool EvenUndef = MI->getOperand(0).isUndef();
1523 bool OddDeadKill = isLd ?
1524 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1525 bool OddUndef = MI->getOperand(1).isUndef();
1526 bool BaseKill = BaseOp.isKill();
1527 bool BaseUndef = BaseOp.isUndef();
1528 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1529 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1530 int OffImm = getMemoryOpOffset(MI);
1531 unsigned PredReg = 0;
1532 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1534 if (OddRegNum > EvenRegNum && OffImm == 0) {
1535 // Ascending register numbers and no offset. It's safe to change it to a
1537 unsigned NewOpc = (isLd)
1538 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1539 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1541 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1542 .addReg(BaseReg, getKillRegState(BaseKill))
1543 .addImm(Pred).addReg(PredReg)
1544 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1545 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1548 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1549 .addReg(BaseReg, getKillRegState(BaseKill))
1550 .addImm(Pred).addReg(PredReg)
1552 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1554 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1558 // Split into two instructions.
1559 unsigned NewOpc = (isLd)
1560 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1561 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1562 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1563 // so adjust and use t2LDRi12 here for that.
1564 unsigned NewOpc2 = (isLd)
1565 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1566 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1567 DebugLoc dl = MBBI->getDebugLoc();
1568 // If this is a load and base register is killed, it may have been
1569 // re-defed by the load, make sure the first load does not clobber it.
// In that case emit the odd (higher-offset) half first, so the even half,
// which overlaps the base, goes last.
1571 (BaseKill || OffKill) &&
1572 (TRI->regsOverlap(EvenReg, BaseReg))) {
1573 assert(!TRI->regsOverlap(OddReg, BaseReg));
1574 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1575 OddReg, OddDeadKill, false,
1576 BaseReg, false, BaseUndef, false, OffUndef,
1577 Pred, PredReg, TII, isT2);
1578 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1579 EvenReg, EvenDeadKill, false,
1580 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1581 Pred, PredReg, TII, isT2);
1583 if (OddReg == EvenReg && EvenDeadKill) {
1584 // If the two source operands are the same, the kill marker is
1585 // probably on the first one. e.g.
1586 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1587 EvenDeadKill = false;
1590 // Never kill the base register in the first instruction.
1591 if (EvenReg == BaseReg)
1592 EvenDeadKill = false;
1593 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1594 EvenReg, EvenDeadKill, EvenUndef,
1595 BaseReg, false, BaseUndef, false, OffUndef,
1596 Pred, PredReg, TII, isT2);
1597 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1598 OddReg, OddDeadKill, OddUndef,
1599 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1600 Pred, PredReg, TII, isT2);
// Remove the original LDRD/STRD; the caller's iterator is advanced past it.
1608 MBBI = MBB.erase(MBBI);
1612 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1613 /// incrementing offset into LDM / STM ops.
1614 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1616 unsigned CurrBase = 0;
1617 unsigned CurrOpc = ~0u;
1618 ARMCC::CondCodes CurrPred = ARMCC::AL;
1619 unsigned Position = 0;
1620 assert(Candidates.size() == 0);
1621 assert(MergeBaseCandidates.size() == 0);
1622 LiveRegsValid = false;
// Walk the block backwards, building chains of same-base/same-opcode/
// same-predicate memory ops in MemOps; a chain is flushed into merge
// candidates whenever it is broken.
1624 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1626 // The instruction in front of the iterator is the one we look at.
1627 MBBI = std::prev(I);
1628 if (FixInvalidRegPairOp(MBB, MBBI))
1632 if (isMemoryOp(MBBI)) {
1633 unsigned Opcode = MBBI->getOpcode();
1634 const MachineOperand &MO = MBBI->getOperand(0);
1635 unsigned Reg = MO.getReg();
1636 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1637 unsigned PredReg = 0;
1638 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1639 int Offset = getMemoryOpOffset(MBBI);
1640 if (CurrBase == 0) {
1641 // Start of a new chain.
1645 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1648 // Note: No need to match PredReg in the next if.
1649 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1651 // r4 := ldr [r0, #8]
1652 // r4 := ldr [r0, #4]
1655 // If a load overrides the base register or a register loaded by
1656 // another load in our chain, we cannot take this instruction.
1657 bool Overlap = false;
1658 if (isLoadSingle(Opcode)) {
1659 Overlap = (Base == Reg);
1661 for (const MemOpQueueEntry &E : MemOps) {
1662 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1671 // Check offset and sort memory operation into the current chain.
1672 if (Offset > MemOps.back().Offset) {
1673 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Otherwise find the insertion point keeping MemOps sorted by offset;
// a duplicate offset is a collision and aborts the chain extension.
1676 MemOpQueue::iterator MI, ME;
1677 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1678 if (Offset < MI->Offset) {
1679 // Found a place to insert.
1682 if (Offset == MI->Offset) {
1683 // Collision, abort.
1688 if (MI != MemOps.end()) {
1689 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1696 // Don't advance the iterator; The op will start a new chain next.
1699 // Fallthrough to look into existing chain.
1700 } else if (MBBI->isDebugValue()) {
1702 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1703 MBBI->getOpcode() == ARM::t2STRDi8) {
1704 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1705 // remember them because we may still be able to merge add/sub into them.
1706 MergeBaseCandidates.push_back(MBBI);
1710 // If we are here then the chain is broken; Extract candidates for a merge.
1711 if (MemOps.size() > 0) {
1712 FormCandidates(MemOps);
1713 // Reset for the next chain.
1716 CurrPred = ARMCC::AL;
// Flush whatever chain remains at the top of the block.
1720 if (MemOps.size() > 0)
1721 FormCandidates(MemOps);
1723 // Sort candidates so they get processed from end to begin of the basic
1724 // block later; This is necessary for liveness calculation.
1725 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1726 return M0->InsertPos < M1->InsertPos;
1728 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1730 // Go through list of candidates and merge.
1731 bool Changed = false;
1732 for (const MergeCandidate *Candidate : Candidates) {
1733 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1734 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1735 // Merge preceding/trailing base inc/dec into the merged op.
1738 unsigned Opcode = Merged->getOpcode();
1739 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1740 MergeBaseUpdateLSDouble(*Merged);
1742 MergeBaseUpdateLSMultiple(Merged);
// Merging failed: still try to fold base updates into the individual ops.
1744 for (MachineInstr *MI : Candidate->Instrs) {
1745 if (MergeBaseUpdateLoadStore(MI))
1750 assert(Candidate->Instrs.size() == 1);
1751 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1756 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1757 for (MachineInstr *MI : MergeBaseCandidates)
1758 MergeBaseUpdateLSDouble(*MI);
1759 MergeBaseCandidates.clear();
1764 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1765 /// into the preceding stack restore so it directly restore the value of LR
1767 /// ldmfd sp!, {..., lr}
1770 /// ldmfd sp!, {..., lr}
1773 /// ldmfd sp!, {..., pc}
1774 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1775 // Thumb1 LDM doesn't allow high registers.
1776 if (isThumb1) return false;
1777 if (MBB.empty()) return false;
1779 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1780 if (MBBI != MBB.begin() &&
1781 (MBBI->getOpcode() == ARM::BX_RET ||
1782 MBBI->getOpcode() == ARM::tBX_RET ||
1783 MBBI->getOpcode() == ARM::MOVPCLR)) {
// The instruction just before the return must be an updating LDM whose last
// transferred register is LR; that LDM can then restore straight into PC.
1784 MachineInstr *PrevMI = std::prev(MBBI);
1785 unsigned Opcode = PrevMI->getOpcode();
1786 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1787 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1788 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1789 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1790 if (MO.getReg() != ARM::LR)
1792 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1793 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1794 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
// Retarget the LDM to the _RET form and absorb the return's implicit ops.
1795 PrevMI->setDesc(TII->get(NewOpc));
1797 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1805 /// Pass entry point: caches subtarget/register info, then runs the
1805 /// load/store-multiple merge (and, on v5T+, the return-into-LDM merge) over
1805 /// every basic block. Returns true if the function was modified.
1805 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1807 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1808 TL = STI->getTargetLowering();
1809 AFI = Fn.getInfo<ARMFunctionInfo>();
1810 TII = STI->getInstrInfo();
1811 TRI = STI->getRegisterInfo();
1812 MRI = &Fn.getRegInfo();
1813 RegClassInfoValid = false;
1814 isThumb2 = AFI->isThumb2Function();
1815 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1817 bool Modified = false;
1818 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1820 MachineBasicBlock &MBB = *MFI;
1821 Modified |= LoadStoreMultipleOpti(MBB);
// LDM-into-PC returns require ARMv5T or later.
1822 if (STI->hasV5TOps())
1823 Modified |= MergeReturnIntoLDM(MBB);
// Free all per-function allocations (merge candidates etc.) in one shot.
1826 Allocator.DestroyAll();
1831 /// Pre- register allocation pass that move load / stores from consecutive
1832 /// locations close to make it more likely they will be combined later.
1833 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1835 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function state, set up in runOnMachineFunction.
1837 const DataLayout *TD;
1838 const TargetInstrInfo *TII;
1839 const TargetRegisterInfo *TRI;
1840 const ARMSubtarget *STI;
1841 MachineRegisterInfo *MRI;
1842 MachineFunction *MF;
1844 bool runOnMachineFunction(MachineFunction &Fn) override;
1846 const char *getPassName() const override {
1847 return "ARM pre- register allocation load / store optimization pass";
// Checks whether Op0/Op1 can be fused into one LDRD/STRD; on success fills
// in the new opcode, registers, base, predicate and debug location.
1851 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1852 unsigned &NewOpc, unsigned &EvenReg,
1853 unsigned &OddReg, unsigned &BaseReg,
1855 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves the queued same-base loads/stores in Ops next to each other;
// MI2LocMap gives each instruction's original position in the block.
1857 bool RescheduleOps(MachineBasicBlock *MBB,
1858 SmallVectorImpl<MachineInstr *> &Ops,
1859 unsigned Base, bool isLd,
1860 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1861 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification (address is the unique ID, value unused).
1863 char ARMPreAllocLoadStoreOpt::ID = 0;
1866 /// Pass entry point: caches target info, then reschedules loads/stores in
1866 /// every basic block. Returns true if the function was modified.
1866 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1867 TD = Fn.getTarget().getDataLayout();
1868 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1869 TII = STI->getInstrInfo();
1870 TRI = STI->getRegisterInfo();
1871 MRI = &Fn.getRegInfo();
1874 bool Modified = false;
1875 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1877 Modified |= RescheduleLoadStoreInstrs(MFI);
1882 /// Decide whether the memory ops in \p MemOps can be moved together across
1882 /// the instructions in [I, E) without changing behavior, and whether doing
1882 /// so is worth the register-pressure cost.
1882 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1883 MachineBasicBlock::iterator I,
1884 MachineBasicBlock::iterator E,
1885 SmallPtrSetImpl<MachineInstr*> &MemOps,
1886 SmallSet<unsigned, 4> &MemRegs,
1887 const TargetRegisterInfo *TRI) {
1888 // Are there stores / loads / calls between them?
1889 // FIXME: This is overly conservative. We should make use of alias information
1891 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions already part of the group (and debug values) are ignored.
1893 if (I->isDebugValue() || MemOps.count(&*I))
1895 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1897 if (isLd && I->mayStore())
1902 // It's not safe to move the first 'str' down.
1905 // str r4, [r0, #+4]
// Any redefinition of the base register between the ops blocks the move;
// registers live across the gap add to estimated register pressure.
1909 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1910 MachineOperand &MO = I->getOperand(j);
1913 unsigned Reg = MO.getReg();
1914 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1916 if (Reg != Base && !MemRegs.count(Reg))
1917 AddedRegPressure.insert(Reg);
1921 // Estimate register pressure increase due to the transformation.
1922 if (MemRegs.size() <= 4)
1923 // Ok if we are moving small number of instructions.
1925 return AddedRegPressure.size() <= MemRegs.size() * 2;
1929 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
1930 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1931 MachineInstr *Op1) {
1932 assert(MI->memoperands_empty() && "expected a new machineinstr");
1933 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1934 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Allocate one array large enough for both source lists and copy Op0's
// memoperands followed by Op1's, then hand the array to MI.
1936 MachineFunction *MF = MI->getParent()->getParent();
1937 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1938 MachineSDNode::mmo_iterator MemEnd =
1939 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1941 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1942 MI->setMemRefs(MemBegin, MemEnd);
1946 /// Check whether the adjacent accesses \p Op0/\p Op1 can be combined into a
1946 /// single LDRD/STRD. On success the out-parameters receive the new opcode,
1946 /// the two data registers, the base register, the encoded offset, the
1946 /// predicate and the debug location.
1946 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1947 DebugLoc &dl, unsigned &NewOpc,
1949 unsigned &SecondReg,
1950 unsigned &BaseReg, int &Offset,
1952 ARMCC::CondCodes &Pred,
1954 // Make sure we're allowed to generate LDRD/STRD.
1955 if (!STI->hasV5TEOps())
1958 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1960 unsigned Opcode = Op0->getOpcode();
1961 if (Opcode == ARM::LDRi12) {
1963 } else if (Opcode == ARM::STRi12) {
1965 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1966 NewOpc = ARM::t2LDRDi8;
1969 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1970 NewOpc = ARM::t2STRDi8;
1977 // Make sure the base address satisfies i64 ld / st alignment requirement.
1978 // At the moment, we ignore the memoryoperand's value.
1979 // If we want to use AliasAnalysis, we should check it accordingly.
1980 if (!Op0->hasOneMemOperand() ||
1981 (*Op0->memoperands_begin())->isVolatile())
1984 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1985 const Function *Func = MF->getFunction();
1986 unsigned ReqAlign = STI->hasV6Ops()
1987 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1988 : 8; // Pre-v6 need 8-byte align
1989 if (Align < ReqAlign)
1992 // Then make sure the immediate offset fits.
1993 int OffImm = getMemoryOpOffset(Op0);
1995 int Limit = (1 << 8) * Scale;
1996 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM-mode LDRD uses addrmode3: sign is carried in the AM3 opcode encoding,
// so fold a negative offset into AddSub and re-check the magnitude.
2000 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2002 AddSub = ARM_AM::sub;
2005 int Limit = (1 << 8) * Scale;
2006 if (OffImm >= Limit || (OffImm & (Scale-1)))
2008 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD/STRD cannot use the same register for both halves.
2010 FirstReg = Op0->getOperand(0).getReg();
2011 SecondReg = Op1->getOperand(0).getReg();
2012 if (FirstReg == SecondReg)
2014 BaseReg = Op0->getOperand(1).getReg();
2015 Pred = getInstrPredicate(Op0, PredReg);
2016 dl = Op0->getDebugLoc();
2020 /// Move the same-base loads/stores in \p Ops next to each other (loads up to
2020 /// the first op, stores down to the last) so later passes can merge them,
2020 /// and fuse adjacent pairs into LDRD/STRD when legal. Returns true if
2020 /// anything was moved or fused.
2020 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2021 SmallVectorImpl<MachineInstr *> &Ops,
2022 unsigned Base, bool isLd,
2023 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2024 bool RetVal = false;
2026 // Sort by offset (in reverse order).
2027 std::sort(Ops.begin(), Ops.end(),
2028 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2029 int LOffset = getMemoryOpOffset(LHS);
2030 int ROffset = getMemoryOpOffset(RHS);
2031 assert(LHS == RHS || LOffset != ROffset);
2032 return LOffset > ROffset;
2035 // The loads / stores of the same base are in order. Scan them from first to
2036 // last and check for the following:
2037 // 1. Any def of base.
2039 while (Ops.size() > 1) {
2040 unsigned FirstLoc = ~0U;
2041 unsigned LastLoc = 0;
2042 MachineInstr *FirstOp = nullptr;
2043 MachineInstr *LastOp = nullptr;
2045 unsigned LastOpcode = 0;
2046 unsigned LastBytes = 0;
2047 unsigned NumMove = 0;
// Walk Ops from lowest offset to highest (the vector is sorted descending,
// hence the reverse index loop), growing a run of ops whose offsets are
// exactly consecutive and whose opcodes merge to the same LSM opcode.
2048 for (int i = Ops.size() - 1; i >= 0; --i) {
2049 MachineInstr *Op = Ops[i];
2050 unsigned Loc = MI2LocMap[Op];
2051 if (Loc <= FirstLoc) {
2055 if (Loc >= LastLoc) {
2061 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2062 if (LastOpcode && LSMOpcode != LastOpcode)
2065 int Offset = getMemoryOpOffset(Op);
2066 unsigned Bytes = getLSMultipleTransferSize(Op);
2068 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2071 LastOffset = Offset;
2073 LastOpcode = LSMOpcode;
2074 if (++NumMove == 8) // FIXME: Tune this limit.
2081 SmallPtrSet<MachineInstr*, 4> MemOps;
2082 SmallSet<unsigned, 4> MemRegs;
2083 for (int i = NumMove-1; i >= 0; --i) {
2084 MemOps.insert(Ops[i]);
2085 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2088 // Be conservative, if the instructions are too far apart, don't
2089 // move them. We want to limit the increase of register pressure.
2090 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2092 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2093 MemOps, MemRegs, TRI);
2095 for (unsigned i = 0; i != NumMove; ++i)
2098 // This is the new location for the loads / stores.
2099 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2100 while (InsertPos != MBB->end()
2101 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2104 // If we are moving a pair of loads / stores, see if it makes sense
2105 // to try to allocate a pair of registers that can form register pairs.
2106 MachineInstr *Op0 = Ops.back();
2107 MachineInstr *Op1 = Ops[Ops.size()-2];
2108 unsigned FirstReg = 0, SecondReg = 0;
2109 unsigned BaseReg = 0, PredReg = 0;
2110 ARMCC::CondCodes Pred = ARMCC::AL;
2112 unsigned NewOpc = 0;
2115 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2116 FirstReg, SecondReg, BaseReg,
2117 Offset, PredReg, Pred, isT2)) {
// Constrain both data registers to the class LDRD/STRD requires before
// building the fused instruction.
2121 const MCInstrDesc &MCID = TII->get(NewOpc);
2122 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2123 MRI->constrainRegClass(FirstReg, TRC);
2124 MRI->constrainRegClass(SecondReg, TRC);
2126 // Form the pair instruction.
2128 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2129 .addReg(FirstReg, RegState::Define)
2130 .addReg(SecondReg, RegState::Define)
2132 // FIXME: We're converting from LDRi12 to an insn that still
2133 // uses addrmode2, so we need an explicit offset reg. It should
2134 // always by reg0 since we're transforming LDRi12s.
2137 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2138 concatenateMemOperands(MIB, Op0, Op1);
2139 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2142 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2146 // FIXME: We're converting from LDRi12 to an insn that still
2147 // uses addrmode2, so we need an explicit offset reg. It should
2148 // always by reg0 since we're transforming STRi12s.
2151 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2152 concatenateMemOperands(MIB, Op0, Op1);
2153 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2160 // Add register allocation hints to form register pairs.
2161 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2162 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// No LDRD/STRD formed: just splice the run of ops to the insertion point.
2165 for (unsigned i = 0; i != NumMove; ++i) {
2166 MachineInstr *Op = Ops.back();
2168 MBB->splice(InsertPos, MBB, Op);
2172 NumLdStMoved += NumMove;
// Scan one basic block, bucket single loads/stores by base register, and
// invoke RescheduleOps on each bucket so same-base accesses end up adjacent.
// Returns true if any instruction was moved (return stmt elided from this
// excerpt). NOTE(review): several original lines are elided here (e.g. the
// declaration of Loc, loop-continue/break statements, closing braces);
// comments describe only what the visible lines establish.
2182 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2183 bool RetVal = false;
// MI2LocMap gives each instruction a monotonically increasing "location"
// so RescheduleOps can reason about program order and distance.
2185 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2186 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2187 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2188 SmallVector<unsigned, 4> LdBases;
2189 SmallVector<unsigned, 4> StBases;
2192 MachineBasicBlock::iterator MBBI = MBB->begin();
2193 MachineBasicBlock::iterator E = MBB->end();
2195 for (; MBBI != E; ++MBBI) {
2196 MachineInstr *MI = MBBI;
2197 if (MI->isCall() || MI->isTerminator()) {
2198 // Stop at barriers.
// Debug values get no location; they must not perturb distances.
2203 if (!MI->isDebugValue())
2204 MI2LocMap[MI] = ++Loc;
// Only reschedulable single load/store ops are bucketed.
2206 if (!isMemoryOp(MI))
// Predicated ops are skipped; moving them is not handled here.
2208 unsigned PredReg = 0;
2209 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2212 int Opc = MI->getOpcode();
2213 bool isLd = isLoadSingle(Opc);
2214 unsigned Base = MI->getOperand(1).getReg();
2215 int Offset = getMemoryOpOffset(MI);
2217 bool StopHere = false;
// Load path: append to this base's bucket unless the same base+offset
// was already seen (a duplicate — handled by StopHere below).
2219 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2220 Base2LdsMap.find(Base);
2221 if (BI != Base2LdsMap.end()) {
2222 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2223 if (Offset == getMemoryOpOffset(BI->second[i])) {
2229 BI->second.push_back(MI);
// First load seen for this base: start a new bucket and remember the base.
2231 Base2LdsMap[Base].push_back(MI);
2232 LdBases.push_back(Base);
// Store path: mirror of the load path above.
2235 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2236 Base2StsMap.find(Base);
2237 if (BI != Base2StsMap.end()) {
2238 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2239 if (Offset == getMemoryOpOffset(BI->second[i])) {
2245 BI->second.push_back(MI);
2247 Base2StsMap[Base].push_back(MI);
2248 StBases.push_back(Base);
2253 // Found a duplicate (a base+offset combination that's seen earlier).
2260 // Re-schedule loads.
2261 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2262 unsigned Base = LdBases[i];
2263 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
// Only buckets with >1 op are worth rescheduling (guard elided here).
2265 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2268 // Re-schedule stores.
2269 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2270 unsigned Base = StBases[i];
2271 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2273 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before scanning the next region of the block.
2277 Base2LdsMap.clear();
2278 Base2StsMap.clear();
2288 /// Returns an instance of the load / store optimization pass.
// Factory: \p PreAlloc selects the pre-register-allocation rescheduling
// pass; otherwise the post-allocation LDM/STM-forming pass is created.
// NOTE(review): the `if (PreAlloc)` guard (original line 2290) and the
// closing brace are elided from this excerpt, which is why two returns
// appear back-to-back — confirm against the full file before editing.
2289 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2291 return new ARMPreAllocLoadStoreOpt();
2292 return new ARMLoadStoreOpt();