1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
47 #define DEBUG_TYPE "arm-ldst-opt"
// Pass-wide statistics (printed with -stats): counts of the multiple
// load/store instructions this pass forms, and of ldrd/strd handling.
49 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
50 STATISTIC(NumSTMGened , "Number of stm instructions generated");
51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
62 /// Post-register-allocation pass that combines load / store instructions to
63 /// form ldm / stm instructions.
64 struct ARMLoadStoreOpt : public MachineFunctionPass {
66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached at the start of runOnMachineFunction for use by the
// helper methods below.
68 const TargetInstrInfo *TII;
69 const TargetRegisterInfo *TRI;
70 const ARMSubtarget *STI;
71 const TargetLowering *TL;
// Subtarget mode flags; many merge decisions differ between ARM, Thumb1
// and Thumb2 (e.g. available addressing submodes, writeback behavior).
74 bool isThumb1, isThumb2;
76 bool runOnMachineFunction(MachineFunction &Fn) override;
78 const char *getPassName() const override {
79 return "ARM load / store optimization pass";
// One load/store candidate gathered while scanning a basic block.
// Position orders candidates within the block; Merged marks entries that
// have already been folded into an LDM/STM.
83 struct MemOpQueueEntry {
88 MachineBasicBlock::iterator MBBI;
90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
91 MachineBasicBlock::iterator i)
92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95 typedef MemOpQueue::iterator MemOpQueueIter;
// Collect operands in MemOps that use DefReg between positions RangeBegin
// and RangeEnd (see the definition below for details).
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
// Thumb1-only: compensate later users of Base for the writeback offset.
100 void UpdateBaseRegUses(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 DebugLoc dl, unsigned Base, unsigned WordOffset,
103 ARMCC::CondCodes Pred, unsigned PredReg);
// Build a single LDM/STM from the given register list; returns true on
// success.
104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
105 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
108 ArrayRef<std::pair<unsigned, bool> > Regs,
109 ArrayRef<unsigned> ImpDefs);
// Wrapper around MergeOps that also fixes up kill flags and the MemOps
// queue on success.
110 void MergeOpsUpdate(MachineBasicBlock &MBB,
112 unsigned memOpsBegin,
114 unsigned insertAfter,
119 ARMCC::CondCodes Pred,
123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
// Greedily merge a run of queued memops starting at SIndex.
124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
125 unsigned Opcode, unsigned Size,
126 ARMCC::CondCodes Pred, unsigned PredReg,
127 unsigned Scratch, MemOpQueue &MemOps,
128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator &MBBI);
// Fold a neighboring base-register add/sub into pre/post-indexed forms.
132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
134 const TargetInstrInfo *TII,
136 MachineBasicBlock::iterator &I);
137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator MBBI,
140 MachineBasicBlock::iterator &I);
141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
144 char ARMLoadStoreOpt::ID = 0;
/// Return whether MI has a live (i.e. not-dead) definition of CPSR. Used to
/// decide whether flag-setting instructions may be folded or reordered.
147 static bool definesCPSR(const MachineInstr *MI) {
148 for (const auto &MO : MI->operands()) {
151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
152 // If the instruction has live CPSR def, then it's not safe to fold it
153 // into load / store.
/// Decode the signed byte offset of a load/store's immediate addressing
/// operand. The offset immediate is always the third-from-last operand; how
/// it is interpreted depends on the opcode's addressing mode:
/// plain byte offset for the i12/i8 forms, scaled by 4 for Thumb1, and
/// AM3/AM5 encodings (magnitude + add/sub flag) otherwise.
160 static int getMemoryOpOffset(const MachineInstr *MI) {
161 unsigned Opcode = MI->getOpcode();
162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
163 unsigned NumOperands = MI->getDesc().getNumOperands();
164 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// NOTE(review): these forms carry the byte offset directly in the
// immediate (no addressing-mode decoding needed) — confirm against the
// instruction definitions.
166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
172 // Thumb1 immediate offsets are scaled by 4
173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
174 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
// Remaining opcodes use AM3 (LDRD/STRD) or AM5 (VFP, offset in words)
// encodings: split into magnitude and an add/sub direction bit.
177 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
178 : ARM_AM::getAM5Offset(OffField) * 4;
179 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
180 : ARM_AM::getAM5Op(OffField);
182 if (Op == ARM_AM::sub)
/// Map a single load/store opcode plus an addressing submode (ia/ib/da/db)
/// to the corresponding load/store-multiple opcode. Returns 0 when no
/// non-writeback variant exists for the requested submode (e.g. VFP db
/// forms, which only exist as _UPD).
188 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
190 default: llvm_unreachable("Unhandled opcode!");
194 default: llvm_unreachable("Unhandled submode!");
195 case ARM_AM::ia: return ARM::LDMIA;
196 case ARM_AM::da: return ARM::LDMDA;
197 case ARM_AM::db: return ARM::LDMDB;
198 case ARM_AM::ib: return ARM::LDMIB;
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::STMIA;
205 case ARM_AM::da: return ARM::STMDA;
206 case ARM_AM::db: return ARM::STMDB;
207 case ARM_AM::ib: return ARM::STMIB;
211 // tLDMIA is writeback-only - unless the base register is in the input
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::tLDMIA;
220 // There is no non-writeback tSTMIA either.
223 default: llvm_unreachable("Unhandled submode!");
224 case ARM_AM::ia: return ARM::tSTMIA_UPD;
// Thumb2 only supports the IA and DB submodes.
230 default: llvm_unreachable("Unhandled submode!");
231 case ARM_AM::ia: return ARM::t2LDMIA;
232 case ARM_AM::db: return ARM::t2LDMDB;
238 default: llvm_unreachable("Unhandled submode!");
239 case ARM_AM::ia: return ARM::t2STMIA;
240 case ARM_AM::db: return ARM::t2STMDB;
// VFP single- and double-precision variants: db exists only with base
// writeback, so return 0 and let the caller reject the merge.
245 default: llvm_unreachable("Unhandled submode!");
246 case ARM_AM::ia: return ARM::VLDMSIA;
247 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
252 default: llvm_unreachable("Unhandled submode!");
253 case ARM_AM::ia: return ARM::VSTMSIA;
254 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
259 default: llvm_unreachable("Unhandled submode!");
260 case ARM_AM::ia: return ARM::VLDMDIA;
261 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
266 default: llvm_unreachable("Unhandled submode!");
267 case ARM_AM::ia: return ARM::VSTMDIA;
268 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// Return the addressing submode (ia/ib/da/db) encoded in a
/// load/store-multiple opcode, covering plain, _UPD and _RET variants.
273 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
275 default: llvm_unreachable("Unhandled opcode!");
// Increment-after family.
282 case ARM::tLDMIA_UPD:
283 case ARM::tSTMIA_UPD:
284 case ARM::t2LDMIA_RET:
286 case ARM::t2LDMIA_UPD:
288 case ARM::t2STMIA_UPD:
290 case ARM::VLDMSIA_UPD:
292 case ARM::VSTMSIA_UPD:
294 case ARM::VLDMDIA_UPD:
296 case ARM::VSTMDIA_UPD:
// Decrement-before family.
310 case ARM::t2LDMDB_UPD:
312 case ARM::t2STMDB_UPD:
313 case ARM::VLDMSDB_UPD:
314 case ARM::VSTMSDB_UPD:
315 case ARM::VLDMDDB_UPD:
316 case ARM::VSTMDDB_UPD:
/// True for the Thumb1 32-bit integer loads with immediate offset.
327 static bool isT1i32Load(unsigned Opc) {
328 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
/// True for the Thumb2 32-bit integer loads with immediate offset.
331 static bool isT2i32Load(unsigned Opc) {
332 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
/// True for any (ARM, Thumb1 or Thumb2) 32-bit integer immediate-offset load.
335 static bool isi32Load(unsigned Opc) {
336 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
/// True for the Thumb1 32-bit integer stores with immediate offset.
339 static bool isT1i32Store(unsigned Opc) {
340 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
/// True for the Thumb2 32-bit integer stores with immediate offset.
343 static bool isT2i32Store(unsigned Opc) {
344 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
/// True for any (ARM, Thumb1 or Thumb2) 32-bit integer immediate-offset store.
347 static bool isi32Store(unsigned Opc) {
348 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
/// Number of immediate-offset steps per word for a Thumb1 load/store opcode.
/// Thumb1 immediates are encoded in units of the access size, so converting
/// a word count into immediate units needs a per-opcode scale.
/// NOTE(review): the per-opcode case labels are not visible here —
/// confirm the scale values against the Thumb1 encodings.
351 static unsigned getImmScale(unsigned Opc) {
353 default: llvm_unreachable("Unhandled opcode!");
368 /// Update future uses of the base register with the offset introduced
369 /// due to writeback. This function only works on Thumb1.
///
/// Walks forward from MBBI, folding -WordOffset*4 into immediate-offset
/// loads/stores and ADDS/SUBS that use Base. When an instruction cannot
/// absorb the adjustment (or at the end of a block with successors), a
/// "SUBS Base, Base, #WordOffset*4" is inserted to reset the register.
371 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
372 MachineBasicBlock::iterator MBBI,
373 DebugLoc dl, unsigned Base,
375 ARMCC::CondCodes Pred, unsigned PredReg) {
376 assert(isThumb1 && "Can only update base register uses for Thumb1!");
377 // Start updating any instructions with immediate offsets. Insert a SUB before
378 // the first non-updateable instruction (if any).
379 for (; MBBI != MBB.end(); ++MBBI) {
380 bool InsertSub = false;
381 unsigned Opc = MBBI->getOpcode();
383 if (MBBI->readsRegister(Base)) {
// Thumb1 immediate-offset memory ops that can absorb the adjustment.
386 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
388 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
390 if (IsLoad || IsStore) {
391 // Loads and stores with immediate offsets can be updated, but only if
392 // the new offset isn't negative.
393 // The MachineOperand containing the offset immediate is the last one
394 // before predicates.
396 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
397 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
398 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
400 // If storing the base register, it needs to be reset first.
401 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
403 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
408 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
409 !definesCPSR(MBBI)) {
410 // SUBS/ADDS using this register, with a dead def of the CPSR.
411 // Merge it with the update; if the merged offset is too large,
412 // insert a new sub instead.
414 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
415 Offset = (Opc == ARM::tSUBi8) ?
416 MO.getImm() + WordOffset * 4 :
417 MO.getImm() - WordOffset * 4 ;
418 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
419 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
422 // The base register has now been reset, so exit early.
429 // Can't update the instruction.
433 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
434 // Since SUBS sets the condition flags, we can't place the base reset
435 // after an instruction that has a live CPSR def.
436 // The base register might also contain an argument for a function call.
441 // An instruction above couldn't be updated, so insert a sub.
442 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
443 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
// A kill or redefinition of Base ends the range we need to fix up.
447 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
448 // Register got killed. Stop updating.
452 // End of block was reached.
453 if (MBB.succ_size() > 0) {
454 // FIXME: Because of a bug, live registers are sometimes missing from
455 // the successor blocks' live-in sets. This means we can't trust that
456 // information and *always* have to reset at the end of a block.
458 if (MBBI != MBB.end()) --MBBI;
460 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
461 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
465 /// Create and insert a LDM or STM with Base as base register and registers in
466 /// Regs as the register operands that would be loaded / stored. It returns
467 /// true if the transformation is done.
///
/// Regs pairs each register with its kill flag; ImpDefs lists implicit
/// super-register defs that must be preserved on the merged instruction.
/// Scratch may be used to materialize a new base when Offset != 0.
469 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
470 MachineBasicBlock::iterator MBBI,
471 int Offset, unsigned Base, bool BaseKill,
472 unsigned Opcode, ARMCC::CondCodes Pred,
473 unsigned PredReg, unsigned Scratch, DebugLoc dl,
474 ArrayRef<std::pair<unsigned, bool> > Regs,
475 ArrayRef<unsigned> ImpDefs) {
476 // Only a single register to load / store. Don't bother.
477 unsigned NumRegs = Regs.size();
481 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
482 // Compute liveness information for that register to make the decision.
483 bool SafeToClobberCPSR = !isThumb1 ||
484 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
485 MachineBasicBlock::LQR_Dead);
487 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
489 // Exception: If the base register is in the input reglist, Thumb1 LDM is
491 // It's also not possible to merge an STR of the base register in Thumb1.
493 for (const std::pair<unsigned, bool> &R : Regs)
494 if (Base == R.first) {
495 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
496 if (Opcode == ARM::tLDRi) {
499 } else if (Opcode == ARM::tSTRi) {
// Select the addressing submode from the starting offset of the run.
504 ARM_AM::AMSubMode Mode = ARM_AM::ia;
505 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
506 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
507 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
509 if (Offset == 4 && haveIBAndDA) {
511 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
513 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
514 // VLDM/VSTM do not support DB mode without also updating the base reg.
516 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
517 // Check if this is a supported opcode before inserting instructions to
518 // calculate a new base register.
519 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
521 // If starting offset isn't zero, insert a MI to materialize a new base.
522 // But only do so if it is cost effective, i.e. merging more than two
527 // On Thumb1, it's not worth materializing a new base register without
528 // clobbering the CPSR (i.e. not using ADDS/SUBS).
529 if (!SafeToClobberCPSR)
533 if (isi32Load(Opcode)) {
534 // If it is a load, then just use one of the destination register to
535 // use as the new base.
536 NewBase = Regs[NumRegs-1].first;
538 // Use the scratch register to use as a new base.
// Pick the add/sub opcode appropriate for the subtarget and offset size.
545 isThumb2 ? ARM::t2ADDri :
546 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
547 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
548 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
553 isThumb2 ? ARM::t2SUBri :
554 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
555 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
558 if (!TL->isLegalAddImmediate(Offset))
559 // FIXME: Try add with register operand?
560 return false; // Probably not worth it then.
563 // Thumb1: depending on immediate size, use either
564 // ADDS NewBase, Base, #imm3
567 // ADDS NewBase, #imm8.
568 if (Base != NewBase &&
569 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
570 // Need to insert a MOV to the new base first.
571 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
573 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
574 if (Pred != ARMCC::AL)
576 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
577 .addReg(Base, getKillRegState(BaseKill));
579 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
580 .addReg(Base, getKillRegState(BaseKill))
581 .addImm(Pred).addReg(PredReg);
583 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
587 if (BaseOpc == ARM::tADDrSPi) {
588 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
589 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
590 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
591 .addImm(Pred).addReg(PredReg);
593 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
594 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
595 .addImm(Pred).addReg(PredReg);
597 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
598 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
599 .addImm(Pred).addReg(PredReg).addReg(0);
602 BaseKill = true; // New base is always killed straight away.
// Loads define their register-list operands; stores use them.
605 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
606 Opcode == ARM::VLDRD);
608 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
609 // base register writeback.
610 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
611 if (!Opcode) return false;
613 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
614 // - There is no writeback (LDM of base register),
615 // - the base register is killed by the merged instruction,
616 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
617 // to reset the base register.
618 // Otherwise, don't merge.
619 // It's safe to return here since the code to materialize a new base register
620 // above is also conditional on SafeToClobberCPSR.
621 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
624 MachineInstrBuilder MIB;
627 if (Opcode == ARM::tLDMIA)
628 // Update tLDMIA with writeback if necessary.
629 Opcode = ARM::tLDMIA_UPD;
631 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
633 // Thumb1: we might need to set base writeback when building the MI.
634 MIB.addReg(Base, getDefRegState(true))
635 .addReg(Base, getKillRegState(BaseKill));
637 // The base isn't dead after a merged instruction with writeback.
638 // Insert a sub instruction after the newly formed instruction to reset.
640 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
643 // No writeback, simply build the MachineInstr.
644 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
645 MIB.addReg(Base, getKillRegState(BaseKill));
648 MIB.addImm(Pred).addReg(PredReg);
// Append the register list with per-register def/kill state.
650 for (const std::pair<unsigned, bool> &R : Regs)
651 MIB = MIB.addReg(R.first, getDefRegState(isDef)
652 | getKillRegState(R.second));
654 // Add implicit defs for super-registers.
655 for (unsigned ImpDef : ImpDefs)
656 MIB.addReg(ImpDef, RegState::ImplicitDefine);
661 /// Find all instructions using a given imp-def within a range.
663 /// We are trying to combine a range of instructions, one of which (located at
664 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
665 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
666 /// and RangeEnd must be modified to use an undefined value.
668 /// The live range continues until we find a second definition or one of the
669 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
670 /// we must consider all uses and decide which are relevant in a second pass.
671 void ARMLoadStoreOpt::findUsesOfImpDef(
672 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
673 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
// std::map keeps uses sorted by Position for the second pass below.
674 std::map<unsigned, MachineOperand *> Uses;
675 unsigned LastLivePos = RangeEnd;
677 // First we find all uses of this register with Position between RangeBegin
678 // and RangeEnd, any or all of these could be uses of a definition at
679 // RangeBegin. We also record the latest position a definition at RangeBegin
680 // would be considered live.
681 for (unsigned i = 0; i < MemOps.size(); ++i) {
682 MachineInstr &MI = *MemOps[i].MBBI;
683 unsigned MIPosition = MemOps[i].Position;
684 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
687 // If this instruction defines the register, then any later use will be of
688 // that definition rather than ours.
689 if (MI.definesRegister(DefReg))
690 LastLivePos = std::min(LastLivePos, MIPosition);
692 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
696 // If this instruction kills the register then (assuming liveness is
697 // correct when we start) we don't need to think about anything after here.
699 LastLivePos = std::min(LastLivePos, MIPosition);
701 Uses[MIPosition] = UseOp;
704 // Now we traverse the list of all uses, and append the ones that actually use
705 // our definition to the requested list.
706 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
709 // List is sorted by position so once we've found one out of range there
710 // will be no more to consider.
711 if (I->first > LastLivePos)
713 UsesOfImpDefs.push_back(I->second);
717 /// Call MergeOps and update MemOps and merges accordingly on success.
///
/// Merges the queue entries [memOpsBegin, memOpsEnd) into one LDM/STM placed
/// after entry insertAfter, fixing up kill flags that the reordering would
/// otherwise invalidate and retargeting the merged entries at the new
/// instruction.
718 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
720 unsigned memOpsBegin, unsigned memOpsEnd,
721 unsigned insertAfter, int Offset,
722 unsigned Base, bool BaseKill,
724 ARMCC::CondCodes Pred, unsigned PredReg,
727 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
728 // First calculate which of the registers should be killed by the merged
730 const unsigned insertPos = memOps[insertAfter].Position;
731 SmallSet<unsigned, 4> KilledRegs;
// Killer maps a register to the queue index of the memop that kills it.
732 DenseMap<unsigned, unsigned> Killer;
733 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
734 if (i == memOpsBegin) {
739 if (memOps[i].Position < insertPos && memOps[i].isKill) {
740 unsigned Reg = memOps[i].Reg;
741 KilledRegs.insert(Reg);
// Bail out if any memop in the range reads the base via operand 0 — the
// merged form could not preserve that dependency.
746 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
747 MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
748 if (TransferOp.isUse() && TransferOp.getReg() == Base)
// Build the register list (with kill flags) and gather implicit
// super-register defs that the merged instruction must keep.
752 SmallVector<std::pair<unsigned, bool>, 8> Regs;
753 SmallVector<unsigned, 8> ImpDefs;
754 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
755 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
756 unsigned Reg = memOps[i].Reg;
757 // If we are inserting the merged operation after an operation that
758 // uses the same register, make sure to transfer any kill flag.
759 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
760 Regs.push_back(std::make_pair(Reg, isKill));
762 // Collect any implicit defs of super-registers. They must be preserved.
763 for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
764 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
766 unsigned DefReg = MO.getReg();
767 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
768 ImpDefs.push_back(DefReg);
770 // There may be other uses of the definition between this instruction and
771 // the eventual LDM/STM position. These should be marked undef if the
772 // merge takes place.
773 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
778 // Try to do the merge.
779 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
781 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
782 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
785 // Merge succeeded, update records.
786 Merges.push_back(std::prev(Loc));
788 // In gathering loads together, we may have moved the imp-def of a register
789 // past one of its uses. This is OK, since we know better than the rest of
790 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
792 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
793 E = UsesOfImpDefs.end();
797 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
798 // Remove kill flags from any memops that come before insertPos.
799 if (Regs[i-memOpsBegin].second) {
800 unsigned Reg = Regs[i-memOpsBegin].first;
801 if (KilledRegs.count(Reg)) {
802 unsigned j = Killer[Reg];
// Pass true to find the operand carrying the kill flag for Reg.
803 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
804 assert(Idx >= 0 && "Cannot find killing operand");
805 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
806 memOps[j].isKill = false;
808 memOps[i].isKill = true;
810 MBB.erase(memOps[i].MBBI);
811 // Update this memop to refer to the merged instruction.
812 // We may need to move kill flags again.
813 memOps[i].Merged = true;
814 memOps[i].MBBI = Merges.back();
815 memOps[i].Position = insertPos;
818 // Update memOps offsets, since they may have been modified by MergeOps.
819 for (auto &MemOp : memOps) {
820 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
824 /// Merge a number of load / store instructions into one or more load / store
825 /// multiple instructions.
///
/// Scans MemOps starting at SIndex for a maximal run with consecutive
/// offsets (and, for VFP, consecutive registers), merges it via
/// MergeOpsUpdate, and recurses on the remainder of the queue.
827 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
828 unsigned Base, unsigned Opcode, unsigned Size,
829 ARMCC::CondCodes Pred, unsigned PredReg,
830 unsigned Scratch, MemOpQueue &MemOps,
831 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
832 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
833 int Offset = MemOps[SIndex].Offset;
834 int SOffset = Offset;
835 unsigned insertAfter = SIndex;
836 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
837 DebugLoc dl = Loc->getDebugLoc();
// PRegNum tracks the encoding of the previous register in the run; undef
// operands get UINT_MAX so they never satisfy the ordering checks below.
838 const MachineOperand &PMO = Loc->getOperand(0);
839 unsigned PReg = PMO.getReg();
840 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
842 unsigned Limit = ~0U;
843 bool BaseKill = false;
844 // vldm / vstm limit are 32 for S variants, 16 for D variants.
862 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
863 int NewOffset = MemOps[i].Offset;
864 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
865 unsigned Reg = MO.getReg();
866 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
867 // Register numbers must be in ascending order. For VFP / NEON load and
868 // store multiples, the registers must also be consecutive and within the
869 // limit on the number of registers per instruction.
870 if (Reg != ARM::SP &&
871 NewOffset == Offset + (int)Size &&
872 ((isNotVFP && RegNum > PRegNum) ||
873 ((Count < Limit) && RegNum == PRegNum+1)) &&
874 // On Swift we don't want vldm/vstm to start with a odd register num
875 // because Q register unaligned vldm/vstm need more uops.
876 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
881 // Can't merge this in. Try merge the earlier ones first.
882 // We need to compute BaseKill here because the MemOps may have been
884 BaseKill = Loc->killsRegister(Base);
886 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
887 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to merge the tail of the queue starting at the break point.
888 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
893 if (MemOps[i].Position > MemOps[insertAfter].Position) {
895 Loc = MemOps[i].MBBI;
// Merge the final run [SIndex, end).
899 BaseKill = Loc->killsRegister(Base);
900 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
901 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
/// Return true if MI is a sub of Base by Bytes with a matching predicate —
/// i.e. a decrement that can be folded into a neighboring load/store
/// (multiple) as base writeback. Limit, when non-zero, bounds the
/// acceptable offset. For flag-setting sub forms, CheckCPSRDef rejects
/// instructions with a live CPSR def.
904 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
905 unsigned Bytes, unsigned Limit,
906 ARMCC::CondCodes Pred, unsigned PredReg) {
907 unsigned MyPredReg = 0;
911 bool CheckCPSRDef = false;
912 switch (MI->getOpcode()) {
913 default: return false;
923 // Make sure the offset fits in 8 bits.
924 if (Bytes == 0 || (Limit && Bytes >= Limit))
// Thumb sp/imm8 subs encode the immediate in words; scale back to bytes.
927 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
928 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
929 if (!(MI->getOperand(0).getReg() == Base &&
930 MI->getOperand(1).getReg() == Base &&
931 (MI->getOperand(2).getImm() * Scale) == Bytes &&
932 getInstrPredicate(MI, MyPredReg) == Pred &&
933 MyPredReg == PredReg))
936 return CheckCPSRDef ? !definesCPSR(MI) : true;
/// Return true if MI is an add of Base by Bytes with a matching predicate —
/// the increment counterpart of isMatchingDecrement, used when folding a
/// base update into a neighboring load/store (multiple) as writeback.
939 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
940 unsigned Bytes, unsigned Limit,
941 ARMCC::CondCodes Pred, unsigned PredReg) {
942 unsigned MyPredReg = 0;
946 bool CheckCPSRDef = false;
947 switch (MI->getOpcode()) {
948 default: return false;
958 if (Bytes == 0 || (Limit && Bytes >= Limit))
959 // Make sure the offset fits in 8 bits.
// Thumb sp/imm8 adds encode the immediate in words; scale back to bytes.
962 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
963 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
964 if (!(MI->getOperand(0).getReg() == Base &&
965 MI->getOperand(1).getReg() == Base &&
966 (MI->getOperand(2).getImm() * Scale) == Bytes &&
967 getInstrPredicate(MI, MyPredReg) == Pred &&
968 MyPredReg == PredReg))
971 return CheckCPSRDef ? !definesCPSR(MI) : true;
/// Number of bytes transferred by a load/store(-multiple) instruction.
/// For multiples, the variadic register list length is recovered as the
/// difference between actual and declared operand counts; each register
/// accounts for 4 bytes (integer / S variants) or 8 bytes (D variants).
974 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
975 switch (MI->getOpcode()) {
1002 case ARM::tLDMIA_UPD:
1003 case ARM::tSTMIA_UPD:
1010 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1013 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// Map a non-writeback load/store-multiple opcode and submode to the
/// corresponding base-updating (_UPD) variant.
1017 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1018 ARM_AM::AMSubMode Mode) {
1020 default: llvm_unreachable("Unhandled opcode!");
1026 default: llvm_unreachable("Unhandled submode!");
1027 case ARM_AM::ia: return ARM::LDMIA_UPD;
1028 case ARM_AM::ib: return ARM::LDMIB_UPD;
1029 case ARM_AM::da: return ARM::LDMDA_UPD;
1030 case ARM_AM::db: return ARM::LDMDB_UPD;
1037 default: llvm_unreachable("Unhandled submode!");
1038 case ARM_AM::ia: return ARM::STMIA_UPD;
1039 case ARM_AM::ib: return ARM::STMIB_UPD;
1040 case ARM_AM::da: return ARM::STMDA_UPD;
1041 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 and VFP variants only support the IA and DB submodes.
1046 default: llvm_unreachable("Unhandled submode!");
1047 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1048 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1053 default: llvm_unreachable("Unhandled submode!");
1054 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1055 case ARM_AM::db: return ARM::t2STMDB_UPD;
1059 default: llvm_unreachable("Unhandled submode!");
1060 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1061 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1065 default: llvm_unreachable("Unhandled submode!");
1066 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1067 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1071 default: llvm_unreachable("Unhandled submode!");
1072 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1073 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1077 default: llvm_unreachable("Unhandled submode!");
1078 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1079 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1084 /// Fold proceeding/trailing inc/dec of base register into the
1085 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1087 /// stmia rn, <ra, rb, rc>
1088 /// rn := rn + 4 * 3;
1090 /// stmia rn!, <ra, rb, rc>
1092 /// rn := rn - 4 * 3;
1093 /// ldmia rn, <ra, rb, rc>
1095 /// ldmdb rn!, <ra, rb, rc>
1096 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1097 MachineBasicBlock::iterator MBBI,
1099 MachineBasicBlock::iterator &I) {
1100 // Thumb1 is already using updating loads/stores.
1101 if (isThumb1) return false;
1103 MachineInstr *MI = MBBI;
1104 unsigned Base = MI->getOperand(0).getReg();
1105 bool BaseKill = MI->getOperand(0).isKill();
1106 unsigned Bytes = getLSMultipleTransferSize(MI);
1107 unsigned PredReg = 0;
1108 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1109 unsigned Opcode = MI->getOpcode();
1110 DebugLoc dl = MI->getDebugLoc();
1112 // Can't use an updating ld/st if the base register is also a dest
1113 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1114 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1115 if (MI->getOperand(i).getReg() == Base)
1118 bool DoMerge = false;
1119 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1121 // Try merging with the previous instruction.
1122 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1123 if (MBBI != BeginMBBI) {
1124 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
// Skip over debug values so they don't block the match.
1125 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1127 if (Mode == ARM_AM::ia &&
1128 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1131 } else if (Mode == ARM_AM::ib &&
1132 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1137 MBB.erase(PrevMBBI);
1140 // Try merging with the next instruction.
1141 MachineBasicBlock::iterator EndMBBI = MBB.end();
1142 if (!DoMerge && MBBI != EndMBBI) {
1143 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1144 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1146 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1147 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1149 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1150 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Keep the caller's iterator valid if it points at the erased update.
1154 if (NextMBBI == I) {
1158 MBB.erase(NextMBBI);
// Rebuild the instruction as its base-updating (_UPD) variant: a base def
// operand is prepended, then the original operands are transferred.
1165 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1166 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1167 .addReg(Base, getDefRegState(true)) // WB base register
1168 .addReg(Base, getKillRegState(BaseKill))
1169 .addImm(Pred).addReg(PredReg);
1171 // Transfer the rest of operands.
1172 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1173 MIB.addOperand(MI->getOperand(OpNum));
1175 // Transfer memoperands.
1176 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// Map a plain load/store opcode to its pre-indexed (base-writeback-before-
/// access) counterpart. For VLDR/VSTR, which have no pre-indexed form, the
/// updating load/store-multiple opcode is returned instead, selected by
/// \p Mode (add -> IA_UPD, otherwise DB_UPD).
1182 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1183 ARM_AM::AddrOpc Mode) {
1186 return ARM::LDR_PRE_IMM;
1188 return ARM::STR_PRE_IMM;
1190 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1192 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1194 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1196 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
// Thumb2 i32 loads/stores have dedicated pre-indexed opcodes.
1199 return ARM::t2LDR_PRE;
1202 return ARM::t2STR_PRE;
1203 default: llvm_unreachable("Unhandled opcode!");
/// Map a plain load/store opcode to its post-indexed (base-writeback-after-
/// access) counterpart. As with the pre-indexed mapping, VLDR/VSTR fall back
/// to the updating load/store-multiple opcodes, chosen by \p Mode.
1207 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1208 ARM_AM::AddrOpc Mode) {
1211 return ARM::LDR_POST_IMM;
1213 return ARM::STR_POST_IMM;
1215 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1217 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1219 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1221 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
// Thumb2 i32 loads/stores have dedicated post-indexed opcodes.
1224 return ARM::t2LDR_POST;
1227 return ARM::t2STR_POST;
1228 default: llvm_unreachable("Unhandled opcode!");
1232 /// Fold proceeding/trailing inc/dec of base register into the
1233 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
/// Converts e.g. "ldr r1, [r0]; add r0, r0, #4" into "ldr r1, [r0], #4"
/// (post-indexed), or "sub r0, r0, #4; ldr r1, [r0]" into the pre-indexed
/// form. \p I is the caller's scan cursor, checked when a neighbor is erased.
1234 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1235 MachineBasicBlock::iterator MBBI,
1236 const TargetInstrInfo *TII,
1238 MachineBasicBlock::iterator &I) {
1239 // Thumb1 doesn't have updating LDR/STR.
1240 // FIXME: Use LDM/STM with single register instead.
1241 if (isThumb1) return false;
// Operand layout for these single loads/stores: 0 = data reg, 1 = base reg,
// 2 = immediate offset.
1243 MachineInstr *MI = MBBI;
1244 unsigned Base = MI->getOperand(1).getReg();
1245 bool BaseKill = MI->getOperand(1).isKill();
1246 unsigned Bytes = getLSMultipleTransferSize(MI);
1247 unsigned Opcode = MI->getOpcode();
1248 DebugLoc dl = MI->getDebugLoc();
// isAM5: VFP single-register load/store (addrmode5); isAM2: ARM-mode i32
// immediate load/store (addrmode2).
1249 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1250 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1251 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a separate base inc/dec.
1252 if (isi32Load(Opcode) || isi32Store(Opcode))
1253 if (MI->getOperand(2).getImm() != 0)
1255 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1258 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1259 // Can't do the merge if the destination register is the same as the would-be
1260 // writeback register.
1261 if (MI->getOperand(0).getReg() == Base)
1264 unsigned PredReg = 0;
1265 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1266 bool DoMerge = false;
1267 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1268 unsigned NewOpc = 0;
1269 // AM2 - 12 bits, thumb2 - 8 bits.
// Max foldable offset; 0 for AM5, which only folds the exact transfer size.
1270 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1272 // Try merging with the previous instruction.
// A preceding SUB/ADD of the base becomes a pre-indexed access.
1273 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1274 if (MBBI != BeginMBBI) {
1275 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1276 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1278 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1280 AddSub = ARM_AM::sub;
1281 } else if (!isAM5 &&
1282 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1286 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1287 MBB.erase(PrevMBBI);
1291 // Try merging with the next instruction.
// A trailing SUB/ADD of the base becomes a post-indexed access.
1292 MachineBasicBlock::iterator EndMBBI = MBB.end();
1293 if (!DoMerge && MBBI != EndMBBI) {
1294 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1295 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1298 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1300 AddSub = ARM_AM::sub;
1301 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1305 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Keep the caller's cursor valid before erasing the folded add/sub.
1306 if (NextMBBI == I) {
1310 MBB.erase(NextMBBI);
1318 // VLDM[SD]_UPD, VSTM[SD]_UPD
1319 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1320 // updating load/store-multiple instructions can be used with only one
// register.) Emit: writeback base def, base use, predicate, then the single
// data register (def for loads, use+kill-state for stores).
1322 MachineOperand &MO = MI->getOperand(0);
1323 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1324 .addReg(Base, getDefRegState(true)) // WB base register
1325 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1326 .addImm(Pred).addReg(PredReg)
1327 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1328 getKillRegState(MO.isKill())));
1331 // LDR_PRE, LDR_POST
1332 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
// Pre-indexed immediate form: signed byte offset.
1333 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1334 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1335 .addReg(Base, RegState::Define)
1336 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// AM2 post-indexed form: encoded AM2 opcode word plus a reg0 offset operand.
1338 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1339 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1340 .addReg(Base, RegState::Define)
1341 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1344 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1345 // t2LDR_PRE, t2LDR_POST
1346 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1347 .addReg(Base, RegState::Define)
1348 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Store forms: the updated base is the result register of the instruction.
1351 MachineOperand &MO = MI->getOperand(0);
1352 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1353 // the vestigal zero-reg offset register. When that's fixed, this clause
1354 // can be removed entirely.
1355 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1356 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1357 // STR_PRE, STR_POST
1358 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1359 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1360 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1362 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1363 // t2STR_PRE, t2STR_POST
1364 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1365 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1366 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1374 /// Returns true if instruction is a memory operation that this pass is capable
1375 /// of operating on.
///
/// Filters out anything whose reordering or widening would be unsafe:
/// volatile accesses, under-aligned accesses, and undef value/address
/// operands. The remaining opcodes are accepted only with a register base.
1376 static bool isMemoryOp(const MachineInstr *MI) {
1377 // When no memory operands are present, conservatively assume unaligned,
1378 // volatile, unfoldable.
1379 if (!MI->hasOneMemOperand())
1382 const MachineMemOperand *MMO = *MI->memoperands_begin();
1384 // Don't touch volatile memory accesses - we may be changing their order.
1385 if (MMO->isVolatile())
1388 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not, so merging under-aligned accesses into a multiple op is unsafe.
1390 if (MMO->getAlignment() < 4)
1393 // str <undef> could probably be eliminated entirely, but for now we just want
1394 // to avoid making a mess of it.
1395 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1396 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1397 MI->getOperand(0).isUndef())
1400 // Likewise don't mess with references to undefined addresses.
1401 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1402 MI->getOperand(1).isUndef())
1405 unsigned Opcode = MI->getOpcode();
// Per-opcode acceptance: each supported opcode requires a plain register
// base (operand 1); anything else is rejected.
1410 return MI->getOperand(1).isReg();
1413 return MI->getOperand(1).isReg();
1424 return MI->getOperand(1).isReg();
1429 /// Advance register scavenger to just before the earliest memory op that is
/// in \p MemOps, so a scratch register can be scavenged for the merge.
1431 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position — the op that appears
// first in program order (the queue is not necessarily position-sorted).
1432 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1433 unsigned Position = MemOps[0].Position;
1434 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1435 if (MemOps[i].Position < Position) {
1436 Position = MemOps[i].Position;
1437 Loc = MemOps[i].MBBI;
// Step the scavenger forward to the instruction preceding that op (no-op
// when the op is the first instruction of the block).
1441 if (Loc != MBB.begin())
1442 RS->forward(std::prev(Loc));
/// Emit a single LDR or STR (opcode \p NewOpc) before \p MBBI, used when a
/// double-word LDRD/STRD must be split into two scalar accesses.
///
/// \param isDef       true to build a load (Reg is defined), false for a store.
/// \param RegDeadKill dead flag for a load's dest / kill flag for a store's src.
/// \param isT2        Thumb2 encoding; flags which operand layout to emit.
1445 static void InsertLDR_STR(MachineBasicBlock &MBB,
1446 MachineBasicBlock::iterator &MBBI,
1447 int Offset, bool isDef,
1448 DebugLoc dl, unsigned NewOpc,
1449 unsigned Reg, bool RegDeadKill, bool RegUndef,
1450 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1451 bool OffKill, bool OffUndef,
1452 ARMCC::CondCodes Pred, unsigned PredReg,
1453 const TargetInstrInfo *TII, bool isT2) {
// Load form: dest reg is a def (possibly dead), base carries kill/undef state.
1455 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1457 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1458 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1459 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: data reg is a use (kill/undef), same base/offset/predicate tail.
1461 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1463 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1464 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1465 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// Rewrite an LDRD/STRD whose register pair is illegal (non-consecutive or
/// odd-first registers) or which trips ARM errata 602117, either into a
/// two-register LDM/STM (when the pair ascends with no offset) or into two
/// separate scalar loads/stores. Updates \p MBBI to continue scanning after
/// the replacement. Returns true if the instruction was rewritten.
1469 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1470 MachineBasicBlock::iterator &MBBI) {
1471 MachineInstr *MI = &*MBBI;
1472 unsigned Opcode = MI->getOpcode();
1473 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
// LDRD/STRD operands: 0 = even (first) reg, 1 = odd (second) reg, 2 = base.
1476 const MachineOperand &BaseOp = MI->getOperand(2);
1477 unsigned BaseReg = BaseOp.getReg();
1478 unsigned EvenReg = MI->getOperand(0).getReg();
1479 unsigned OddReg = MI->getOperand(1).getReg();
1480 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1481 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1483 // ARM errata 602117: LDRD with base in list may result in incorrect base
1484 // register when interrupted or faulted.
1485 bool Errata602117 = EvenReg == BaseReg &&
1486 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1487 // ARM LDRD/STRD needs consecutive registers.
1488 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1489 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// Nothing to fix: the pair is legal and not affected by the erratum.
1491 if (!Errata602117 && !NonConsecutiveRegs)
1494 MachineBasicBlock::iterator NewBBI = MBBI;
1495 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1496 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// Capture dead/kill/undef state of every operand before the original
// instruction is replaced, so the new instructions carry the same flags.
1497 bool EvenDeadKill = isLd ?
1498 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1499 bool EvenUndef = MI->getOperand(0).isUndef();
1500 bool OddDeadKill = isLd ?
1501 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1502 bool OddUndef = MI->getOperand(1).isUndef();
1503 bool BaseKill = BaseOp.isKill();
1504 bool BaseUndef = BaseOp.isUndef();
// ARM-mode LDRD/STRD carry an extra offset-register operand (operand 3);
// the Thumb2 forms do not.
1505 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1506 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1507 int OffImm = getMemoryOpOffset(MI);
1508 unsigned PredReg = 0;
1509 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1511 if (OddRegNum > EvenRegNum && OffImm == 0) {
1512 // Ascending register numbers and no offset. It's safe to change it to a
// ldm/stm of the two registers.
1514 unsigned NewOpc = (isLd)
1515 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1516 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1518 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1519 .addReg(BaseReg, getKillRegState(BaseKill))
1520 .addImm(Pred).addReg(PredReg)
1521 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1522 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant keeps kill/undef (not dead) flags on the source registers.
1525 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1526 .addReg(BaseReg, getKillRegState(BaseKill))
1527 .addImm(Pred).addReg(PredReg)
1529 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1531 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1534 NewBBI = std::prev(MBBI);
1536 // Split into two instructions.
1537 unsigned NewOpc = (isLd)
1538 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1539 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1540 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1541 // so adjust and use t2LDRi12 here for that.
1542 unsigned NewOpc2 = (isLd)
1543 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1544 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1545 DebugLoc dl = MBBI->getDebugLoc();
1546 // If this is a load and base register is killed, it may have been
1547 // re-defed by the load, make sure the first load does not clobber it.
// In that case load the odd (higher-addressed) register first.
1549 (BaseKill || OffKill) &&
1550 (TRI->regsOverlap(EvenReg, BaseReg))) {
1551 assert(!TRI->regsOverlap(OddReg, BaseReg));
1552 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1553 OddReg, OddDeadKill, false,
1554 BaseReg, false, BaseUndef, false, OffUndef,
1555 Pred, PredReg, TII, isT2);
1556 NewBBI = std::prev(MBBI);
1557 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1558 EvenReg, EvenDeadKill, false,
1559 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1560 Pred, PredReg, TII, isT2);
1562 if (OddReg == EvenReg && EvenDeadKill) {
1563 // If the two source operands are the same, the kill marker is
1564 // probably on the first one. e.g.
1565 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
// Move the kill to the second use so the first does not end the live range.
1566 EvenDeadKill = false;
1569 // Never kill the base register in the first instruction.
1570 if (EvenReg == BaseReg)
1571 EvenDeadKill = false;
1572 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1573 EvenReg, EvenDeadKill, EvenUndef,
1574 BaseReg, false, BaseUndef, false, OffUndef,
1575 Pred, PredReg, TII, isT2);
1576 NewBBI = std::prev(MBBI);
1577 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1578 OddReg, OddDeadKill, OddUndef,
1579 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1580 Pred, PredReg, TII, isT2);
1593 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1594 /// incrementing offset into LDM / STM ops.
///
/// Scans the block once, accumulating candidate loads/stores with the same
/// base, opcode, and predicate into MemOps (kept sorted by offset); when a
/// chain breaks, it merges the queued ops and then tries to fold neighboring
/// base inc/dec instructions into the results. Returns true if anything
/// was merged.
1595 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1596 unsigned NumMerges = 0;
1597 unsigned NumMemOps = 0;
// Current chain state: base register, opcode, access size and predicate
// that every queued op must share.
1599 unsigned CurrBase = 0;
1600 unsigned CurrOpc = ~0u;
1601 unsigned CurrSize = 0;
1602 ARMCC::CondCodes CurrPred = ARMCC::AL;
1603 unsigned CurrPredReg = 0;
1604 unsigned Position = 0;
1605 SmallVector<MachineBasicBlock::iterator,4> Merges;
1607 RS->enterBasicBlock(&MBB);
1608 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Fix illegal LDRD/STRD first; this may replace the instruction at MBBI.
1610 if (FixInvalidRegPairOp(MBB, MBBI))
1613 bool Advance = false;
1614 bool TryMerge = false;
1616 bool isMemOp = isMemoryOp(MBBI);
1618 unsigned Opcode = MBBI->getOpcode();
1619 unsigned Size = getLSMultipleTransferSize(MBBI);
1620 const MachineOperand &MO = MBBI->getOperand(0);
1621 unsigned Reg = MO.getReg();
1622 bool isKill = MO.isDef() ? false : MO.isKill();
1623 unsigned Base = MBBI->getOperand(1).getReg();
1624 unsigned PredReg = 0;
1625 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1626 int Offset = getMemoryOpOffset(MBBI);
// Watch out:
1629 // r5 := ldr [r5, #4]
1630 // r6 := ldr [r5, #8]
1632 // The second ldr has effectively broken the chain even though it
1633 // looks like the later ldr(s) use the same base register. Try to
1634 // merge the ldr's so far, including this one. But don't try to
1635 // combine the following ldr(s).
1636 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
// Watch out:
1639 // r4 := ldr [r0, #8]
1640 // r4 := ldr [r0, #4]
1642 // The optimization may reorder the second ldr in front of the first
1643 // ldr, which violates write after write(WAW) dependence. The same as
1644 // str. Try to merge inst(s) already in MemOps.
1645 bool Overlap = false;
1646 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1647 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1653 if (CurrBase == 0 && !Clobber) {
1654 // Start of a new chain.
1659 CurrPredReg = PredReg;
1660 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1663 } else if (!Overlap) {
// A chain is in progress; extend it only if this op matches it exactly.
1669 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1670 // No need to match PredReg.
1671 // Continue adding to the queue.
1672 if (Offset > MemOps.back().Offset) {
1673 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise keep MemOps sorted by offset via ordered insertion.
1678 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1680 if (Offset < I->Offset) {
1681 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1686 } else if (Offset == I->Offset) {
1687 // Collision! This can't be merged!
// Debug values never break or extend a chain.
1696 if (MBBI->isDebugValue()) {
1699 // Reach the end of the block, try merging the memory instructions.
1701 } else if (Advance) {
1705 // Reach the end of the block, try merging the memory instructions.
1712 if (NumMemOps > 1) {
1713 // Try to find a free register to use as a new base in case it's needed.
1714 // First advance to the instruction just before the start of the chain.
1715 AdvanceRS(MBB, MemOps);
1717 // Find a scratch register.
1719 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1721 // Process the load / store instructions.
1722 RS->forward(std::prev(MBBI));
// Merge the queued ops into LDM/STM; resulting instructions land in Merges.
1726 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1727 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1729 // Try folding preceding/trailing base inc/dec into the generated
// LDM/STM instructions.
1731 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1732 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1734 NumMerges += Merges.size();
1736 // Try folding preceding/trailing base inc/dec into those load/store
1737 // that were not merged to form LDM/STM ops.
1738 for (unsigned i = 0; i != NumMemOps; ++i)
1739 if (!MemOps[i].Merged)
1740 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1743 // RS may be pointing to an instruction that's deleted.
1744 RS->skipTo(std::prev(MBBI));
1745 } else if (NumMemOps == 1) {
1746 // Try folding preceding/trailing base inc/dec into the single
// load/store that could not be merged with anything else.
1748 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1750 RS->forward(std::prev(MBBI));
// Reset chain state for the next scan.
1757 CurrPred = ARMCC::AL;
1764 // If iterator hasn't been advanced and this is not a memory op, skip it.
1765 // It can't start a new chain anyway.
1766 if (!Advance && !isMemOp && MBBI != E) {
1772 return NumMerges > 0;
1775 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1776 /// into the preceding stack restore so it directly restore the value of LR
/// into pc:
1778 /// ldmfd sp!, {..., lr}
/// bx lr
/// =>
1781 /// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
1784 /// ldmfd sp!, {..., pc}
1785 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1786 // Thumb1 LDM doesn't allow high registers.
1787 if (isThumb1) return false;
1788 if (MBB.empty()) return false;
// Look at the block's last real (non-debug) instruction: it must be one of
// the recognized return forms and must not be the only instruction.
1790 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1791 if (MBBI != MBB.begin() &&
1792 (MBBI->getOpcode() == ARM::BX_RET ||
1793 MBBI->getOpcode() == ARM::tBX_RET ||
1794 MBBI->getOpcode() == ARM::MOVPCLR)) {
1795 MachineInstr *PrevMI = std::prev(MBBI);
1796 unsigned Opcode = PrevMI->getOpcode();
1797 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1798 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1799 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The pop must end in LR for the LR->PC rewrite to be a return.
1800 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1801 if (MO.getReg() != ARM::LR)
1803 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1804 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1805 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
// Retarget the pop as a return and keep the return's implicit operands.
1806 PrevMI->setDesc(TII->get(NewOpc));
1808 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
/// Pass entry point: cache subtarget/target-info pointers, detect the Thumb
/// flavor of the function, then run the LDM/STM formation (and, on v5T+,
/// return merging) over every basic block. Returns true if the function
/// was modified.
1816 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1817 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1818 TL = STI->getTargetLowering();
1819 AFI = Fn.getInfo<ARMFunctionInfo>();
1820 TII = STI->getInstrInfo();
1821 TRI = STI->getRegisterInfo();
// Fresh scavenger per function; used to find scratch registers for merges.
1822 RS = new RegScavenger();
1823 isThumb2 = AFI->isThumb2Function();
1824 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1826 bool Modified = false;
1827 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1829 MachineBasicBlock &MBB = *MFI;
1830 Modified |= LoadStoreMultipleOpti(MBB);
// LDM-into-pc returns require ARMv5T or later.
1831 if (STI->hasV5TOps())
1832 Modified |= MergeReturnIntoLDM(MBB);
1840 /// Pre- register allocation pass that move load / stores from consecutive
1841 /// locations close to make it more likely they will be combined later.
1842 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
// Pass identification.
1844 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function target info, set up in runOnMachineFunction.
1846 const DataLayout *TD;
1847 const TargetInstrInfo *TII;
1848 const TargetRegisterInfo *TRI;
1849 const ARMSubtarget *STI;
1850 MachineRegisterInfo *MRI;
1851 MachineFunction *MF;
1853 bool runOnMachineFunction(MachineFunction &Fn) override;
1855 const char *getPassName() const override {
1856 return "ARM pre- register allocation load / store optimization pass";
// Checks whether two adjacent loads/stores can be combined into a single
// LDRD/STRD, filling in the opcode, registers, and encoded offset.
1860 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1861 unsigned &NewOpc, unsigned &EvenReg,
1862 unsigned &OddReg, unsigned &BaseReg,
1864 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves queued same-base loads (or stores) next to each other.
1866 bool RescheduleOps(MachineBasicBlock *MBB,
1867 SmallVectorImpl<MachineInstr *> &Ops,
1868 unsigned Base, bool isLd,
1869 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
// Per-block driver: groups memory ops by base register and reschedules.
1870 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1872 char ARMPreAllocLoadStoreOpt::ID = 0;
/// Pre-RA pass entry point: cache target info, then reschedule load/store
/// instructions in every basic block. Returns true if anything moved.
1875 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1876 TD = Fn.getTarget().getDataLayout();
1877 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1878 TII = STI->getInstrInfo();
1879 TRI = STI->getRegisterInfo();
1880 MRI = &Fn.getRegInfo();
1883 bool Modified = false;
1884 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1886 Modified |= RescheduleLoadStoreInstrs(MFI);
/// Decide whether the memory ops in \p MemOps can be moved together across
/// the instructions in [\p I, \p E) without changing behavior, and whether
/// doing so is worth the extra register pressure.
///
/// \param isLd    true when moving loads (stores in between then block the move).
/// \param Base    common base register of the ops being moved.
/// \param MemRegs data registers of the ops being moved.
1891 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1892 MachineBasicBlock::iterator I,
1893 MachineBasicBlock::iterator E,
1894 SmallPtrSetImpl<MachineInstr*> &MemOps,
1895 SmallSet<unsigned, 4> &MemRegs,
1896 const TargetRegisterInfo *TRI) {
1897 // Are there stores / loads / calls between them?
1898 // FIXME: This is overly conservative. We should make use of alias information
// if available.
1900 SmallSet<unsigned, 4> AddedRegPressure;
// The ops being moved themselves (and debug values) are skipped.
1902 if (I->isDebugValue() || MemOps.count(&*I))
// Barriers: calls, terminators, and side-effecting instructions stop the move.
1904 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1906 if (isLd && I->mayStore())
1911 // It's not safe to move the first 'str' down.
// e.g. str r1, [r0]
1914 // str r4, [r0, #+4]
// < intervening load to the same address would observe the wrong value >
1918 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1919 MachineOperand &MO = I->getOperand(j);
1922 unsigned Reg = MO.getReg();
// Any redefinition of the base between the ops makes the move unsound.
1923 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
// Track registers whose live ranges the move would stretch.
1925 if (Reg != Base && !MemRegs.count(Reg))
1926 AddedRegPressure.insert(Reg);
1930 // Estimate register pressure increase due to the transformation.
1931 if (MemRegs.size() <= 4)
1932 // Ok if we are moving small number of instructions.
1934 return AddedRegPressure.size() <= MemRegs.size() * 2;
1938 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
///
/// Used when two loads/stores are fused into one LDRD/STRD so the combined
/// instruction retains both original memory operands for alias analysis.
1939 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1940 MachineInstr *Op1) {
1941 assert(MI->memoperands_empty() && "expected a new machineinstr");
1942 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1943 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Allocate one function-owned array and copy both operand ranges into it.
1945 MachineFunction *MF = MI->getParent()->getParent();
1946 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1947 MachineSDNode::mmo_iterator MemEnd =
1948 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1950 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1951 MI->setMemRefs(MemBegin, MemEnd);
/// Check whether the adjacent loads/stores \p Op0 and \p Op1 can be combined
/// into a single LDRD/STRD (or t2 variant). On success fills in the new
/// opcode, the two data registers, the base register, the encoded offset,
/// the predicate, and the debug location, all taken from \p Op0.
1955 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1956 DebugLoc &dl, unsigned &NewOpc,
1958 unsigned &SecondReg,
1959 unsigned &BaseReg, int &Offset,
1961 ARMCC::CondCodes &Pred,
1963 // Make sure we're allowed to generate LDRD/STRD.
1964 if (!STI->hasV5TEOps())
1967 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Select the paired opcode from the first instruction's opcode.
1969 unsigned Opcode = Op0->getOpcode();
1970 if (Opcode == ARM::LDRi12) {
1972 } else if (Opcode == ARM::STRi12) {
1974 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1975 NewOpc = ARM::t2LDRDi8;
1978 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1979 NewOpc = ARM::t2STRDi8;
1986 // Make sure the base address satisfies i64 ld / st alignment requirement.
1987 // At the moment, we ignore the memoryoperand's value.
1988 // If we want to use AliasAnalysis, we should check it accordingly.
1989 if (!Op0->hasOneMemOperand() ||
1990 (*Op0->memoperands_begin())->isVolatile())
1993 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1994 const Function *Func = MF->getFunction();
1995 unsigned ReqAlign = STI->hasV6Ops()
1996 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1997 : 8; // Pre-v6 need 8-byte align
1998 if (Align < ReqAlign)
2001 // Then make sure the immediate offset fits.
2002 int OffImm = getMemoryOpOffset(Op0);
// Thumb2 path: 8-bit scaled range, sign carried by the immediate itself.
2004 int Limit = (1 << 8) * Scale;
2005 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM path: addrmode3 encodes the sign separately, so negate and mark sub.
2009 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2011 AddSub = ARM_AM::sub;
2014 int Limit = (1 << 8) * Scale;
2015 if (OffImm >= Limit || (OffImm & (Scale-1)))
2017 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD/STRD need two distinct data registers.
2019 FirstReg = Op0->getOperand(0).getReg();
2020 SecondReg = Op1->getOperand(0).getReg();
2021 if (FirstReg == SecondReg)
2023 BaseReg = Op0->getOperand(1).getReg();
2024 Pred = getInstrPredicate(Op0, PredReg);
2025 dl = Op0->getDebugLoc();
/// Given all loads (or stores) \p Ops in a block that share base register
/// \p Base, move compatible runs of them next to each other so the post-RA
/// pass can merge them; pairs of i32 ops may be fused into LDRD/STRD here.
///
/// \param MI2LocMap maps each instruction to its program-order index, used
///                  to bound how far apart moved instructions were.
/// \returns true if any instruction was moved or fused.
2029 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2030 SmallVectorImpl<MachineInstr *> &Ops,
2031 unsigned Base, bool isLd,
2032 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2033 bool RetVal = false;
2035 // Sort by offset (in reverse order).
2036 std::sort(Ops.begin(), Ops.end(),
2037 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2038 int LOffset = getMemoryOpOffset(LHS);
2039 int ROffset = getMemoryOpOffset(RHS);
// Duplicate offsets were filtered out by the caller; assert that.
2040 assert(LHS == RHS || LOffset != ROffset);
2041 return LOffset > ROffset;
2044 // The loads / stores of the same base are in order. Scan them from first to
2045 // last and check for the following:
2046 // 1. Any def of base.
// 2. Any gaps (offsets must be consecutive, sizes equal, opcodes compatible).
2048 while (Ops.size() > 1) {
2049 unsigned FirstLoc = ~0U;
2050 unsigned LastLoc = 0;
2051 MachineInstr *FirstOp = nullptr;
2052 MachineInstr *LastOp = nullptr;
2054 unsigned LastOpcode = 0;
2055 unsigned LastBytes = 0;
2056 unsigned NumMove = 0;
// Walk Ops back-to-front (i.e. ascending offset) growing the movable run.
2057 for (int i = Ops.size() - 1; i >= 0; --i) {
2058 MachineInstr *Op = Ops[i];
2059 unsigned Loc = MI2LocMap[Op];
2060 if (Loc <= FirstLoc) {
2064 if (Loc >= LastLoc) {
// Ops only chain if they would map to the same LDM/STM opcode...
2070 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2071 if (LastOpcode && LSMOpcode != LastOpcode)
// ...and their offsets are exactly contiguous with equal access sizes.
2074 int Offset = getMemoryOpOffset(Op);
2075 unsigned Bytes = getLSMultipleTransferSize(Op);
2077 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2080 LastOffset = Offset;
2082 LastOpcode = LSMOpcode;
2083 if (++NumMove == 8) // FIXME: Tune this limit.
2090 SmallPtrSet<MachineInstr*, 4> MemOps;
2091 SmallSet<unsigned, 4> MemRegs;
2092 for (int i = NumMove-1; i >= 0; --i) {
2093 MemOps.insert(Ops[i]);
2094 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2097 // Be conservative, if the instructions are too far apart, don't
2098 // move them. We want to limit the increase of register pressure.
2099 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2101 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2102 MemOps, MemRegs, TRI);
// Not movable: drop this run from Ops and retry with the remainder.
2104 for (unsigned i = 0; i != NumMove; ++i)
2107 // This is the new location for the loads / stores.
// Loads cluster at the first op, stores at the last, skipping the run
// itself and any debug values.
2108 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2109 while (InsertPos != MBB->end()
2110 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2113 // If we are moving a pair of loads / stores, see if it makes sense
2114 // to try to allocate a pair of registers that can form register pairs.
2115 MachineInstr *Op0 = Ops.back();
2116 MachineInstr *Op1 = Ops[Ops.size()-2];
2117 unsigned FirstReg = 0, SecondReg = 0;
2118 unsigned BaseReg = 0, PredReg = 0;
2119 ARMCC::CondCodes Pred = ARMCC::AL;
2121 unsigned NewOpc = 0;
2124 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2125 FirstReg, SecondReg, BaseReg,
2126 Offset, PredReg, Pred, isT2)) {
// Constrain both virtual registers to the class LDRD/STRD requires.
2130 const MCInstrDesc &MCID = TII->get(NewOpc);
2131 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2132 MRI->constrainRegClass(FirstReg, TRC);
2133 MRI->constrainRegClass(SecondReg, TRC);
2135 // Form the pair instruction.
2137 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2138 .addReg(FirstReg, RegState::Define)
2139 .addReg(SecondReg, RegState::Define)
2141 // FIXME: We're converting from LDRi12 to an insn that still
2142 // uses addrmode2, so we need an explicit offset reg. It should
2143 // always by reg0 since we're transforming LDRi12s.
2146 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2147 concatenateMemOperands(MIB, Op0, Op1);
2148 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store pairing mirrors the load case with source (not def) registers.
2151 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2155 // FIXME: We're converting from LDRi12 to an insn that still
2156 // uses addrmode2, so we need an explicit offset reg. It should
2157 // always by reg0 since we're transforming STRi12s.
2160 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2161 concatenateMemOperands(MIB, Op0, Op1);
2162 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2169 // Add register allocation hints to form register pairs.
2170 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2171 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// No fusion: just splice the run's instructions to the cluster point.
2174 for (unsigned i = 0; i != NumMove; ++i) {
2175 MachineInstr *Op = Ops.back();
2177 MBB->splice(InsertPos, MBB, Op);
2181 NumLdStMoved += NumMove;
/// Per-block driver for the pre-RA rescheduler: number all instructions,
/// bucket candidate loads and stores by base register, and hand each bucket
/// (of two or more ops) to RescheduleOps. Calls and terminators act as
/// barriers that flush the buckets. Returns true if anything moved.
2191 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2192 bool RetVal = false;
2194 DenseMap<MachineInstr*, unsigned> MI2LocMap;
// Separate base->ops maps for loads and stores; the Bases vectors record
// insertion order so rescheduling is deterministic.
2195 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2196 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2197 SmallVector<unsigned, 4> LdBases;
2198 SmallVector<unsigned, 4> StBases;
2201 MachineBasicBlock::iterator MBBI = MBB->begin();
2202 MachineBasicBlock::iterator E = MBB->end();
2204 for (; MBBI != E; ++MBBI) {
2205 MachineInstr *MI = MBBI;
2206 if (MI->isCall() || MI->isTerminator()) {
2207 // Stop at barriers.
// (the buckets gathered so far are processed at the barrier)
2212 if (!MI->isDebugValue())
2213 MI2LocMap[MI] = ++Loc;
2215 if (!isMemoryOp(MI))
// Only unconditional ops are rescheduled; predicated ones are skipped.
2217 unsigned PredReg = 0;
2218 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2221 int Opc = MI->getOpcode();
2222 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2223 unsigned Base = MI->getOperand(1).getReg();
2224 int Offset = getMemoryOpOffset(MI);
2226 bool StopHere = false;
// Loads: append to this base's bucket unless the same base+offset was
// already seen (a duplicate ends the gathering region).
2228 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2229 Base2LdsMap.find(Base);
2230 if (BI != Base2LdsMap.end()) {
2231 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2232 if (Offset == getMemoryOpOffset(BI->second[i])) {
2238 BI->second.push_back(MI);
2240 Base2LdsMap[Base].push_back(MI);
2241 LdBases.push_back(Base);
// Stores: same bucketing logic as loads.
2244 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2245 Base2StsMap.find(Base);
2246 if (BI != Base2StsMap.end()) {
2247 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2248 if (Offset == getMemoryOpOffset(BI->second[i])) {
2254 BI->second.push_back(MI);
2256 Base2StsMap[Base].push_back(MI);
2257 StBases.push_back(Base);
2262 // Found a duplicate (a base+offset combination that's seen earlier).
// Process what we have so far and restart gathering after this point.
2269 // Re-schedule loads.
2270 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2271 unsigned Base = LdBases[i];
2272 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2274 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2277 // Re-schedule stores.
2278 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2279 unsigned Base = StBases[i];
2280 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2282 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset all per-region state before continuing the scan.
2286 Base2LdsMap.clear();
2287 Base2StsMap.clear();
2297 /// Returns an instance of the load / store optimization pass.
/// \param PreAlloc selects the pre-register-allocation rescheduling pass;
///                 otherwise the post-RA LDM/STM formation pass is created.
2298 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2300 return new ARMPreAllocLoadStoreOpt();
2301 return new ARMLoadStoreOpt();