//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines load / store
/// instructions to form ldm / stm instructions.
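///
/// For illustration (register numbers chosen arbitrarily), a typical merge:
///   ldr r1, [r4]
///   ldr r2, [r4, #4]
///   ldr r3, [r4, #8]
/// =>
///   ldmia r4, {r1, r2, r3}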
struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  ARMFunctionInfo *AFI;
  RegScavenger *RS;
  bool isThumb2;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";
  }
  struct MemOpQueueEntry {
    int Offset;
    unsigned Position;
    bool Merged;
    MachineBasicBlock::iterator MBBI;
    MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
      : Offset(o), Position(p), MBBI(i), Merged(false) {}
  };
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);

  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                bool &Advance,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 bool &Advance,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
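///
/// If the starting offset is not zero, a new base register is materialized
/// first; e.g. (illustrative) for three loads at offsets 8, 12 and 16 off r4:
///   add   r2, r4, #8
///   ldmia r2, {r0, r1, r2}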
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.
    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }
  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
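///
/// Note that AM4 (ldm / stm) wants the register list in ascending register
/// number order, so e.g. (illustrative):
///   ldr r1, [r4]
///   ldr r2, [r4, #4]
///   ldr r0, [r4, #8]
/// can only merge the first two into "ldmia r4, {r1, r2}"; the load of r0
/// is left alone.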
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
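
// For illustration: with Base = r4 and Bytes = 16, isMatchingIncrement
// accepts "add r4, r4, #16" and isMatchingDecrement accepts "sub r4, r4, #16"
// (given a matching predicate and an in-range offset).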
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
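///
/// e.g. (illustrative):
///   ldr r1, [r0]
///   r0 := r0 + 4
/// =>
///   ldr r1, [r0], #4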
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
    Opcode == ARM::FSTD || Opcode == ARM::FSTS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  int Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
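
// For illustration: for "ldr r0, [r1, #-8]" (AM2 with a sub offset) this
// returns -8; for "fldd d0, [r1, #8]" (AM5, offset scaled by 4) it returns 8.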
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
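
/// FixInvalidRegPairOp - An ldrd / strd needs an even / odd register pair
/// (e.g. ldrd r0, r1, ...). If the pair in the instruction is not encodable
/// as one, expand it into an equivalent ldm / stm or into a pair of
/// ldr / str instructions.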
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0) // No writeback.
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0) // No writeback.
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }
    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }
    }

    // If the iterator hasn't been advanced and this is not a memory op, skip
    // it. It can't start a new chain anyway.
    if (!Advance && !isMemOp && MBBI != E) {
      ++Position;
      ++MBBI;
    }
  }
  return NumMerges > 0;
}
struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  }
};
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
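///
/// e.g. (illustrative):
///   ldr r1, [r0, #4]
///   mul r7, r5, r6
///   ldr r2, [r0, #8]
/// is rescheduled so that the two loads become adjacent, giving a later pass
/// the chance to combine them (e.g. into an ldrd).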
struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

  const TargetData *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";
  }

private:
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &OffReg, int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
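
// For illustration: two word loads "ldr r0, [r2]" and "ldr r1, [r2, #4]"
// pass these checks (given a sufficiently aligned base) and can then be
// combined into "ldrd r0, r1, [r2]".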
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                       SmallVector<MachineInstr*, 4> &Ops,
                                       unsigned Base, bool isLd,
                                       DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
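
// For illustration, a target machine is expected to schedule both variants,
// e.g. (hypothetical hook placement):
//   PM.add(createARMLoadStoreOptimizationPass(true));  // before regalloc
//   PM.add(createARMLoadStoreOptimizationPass(false)); // after regalloc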