//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:      ++NumLDMGened;  return ARM::LDM;
  case ARM::STR:      ++NumSTMGened;  return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12: ++NumLDMGened;  return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12: ++NumSTMGened;  return ARM::t2STM;
  case ARM::VLDRS:    ++NumVLDMGened; return ARM::VLDMS;
  case ARM::VSTRS:    ++NumVSTMGened; return ARM::VSTMS;
  case ARM::VLDRD:    ++NumVLDMGened; return ARM::VLDMD;
  case ARM::VSTRD:    ++NumVSTMGened; return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
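/// For example (an illustrative sketch; register choices are arbitrary):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// =>
///   ldmia r0, {r1, r2}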
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }
  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }

    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
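/// The entries in MemOps must sit at increasing offsets with register numbers
/// in ascending order; when an entry cannot be folded in (wrong register
/// order, an offset gap, or SP), the queue is split: the entries collected so
/// far are merged via MergeOpsUpdate and the scan restarts at the offending
/// entry.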
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
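/// isMatchingDecrement - Return true if MI is a sub instruction that
/// decrements Base by Bytes under predicate Pred (e.g. "sub r3, r3, #8");
/// a nonzero Limit bounds the acceptable Bytes.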
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
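/// getLSMultipleTransferSize - Return the size in bytes of the memory
/// transferred by a load / store or load / store multiple instruction.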
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR: case ARM::STR:
  case ARM::t2LDRi8: case ARM::t2LDRi12:
  case ARM::t2STRi8: case ARM::t2STRi12:
  case ARM::VLDRS: case ARM::VSTRS:
    return 4;
  case ARM::VLDRD: case ARM::VSTRD:
    return 8;
  case ARM::LDM: case ARM::STM:
  case ARM::t2LDM: case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS: case ARM::VSTMS:
  case ARM::VLDMD: case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
               Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
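///
/// An illustrative case (post-indexed form; register choices arbitrary):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4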
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
               Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
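/// getMemoryOpOffset - Return the signed byte offset encoded in the
/// addressing-mode immediate operand of a load / store instruction.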
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
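/// FixInvalidRegPairOp - An LDRD / STRD needs an even / odd consecutive
/// register pair. If the allocated registers do not form such a pair, turn
/// the instruction into an LDM / STM (ascending registers, zero offset) or
/// split it into two single loads / stores.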
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
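/// For example (illustrative; the trailing base update is folded in a second
/// step by MergeBaseUpdateLSMultiple):
///   ldr r2, [r0]
///   ldr r3, [r0, #4]
///   add r0, r0, #8
/// =>
///   ldmia r0!, {r2, r3}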
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
        // Try folding a preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding a preceding/trailing base inc/dec into those
        // loads/stores that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding a preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }

  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together so they are
/// more likely to be combined later.
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }
  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
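/// CanFormLdStDWord - Return true if the two given loads / stores can be
/// combined into a single LDRD / STRD, filling in the new opcode, registers,
/// offset, and predicate for the caller to use.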
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;
  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;
  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
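/// RescheduleOps - Move loads / stores of the same base register next to each
/// other (when safe and profitable) so later passes can merge them; a
/// qualifying pair is turned into an LDRD / STRD directly.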
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;
        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);
          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
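/// RescheduleLoadStoreInstrs - Scan a basic block, grouping unpredicated
/// loads / stores by base register, and try to reschedule each group with
/// RescheduleOps.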
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);
      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}