1 //===--- HexagonSplitDouble.cpp -------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #define DEBUG_TYPE "hsdr"
12 #include "HexagonInstrInfo.h"
13 #include "HexagonRegisterInfo.h"
14 #include "HexagonSubtarget.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineLoopInfo.h"
25 #include "llvm/CodeGen/MachineMemOperand.h"
26 #include "llvm/CodeGen/MachineOperand.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/IR/DebugLoc.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compiler.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Target/TargetRegisterInfo.h"
// Declarations placed in namespace llvm: the factory that creates this pass
// (defined at the end of this file) and the registry-initialization routine
// that the pass constructor calls.
49 FunctionPass *createHexagonSplitDoubleRegs();
50 void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
52 } // end namespace llvm
// Hidden option "max-hsdr": bound on the number of split partitions.
// Defaults to -1; runOnMachineFunction only tests the bound when it is >= 0.
56 static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
57 cl::desc("Maximum number of split partitions"));
// Hidden option "hsdr-no-mem": defaults to true. isFixedInstr consults it for
// every instruction that may load or store.
58 static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
59 cl::desc("Do not split loads or stores"));
// Machine-function pass reported as "Hexagon Split Double Registers".
// It requires MachineLoopInfo and declares it preserved.
61 class HexagonSplitDoubleRegs : public MachineFunctionPass {
// The constructor registers the pass with the global PassRegistry.
65 HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr),
67 initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
70 StringRef getPassName() const override {
71 return "Hexagon Split Double Registers";
74 void getAnalysisUsage(AnalysisUsage &AU) const override {
75 AU.addRequired<MachineLoopInfo>();
76 AU.addPreserved<MachineLoopInfo>();
77 MachineFunctionPass::getAnalysisUsage(AU);
80 bool runOnMachineFunction(MachineFunction &MF) override;
// Register class of the virtual registers this pass works on; defined
// after the class as &Hexagon::DoubleRegsRegClass.
83 static const TargetRegisterClass *const DoubleRC;
// Per-function state, assigned at the start of runOnMachineFunction.
85 const HexagonRegisterInfo *TRI;
86 const HexagonInstrInfo *TII;
87 const MachineLoopInfo *MLI;
88 MachineRegisterInfo *MRI;
// USet:       set of register numbers.
// UUSetMap:   unsigned key -> register set (used for partition number ->
//             registers in partitionRegisters/runOnMachineFunction).
// UUPair:     two register numbers; splitPartition stores (LoR, HiR).
// UUPairMap:  double register -> its UUPair of replacement registers.
// LoopRegMap: loop -> register set (filled in by collectIndRegs).
90 typedef std::set<unsigned> USet;
91 typedef std::map<unsigned,USet> UUSetMap;
92 typedef std::pair<unsigned,unsigned> UUPair;
93 typedef std::map<unsigned,UUPair> UUPairMap;
94 typedef std::map<const MachineLoop*,USet> LoopRegMap;
// Analysis: classification of instructions, partitioning of double
// registers, and profit estimation.
96 bool isInduction(unsigned Reg, LoopRegMap &IRM) const;
97 bool isVolatileInstr(const MachineInstr *MI) const;
98 bool isFixedInstr(const MachineInstr *MI) const;
99 void partitionRegisters(UUSetMap &P2Rs);
100 int32_t profit(const MachineInstr *MI) const;
101 bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
// Collection of per-loop induction registers.
103 void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
104 void collectIndRegs(LoopRegMap &IRM);
// Transformation helpers; all of them are driven from splitPartition,
// either directly or through splitInstr.
106 void createHalfInstr(unsigned Opc, MachineInstr *MI,
107 const UUPairMap &PairMap, unsigned SubR);
108 void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap);
109 void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap);
110 void splitCombine(MachineInstr *MI, const UUPairMap &PairMap);
111 void splitExt(MachineInstr *MI, const UUPairMap &PairMap);
112 void splitShift(MachineInstr *MI, const UUPairMap &PairMap);
113 void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap);
114 bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap);
115 void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap);
116 void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap);
117 bool splitPartition(const USet &Part);
// Debug helper that prints a register set.
120 static void dump_partition(raw_ostream&, const USet&,
121 const TargetRegisterInfo&);
// Definitions of the pass's static members. Counter starts at 0 and is
// compared against the max-hsdr limit in runOnMachineFunction; DoubleRC is
// bound to the Hexagon double-register class.
124 char HexagonSplitDoubleRegs::ID;
125 int HexagonSplitDoubleRegs::Counter = 0;
126 const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC
127 = &Hexagon::DoubleRegsRegClass;
129 } // end anonymous namespace
// Pass registration: argument string "hexagon-split-double", description
// "Hexagon Split Double Registers"; both trailing flags are false.
131 INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
132 "Hexagon Split Double Registers", false, false)
// Prints the registers of Part, each formatted with PrintReg and preceded by
// a space.
// NOTE(review): the visible output statement writes to dbgs() rather than to
// the `os` parameter -- confirm whether ignoring `os` is intended.
134 void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
135 const USet &Part, const TargetRegisterInfo &TRI) {
138 dbgs() << ' ' << PrintReg(I, &TRI);
// Looks Reg up in the register sets stored as the values of IRM (one set per
// loop, as filled in by collectIndRegs).
142 bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
144 const USet &Rs = I.second;
145 if (Rs.find(Reg) != Rs.end())
// Walks the memory operands attached to MI.
// NOTE(review): by its name this presumably reports whether any memory
// operand is volatile; the test itself is not shown here -- confirm.
151 bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
152 for (auto &I : MI->memoperands())
// Classifies MI as "fixed", i.e. an instruction this pass does not split
// (splitPartition sends fixed instructions to collapseRegPairs and all others
// to splitInstr). The checks look, in order, at: memory access combined with
// the hsdr-no-mem option and volatility, debug values, the opcode, and
// whether the register operands are virtual registers.
158 bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
159 if (MI->mayLoad() || MI->mayStore())
160 if (MemRefsFixed || isVolatileInstr(MI))
162 if (MI->isDebugValue())
165 unsigned Opc = MI->getOpcode();
// The opcodes listed below are the same ones that profit() has cases for.
170 case TargetOpcode::PHI:
171 case TargetOpcode::COPY:
174 case Hexagon::L2_loadrd_io:
175 // Not handling stack loads (only reg-based addresses).
176 if (MI->getOperand(1).isReg())
179 case Hexagon::S2_storerd_io:
180 // Not handling stack stores (only reg-based addresses).
181 if (MI->getOperand(0).isReg())
184 case Hexagon::L2_loadrd_pi:
185 case Hexagon::S2_storerd_pi:
187 case Hexagon::A2_tfrpi:
188 case Hexagon::A2_combineii:
189 case Hexagon::A4_combineir:
190 case Hexagon::A4_combineii:
191 case Hexagon::A4_combineri:
192 case Hexagon::A2_combinew:
193 case Hexagon::CONST64:
195 case Hexagon::A2_sxtw:
197 case Hexagon::A2_andp:
198 case Hexagon::A2_orp:
199 case Hexagon::A2_xorp:
200 case Hexagon::S2_asl_i_p_or:
201 case Hexagon::S2_asl_i_p:
202 case Hexagon::S2_asr_i_p:
203 case Hexagon::S2_lsr_i_p:
// Final check: inspect every operand's register and test whether it is a
// virtual register.
207 for (auto &Op : MI->operands()) {
210 unsigned R = Op.getReg();
211 if (!TargetRegisterInfo::isVirtualRegister(R))
// Groups the virtual registers of class DoubleRC into partitions and stores
// the result in P2Rs (partition number -> set of registers).
//
// Visible phases:
//   1. Scan all virtual registers and select those whose class is DoubleRC.
//   2. Test each of them for "fixed": no defining instruction, or a defining
//      instruction for which isFixedInstr holds.
//   3. For each double register, walk its non-debug uses and associate it
//      with other DoubleRC virtual registers used without a subregister in
//      the same instruction (the association is recorded both ways).
//   4. Traverse the associations with a work queue, assigning a partition to
//      each visited register. A traversal that starts at a fixed register
//      uses partition 0; otherwise a fresh partition number is taken.
217 void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
218 typedef std::map<unsigned,unsigned> UUMap;
219 typedef std::vector<unsigned> UVect;
221 unsigned NumRegs = MRI->getNumVirtRegs();
// Bit i corresponds to the virtual register with index i.
222 BitVector DoubleRegs(NumRegs);
223 for (unsigned i = 0; i < NumRegs; ++i) {
224 unsigned R = TargetRegisterInfo::index2VirtReg(i);
225 if (MRI->getRegClass(R) == DoubleRC)
229 BitVector FixedRegs(NumRegs);
230 for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
231 unsigned R = TargetRegisterInfo::index2VirtReg(x);
232 MachineInstr *DefI = MRI->getVRegDef(R);
233 // In some cases a register may exist, but never be defined or used.
234 // It should never appear anywhere, but mark it as "fixed", just to be
236 if (!DefI || isFixedInstr(DefI))
// Build the association map from the uses of each double register.
241 for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
244 unsigned R = TargetRegisterInfo::index2VirtReg(x);
245 DEBUG(dbgs() << PrintReg(R, TRI) << " ~~");
246 USet &Asc = AssocMap[R];
247 for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
249 MachineOperand &Op = *U;
250 MachineInstr *UseI = Op.getParent();
251 if (isFixedInstr(UseI))
// Examine the other operands of the using instruction.
253 for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
254 MachineOperand &MO = UseI->getOperand(i);
255 // Skip non-registers or registers with subregisters.
256 if (&MO == &Op || !MO.isReg() || MO.getSubReg())
258 unsigned T = MO.getReg();
259 if (!TargetRegisterInfo::isVirtualRegister(T)) {
263 if (MRI->getRegClass(T) != DoubleRC)
265 unsigned u = TargetRegisterInfo::virtReg2Index(T);
268 DEBUG(dbgs() << ' ' << PrintReg(T, TRI));
270 // Make it symmetric.
271 AssocMap[T].insert(R);
274 DEBUG(dbgs() << '\n');
// Assign partition numbers by following associations from each register
// that has not been visited yet.
280 for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
281 unsigned R = TargetRegisterInfo::index2VirtReg(x);
282 if (Visited.count(R))
284 // Create a new partition for R.
285 unsigned ThisP = FixedRegs[x] ? 0 : NextP++;
// WorkQ.size() is re-read on every iteration of this loop.
288 for (unsigned i = 0; i < WorkQ.size(); ++i) {
289 unsigned T = WorkQ[i];
290 if (Visited.count(T))
294 // Add all registers associated with T.
295 USet &Asc = AssocMap[T];
296 for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J)
// Invert the (register -> partition) pairs into P2Rs.
302 P2Rs[I.second].insert(I.first);
// Profit heuristic for an immediate given as two 32-bit halves (Lo, Hi).
// Each half that is 0 or 0xFFFFFFFF adds 10 to P. A further case is tested
// when neither half is such a value and both halves are equal.
305 static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
307 bool LoZ1 = false, HiZ1 = false;
308 if (Lo == 0 || Lo == 0xFFFFFFFF)
309 P += 10, LoZ1 = true;
310 if (Hi == 0 || Hi == 0xFFFFFFFF)
311 P += 10, HiZ1 = true;
312 if (!LoZ1 && !HiZ1 && Lo == Hi)
// Returns a per-instruction profit score for splitting MI, selected by
// opcode. isProfitable compares the result against
// std::numeric_limits<int>::min() as a special value.
317 int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
319 unsigned Opc = MI->getOpcode();
321 case TargetOpcode::PHI:
322 for (const auto &Op : MI->operands())
326 case TargetOpcode::COPY:
// The score for a COPY depends on whether its source has a subregister.
327 if (MI->getOperand(1).getSubReg() != 0)
331 case Hexagon::L2_loadrd_io:
332 case Hexagon::S2_storerd_io:
334 case Hexagon::L2_loadrd_pi:
335 case Hexagon::S2_storerd_pi:
338 case Hexagon::A2_tfrpi:
339 case Hexagon::CONST64: {
// Score the 64-bit immediate by its two 32-bit halves.
340 uint64_t D = MI->getOperand(1).getImm();
341 unsigned Lo = D & 0xFFFFFFFFULL;
342 unsigned Hi = D >> 32;
343 return profitImm(Lo, Hi);
345 case Hexagon::A2_combineii:
346 case Hexagon::A4_combineii:
347 return profitImm(MI->getOperand(1).getImm(),
348 MI->getOperand(2).getImm());
349 case Hexagon::A4_combineri:
351 case Hexagon::A4_combineir: {
// The immediate operand (index ImmX) gets special treatment when it is
// 0 or -1.
353 int64_t V = MI->getOperand(ImmX).getImm();
354 if (V == 0 || V == -1)
356 // Fall through into A2_combinew.
359 case Hexagon::A2_combinew:
362 case Hexagon::A2_sxtw:
365 case Hexagon::A2_andp:
366 case Hexagon::A2_orp:
367 case Hexagon::A2_xorp:
370 case Hexagon::S2_asl_i_p_or: {
// Shift amounts of 0 and 32 are singled out.
371 unsigned S = MI->getOperand(3).getImm();
372 if (S == 0 || S == 32)
376 case Hexagon::S2_asl_i_p:
377 case Hexagon::S2_asr_i_p:
378 case Hexagon::S2_lsr_i_p:
// Shift amounts of 0 and 32 are singled out.
379 unsigned S = MI->getOperand(2).getImm();
380 if (S == 0 || S == 32)
// Estimates whether splitting the registers in Part is worthwhile, using an
// accumulated total (TotalP) that is printed under DEBUG as
// "Partition profit". For every register in the partition it evaluates
// profit() of the defining instruction, checks whether the register is an
// induction register according to IRM, and then examines each non-debug use.
392 bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
394 unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
397 for (unsigned DR : Part) {
398 MachineInstr *DefI = MRI->getVRegDef(DR);
399 int32_t P = profit(DefI);
// The minimum int value from profit() is tested for as a special case.
400 if (P == std::numeric_limits<int>::min())
403 // Reduce the profitability of splitting induction registers.
404 if (isInduction(DR, IRM))
407 for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
409 MachineInstr *UseI = U->getParent();
410 if (isFixedInstr(UseI)) {
412 // Calculate the cost of generating REG_SEQUENCE instructions.
413 for (auto &Op : UseI->operands()) {
414 if (Op.isReg() && Part.count(Op.getReg()))
420 // If a register from this partition is used in a fixed instruction,
421 // and there is also a register in this partition that is used in
422 // a loop phi node, then decrease the splitting profit as this can
423 // confuse the modulo scheduler.
// Test whether the use is located in the header block of a loop.
425 const MachineBasicBlock *PB = UseI->getParent();
426 const MachineLoop *L = MLI->getLoopFor(PB);
427 if (L && L->getHeader() == PB)
430 // Splittable instruction.
432 int32_t P = profit(UseI);
433 if (P == std::numeric_limits<int>::min())
// Penalty of 20 per counted loop phi, applied only when the partition
// also has at least one fixed use.
439 if (FixedNum > 0 && LoopPhiNum > 0)
440 TotalP -= 20*LoopPhiNum;
442 DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
// Collects into Rs the double registers that look like induction registers
// of loop L: registers defined in the loop header (by phi nodes, per the
// comments below) whose value is used by an A2_addp producing one of the
// inputs of the compare that controls the latch branch.
446 void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
448 const MachineBasicBlock *HB = L->getHeader();
449 const MachineBasicBlock *LB = L->getLoopLatch();
453 // Examine the latch branch. Expect it to be a conditional branch to
454 // the header (either "br-cond header" or "br-cond exit; br header").
455 MachineBasicBlock *TB = nullptr, *FB = nullptr;
// analyzeBranch is called on a non-const block pointer, hence the
// const_cast.
456 MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
457 SmallVector<MachineOperand,2> Cond;
458 bool BadLB = TII->analyzeBranch(*TmpLB, TB, FB, Cond, false);
459 // Only analyzable conditional branches. HII::analyzeBranch will put
460 // the branch opcode as the first element of Cond, and the predicate
461 // operand as the second.
462 if (BadLB || Cond.size() != 2)
464 // Only simple jump-conditional (with or without negation).
465 if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm()))
467 // Must go to the header.
468 if (TB != HB && FB != HB)
470 assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch");
471 // Expect a predicate register.
472 unsigned PR = Cond[1].getReg();
473 assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
475 // Get the registers on which the loop controlling compare instruction
477 unsigned CmpR1 = 0, CmpR2 = 0;
478 const MachineInstr *CmpI = MRI->getVRegDef(PR);
// Look through any chain of C2_not to reach the underlying definition.
479 while (CmpI->getOpcode() == Hexagon::C2_not)
480 CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
482 int Mask = 0, Val = 0;
483 bool OkCI = TII->analyzeCompare(*CmpI, CmpR1, CmpR2, Mask, Val);
486 // Eliminate non-double input registers.
487 if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC)
489 if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC)
491 if (!CmpR1 && !CmpR2)
494 // Now examine the top of the loop: the phi nodes that could poten-
495 // tially define loop induction registers. The registers defined by
496 // such a phi node would be used in a 64-bit add, which then would
497 // be used in the loop compare instruction.
499 // Get the set of all double registers defined by phi nodes in the
501 typedef std::vector<unsigned> UVect;
503 for (auto &MI : *HB) {
506 const MachineOperand &MD = MI.getOperand(0);
507 unsigned R = MD.getReg();
508 if (MRI->getRegClass(R) == DoubleRC)
// Predicate handed to remove_if below: registers for which it holds are
// moved out of the range that is later inserted into Rs.
514 auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool {
515 for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end();
517 const MachineInstr *UseI = I->getParent();
518 if (UseI->getOpcode() != Hexagon::A2_addp)
520 // Get the output from the add. If it is one of the inputs to the
521 // loop-controlling compare instruction, then R is likely an induc-
523 unsigned T = UseI->getOperand(0).getReg();
524 if (T == CmpR1 || T == CmpR2)
// Keep only the candidates that survive the predicate.
529 UVect::iterator End = llvm::remove_if(DP, NoIndOp);
530 Rs.insert(DP.begin(), End);
535 dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: ";
536 dump_partition(dbgs(), Rs, *TRI);
// Fills IRM with one entry per loop in the work queue: the register set
// computed for that loop by collectIndRegsForLoop.
541 void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
542 typedef std::vector<MachineLoop*> LoopVector;
// WorkQ.size() is re-read on every iteration, so entries appended during
// the walk are visited as well; each queued loop's contents are iterated.
547 for (unsigned i = 0; i < WorkQ.size(); ++i) {
548 for (auto I : *WorkQ[i])
// Second pass: compute and record the register set of every queued loop.
553 for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
554 MachineLoop *L = WorkQ[i];
556 collectIndRegsForLoop(L, Rs);
558 IRM.insert(std::make_pair(L, Rs));
// Builds, at MI's position in its block, a new instruction with opcode Opc
// whose operands are derived from MI's operands. For a virtual register of
// class DoubleRC that has an entry in PairMap, the register is replaced by
// one element of the pair: the first when SubR is Hexagon::isub_lo, the
// second otherwise. The remaining operand flags are copied from the original
// operand.
562 void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
563 const UUPairMap &PairMap, unsigned SubR) {
564 MachineBasicBlock &B = *MI->getParent();
565 DebugLoc DL = MI->getDebugLoc();
566 MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc));
568 for (auto &Op : MI->operands()) {
// Operand copied to the new instruction unchanged.
570 NewI->addOperand(Op);
573 // For register operands, set the subregister.
574 unsigned R = Op.getReg();
575 unsigned SR = Op.getSubReg();
576 bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R);
577 bool isKill = Op.isKill();
578 if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
580 UUPairMap::const_iterator F = PairMap.find(R);
581 if (F == PairMap.end()) {
584 const UUPair &P = F->second;
585 R = (SubR == Hexagon::isub_lo) ? P.first : P.second;
// Recreate the register operand with the possibly updated register,
// kill flag and subregister.
589 auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill,
590 Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(),
591 Op.isInternalRead());
592 NewI->addOperand(CO);
// Replaces a 64-bit load or store (MI) with two 32-bit accesses
// (L2_loadri_io or S2_storeri_io) on the pair of registers that PairMap
// holds for the value operand. For the post-increment opcodes
// (L2_loadrd_pi / S2_storerd_pi) the base offset is 0 and an A2_addi
// computing the updated address register is emitted as well. Memory operands
// of MI are re-created as two 4-byte memory operands.
596 void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
597 const UUPairMap &PairMap) {
598 bool Load = MI->mayLoad();
599 unsigned OrigOpc = MI->getOpcode();
600 bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi ||
601 OrigOpc == Hexagon::S2_storerd_pi);
602 MachineInstr *LowI, *HighI;
603 MachineBasicBlock &B = *MI->getParent();
604 DebugLoc DL = MI->getDebugLoc();
606 // Index of the base-address-register operand.
607 unsigned AdrX = PostInc ? (Load ? 2 : 1)
609 MachineOperand &AdrOp = MI->getOperand(AdrX);
610 unsigned RSA = getRegState(AdrOp);
// The value operand is operand 0 for loads; for stores it is operand 3
// (post-increment) or operand 2.
611 MachineOperand &ValOp = Load ? MI->getOperand(0)
612 : (PostInc ? MI->getOperand(3)
613 : MI->getOperand(2));
614 UUPairMap::const_iterator F = PairMap.find(ValOp.getReg());
615 assert(F != PairMap.end());
// Load: the two halves are loaded into P.first and P.second. The kill
// flag is cleared on the address register in both new instructions.
618 const UUPair &P = F->second;
619 int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm();
620 LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first)
621 .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
623 HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second)
624 .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
// Store: same address handling as for the load.
627 const UUPair &P = F->second;
628 int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm();
629 LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
630 .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
633 HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
634 .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
640 // Create the increment of the address register.
641 int64_t Inc = Load ? MI->getOperand(3).getImm()
642 : MI->getOperand(2).getImm();
643 MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
644 const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
645 unsigned NewR = MRI->createVirtualRegister(RC);
646 assert(!UpdOp.getSubReg() && "Def operand with subreg");
// This A2_addi receives the address register with its original state RSA
// (the kill flag is not cleared here).
647 BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
648 .addReg(AdrOp.getReg(), RSA)
// Redirect all references to the old updated-address register to NewR.
650 MRI->replaceRegWith(UpdOp.getReg(), NewR);
651 // The original instruction will be deleted later.
654 // Generate a new pair of memory-operands.
655 MachineFunction &MF = *B.getParent();
656 for (auto &MO : MI->memoperands()) {
657 const MachinePointerInfo &Ptr = MO->getPointerInfo();
658 MachineMemOperand::Flags F = MO->getFlags();
659 int A = MO->getAlignment();
// The low access keeps the original alignment A; the high access gets
// min(A, 4).
661 auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
662 LowI->addMemOperand(MF, Tmp1);
663 auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
664 HighI->addMemOperand(MF, Tmp2);
// Replaces an instruction that loads a 64-bit immediate into a double
// register (operand 0 = register, operand 1 = immediate) with two A2_tfrsi
// instructions: the low 32 bits go to the first register of the pair that
// PairMap holds for the destination, the high 32 bits to the second.
668 void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
669 const UUPairMap &PairMap) {
670 MachineOperand &Op0 = MI->getOperand(0);
671 MachineOperand &Op1 = MI->getOperand(1);
672 assert(Op0.isReg() && Op1.isImm());
673 uint64_t V = Op1.getImm();
675 MachineBasicBlock &B = *MI->getParent();
676 DebugLoc DL = MI->getDebugLoc();
677 UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
678 assert(F != PairMap.end());
679 const UUPair &P = F->second;
681 // The operand to A2_tfrsi can only have 32 significant bits. Immediate
682 // values in MachineOperand are stored as 64-bit integers, and so the
683 // value -1 may be represented either as 64-bit -1, or 4294967295. Both
684 // will have the 32 higher bits truncated in the end, but -1 will remain
685 // as -1, while the latter may appear to be a large unsigned value
686 // requiring a constant extender. The casting to int32_t will select the
687 // former representation. (The same reasoning applies to all 32-bit
689 BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
690 .addImm(int32_t(V & 0xFFFFFFFFULL));
691 BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
692 .addImm(int32_t(V >> 32));
// Replaces a combine-style instruction (operand 0 = destination double
// register, operands 1 and 2 = the two sources) with one instruction per
// half. Operand 1 feeds the second register of the destination pair and
// operand 2 feeds the first. An immediate source is materialized with
// A2_tfrsi, a register source with COPY; any other operand kind is
// unreachable.
695 void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
696 const UUPairMap &PairMap) {
697 MachineOperand &Op0 = MI->getOperand(0);
698 MachineOperand &Op1 = MI->getOperand(1);
699 MachineOperand &Op2 = MI->getOperand(2);
702 MachineBasicBlock &B = *MI->getParent();
703 DebugLoc DL = MI->getDebugLoc();
704 UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
705 assert(F != PairMap.end());
706 const UUPair &P = F->second;
// Operand 1 -> P.second.
709 BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
710 .addImm(Op1.getImm());
711 } else if (Op1.isReg()) {
712 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
713 .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
715 llvm_unreachable("Unexpected operand");
// Operand 2 -> P.first.
718 BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
719 .addImm(Op2.getImm());
720 } else if (Op2.isReg()) {
721 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
722 .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
724 llvm_unreachable("Unexpected operand");
// Splits an extension instruction (operand 0 = destination double register,
// operand 1 = source register): the first register of the destination pair
// receives a COPY of the source, and the second is produced by an S2_asr_i_r
// of the same source. Only the second instruction keeps the source's
// original register state; the kill flag is cleared on the COPY.
727 void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
728 const UUPairMap &PairMap) {
729 MachineOperand &Op0 = MI->getOperand(0);
730 MachineOperand &Op1 = MI->getOperand(1);
731 assert(Op0.isReg() && Op1.isReg());
733 MachineBasicBlock &B = *MI->getParent();
734 DebugLoc DL = MI->getDebugLoc();
735 UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
736 assert(F != PairMap.end());
737 const UUPair &P = F->second;
738 unsigned RS = getRegState(Op1);
740 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
741 .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg());
742 BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second)
743 .addReg(Op1.getReg(), RS, Op1.getSubReg())
// Splits a 64-bit shift by an immediate (operand 0 = destination, operand 1
// = source register, operand 2 = shift amount, asserted to be in [0, 64))
// into 32-bit operations on the isub_lo / isub_hi halves of the source. The
// results are written to the (LoR, HiR) pair that PairMap holds for the
// destination. S2_lsr_i_p and S2_asr_i_p are right shifts; S2_asr_i_p is the
// signed one. In each emitted sequence, only the last use of the source
// keeps its original register state; earlier uses have the kill flag
// cleared.
747 void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
748 const UUPairMap &PairMap) {
749 using namespace Hexagon;
751 MachineOperand &Op0 = MI->getOperand(0);
752 MachineOperand &Op1 = MI->getOperand(1);
753 MachineOperand &Op2 = MI->getOperand(2);
754 assert(Op0.isReg() && Op1.isReg() && Op2.isImm());
755 int64_t Sh64 = Op2.getImm();
756 assert(Sh64 >= 0 && Sh64 < 64);
759 UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
760 assert(F != PairMap.end());
761 const UUPair &P = F->second;
762 unsigned LoR = P.first;
763 unsigned HiR = P.second;
765 unsigned Opc = MI->getOpcode();
766 bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
768 bool Signed = (Opc == S2_asr_i_p);
770 MachineBasicBlock &B = *MI->getParent();
771 DebugLoc DL = MI->getDebugLoc();
772 unsigned RS = getRegState(Op1);
// 32-bit shift opcode matching the direction and signedness of MI.
773 unsigned ShiftOpc = Left ? S2_asl_i_r
774 : (Signed ? S2_asr_i_r : S2_lsr_i_r);
775 unsigned LoSR = isub_lo;
776 unsigned HiSR = isub_hi;
779 // No shift, subregister copy.
780 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
781 .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
782 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR)
783 .addReg(Op1.getReg(), RS, HiSR);
// Case needing a temporary 32-bit register (presumably 0 < S < 32 --
// the guarding condition should be confirmed).
785 const TargetRegisterClass *IntRC = &IntRegsRegClass;
786 unsigned TmpR = MRI->createVirtualRegister(IntRC);
788 // Shift left: DR = shl R, #s
789 // LoR = shl R.lo, #s
790 // TmpR = extractu R.lo, #s, #32-s
791 // HiR = or (TmpR, asl(R.hi, #s))
792 // Shift right: DR = shr R, #s
793 // HiR = shr R.hi, #s
794 // TmpR = shr R.lo, #s
795 // LoR = insert TmpR, R.hi, #s, #32-s
798 // LoR = shl R.lo, #s
800 // TmpR = shr R.lo, #s
802 // Make a special case for A2_aslh and A2_asrh (they are predicable as
803 // opposed to S2_asl_i_r/S2_asr_i_r).
805 BuildMI(B, MI, DL, TII->get(A2_aslh), LoR)
806 .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
807 else if (S == 16 && Signed)
808 BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR)
809 .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
811 BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR))
812 .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
816 // TmpR = extractu R.lo, #s, #32-s
817 BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR)
818 .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
821 // HiR = or (TmpR, asl(R.hi, #s))
822 BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
824 .addReg(Op1.getReg(), RS, HiSR)
827 // HiR = shr R.hi, #s
828 BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR)
829 .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR)
831 // LoR = insert TmpR, R.hi, #s, #32-s
832 BuildMI(B, MI, DL, TII->get(S2_insert), LoR)
834 .addReg(Op1.getReg(), RS, HiSR)
// Shift by exactly 32: one half of the source is copied into the
// opposite half of the result (lo -> HiR for a left shift, hi -> LoR
// for a right shift); the other result half is set by A2_tfrsi or, for
// the signed right shift, by S2_asr_i_r on the source's high half.
838 } else if (S == 32) {
839 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR))
840 .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR));
842 BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
844 else // Must be right shift.
845 BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
846 .addReg(Op1.getReg(), RS, HiSR)
// Remaining case (presumably S > 32): a single 32-bit shift moves one
// source half into the opposite result half; the other result half is
// filled as in the S == 32 case.
851 BuildMI(B, MI, DL, TII->get(A2_aslh), HiR)
852 .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
853 else if (S == 16 && Signed)
854 BuildMI(B, MI, DL, TII->get(A2_asrh), LoR)
855 .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR);
857 BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR))
858 .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR))
862 BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
863 .addReg(Op1.getReg(), RS, HiSR)
866 BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
// Splits S2_asl_i_p_or, i.e. Op0 = or(Op1, asl(Op2, #Op3)), into 32-bit
// operations on the isub_lo / isub_hi halves of Op1 and Op2. The results are
// written to the (LoR, HiR) pair that PairMap holds for Op0. The shift
// amount is asserted to be in [0, 64), and the code below distinguishes four
// cases by its value. In each emitted sequence, only the last use of a
// source register keeps its original register state; earlier uses have the
// kill flag cleared.
871 void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
872 const UUPairMap &PairMap) {
873 using namespace Hexagon;
875 MachineOperand &Op0 = MI->getOperand(0);
876 MachineOperand &Op1 = MI->getOperand(1);
877 MachineOperand &Op2 = MI->getOperand(2);
878 MachineOperand &Op3 = MI->getOperand(3);
879 assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm());
880 int64_t Sh64 = Op3.getImm();
881 assert(Sh64 >= 0 && Sh64 < 64);
884 UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
885 assert(F != PairMap.end());
886 const UUPair &P = F->second;
887 unsigned LoR = P.first;
888 unsigned HiR = P.second;
890 MachineBasicBlock &B = *MI->getParent();
891 DebugLoc DL = MI->getDebugLoc();
892 unsigned RS1 = getRegState(Op1);
893 unsigned RS2 = getRegState(Op2);
894 const TargetRegisterClass *IntRC = &IntRegsRegClass;
896 unsigned LoSR = isub_lo;
897 unsigned HiSR = isub_hi;
899 // Op0 = S2_asl_i_p_or Op1, Op2, Op3
900 // means: Op0 = or (Op1, asl(Op2, Op3))
903 // DR = or (R1, asl(R2, #s))
905 // LoR = or (R1.lo, asl(R2.lo, #s))
906 // Tmp1 = extractu R2.lo, #s, #32-s
907 // Tmp2 = or R1.hi, Tmp1
908 // HiR = or (Tmp2, asl(R2.hi, #s))
911 // DR = or (R1, asl(R2, #0))
913 // i.e. LoR = or R1.lo, R2.lo
914 // HiR = or R1.hi, R2.hi
915 BuildMI(B, MI, DL, TII->get(A2_or), LoR)
916 .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
917 .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR);
918 BuildMI(B, MI, DL, TII->get(A2_or), HiR)
919 .addReg(Op1.getReg(), RS1, HiSR)
920 .addReg(Op2.getReg(), RS2, HiSR);
// Case using two temporaries, following the LoR/Tmp1/Tmp2/HiR recipe
// given above (presumably 0 < S < 32 -- confirm the guard).
922 BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR)
923 .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
924 .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
926 unsigned TmpR1 = MRI->createVirtualRegister(IntRC);
927 BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
928 .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
931 unsigned TmpR2 = MRI->createVirtualRegister(IntRC);
932 BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
933 .addReg(Op1.getReg(), RS1, HiSR)
935 BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
937 .addReg(Op2.getReg(), RS2, HiSR)
939 } else if (S == 32) {
940 // DR = or (R1, asl(R2, #32))
943 // HiR = or R1.hi, R2.lo
// The low half is a plain copy of R1.lo.
944 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
945 .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
946 BuildMI(B, MI, DL, TII->get(A2_or), HiR)
947 .addReg(Op1.getReg(), RS1, HiSR)
948 .addReg(Op2.getReg(), RS2, LoSR);
950 // DR = or (R1, asl(R2, #s))
953 // HiR = or (R1:hi, asl(R2:lo, #s-32))
// As in the S == 32 case, the low half is a plain copy of R1.lo.
955 BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
956 .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
957 BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
958 .addReg(Op1.getReg(), RS1, HiSR)
959 .addReg(Op2.getReg(), RS2, LoSR)
// Dispatches on MI's opcode to the helper that emits the split form of MI.
// PHI and COPY with a DoubleRC destination are duplicated per half with
// createHalfInstr, keeping the opcode. The A2_and / A2_or / A2_xor pairs
// below are the 32-bit forms emitted for the 64-bit logical operations. An
// opcode that no case handles is unreachable. The bool result is stored as
// `Done` by splitPartition.
964 bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
965 const UUPairMap &PairMap) {
966 using namespace Hexagon;
968 DEBUG(dbgs() << "Splitting: " << *MI);
970 unsigned Opc = MI->getOpcode();
973 case TargetOpcode::PHI:
974 case TargetOpcode::COPY: {
975 unsigned DstR = MI->getOperand(0).getReg();
976 if (MRI->getRegClass(DstR) == DoubleRC) {
977 createHalfInstr(Opc, MI, PairMap, isub_lo);
978 createHalfInstr(Opc, MI, PairMap, isub_hi);
984 createHalfInstr(A2_and, MI, PairMap, isub_lo);
985 createHalfInstr(A2_and, MI, PairMap, isub_hi);
989 createHalfInstr(A2_or, MI, PairMap, isub_lo);
990 createHalfInstr(A2_or, MI, PairMap, isub_hi);
994 createHalfInstr(A2_xor, MI, PairMap, isub_lo);
995 createHalfInstr(A2_xor, MI, PairMap, isub_hi);
1003 splitMemRef(MI, PairMap);
1009 splitImmediate(MI, PairMap);
1018 splitCombine(MI, PairMap);
1023 splitExt(MI, PairMap);
1030 splitShift(MI, PairMap);
1035 splitAslOr(MI, PairMap);
1040 llvm_unreachable("Instruction not splitable");
// Rewrites the use operands of MI that reference a subregister of a register
// found in PairMap: such an operand is switched to the corresponding
// register of the pair (P.second for isub_hi). Operands that are not
// registers, not uses, have no subregister, or are not in PairMap are left
// alone.
1047 void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
1048 const UUPairMap &PairMap) {
1049 for (auto &Op : MI->operands()) {
1050 if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
1052 unsigned R = Op.getReg();
1053 UUPairMap::const_iterator F = PairMap.find(R);
1054 if (F == PairMap.end())
1056 const UUPair &P = F->second;
1057 switch (Op.getSubReg()) {
1058 case Hexagon::isub_lo:
1061 case Hexagon::isub_hi:
1062 Op.setReg(P.second);
// For each use operand of MI that is a whole (no subregister) virtual
// register of class DoubleRC with an entry in PairMap, creates a new
// DoubleRC virtual register and emits, at MI's position, a REG_SEQUENCE
// defining it with the subregister indices isub_lo and isub_hi.
// splitPartition calls this for fixed instructions.
1069 void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
1070 const UUPairMap &PairMap) {
1071 MachineBasicBlock &B = *MI->getParent();
1072 DebugLoc DL = MI->getDebugLoc();
1074 for (auto &Op : MI->operands()) {
1075 if (!Op.isReg() || !Op.isUse())
1077 unsigned R = Op.getReg();
1078 if (!TargetRegisterInfo::isVirtualRegister(R))
1080 if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
1082 UUPairMap::const_iterator F = PairMap.find(R);
1083 if (F == PairMap.end())
1085 const UUPair &Pr = F->second;
1086 unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
1087 BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
1089 .addImm(Hexagon::isub_lo)
1091 .addImm(Hexagon::isub_hi);
// Splits every double register in Part into a pair of new 32-bit (IntRC)
// virtual registers and rewrites the affected instructions.
//
// Visible phases:
//   1. For each register: collect its defining instruction and all non-debug
//      users into SplitIns, and record the new (LoR, HiR) pair in PairMap.
//   2. For each collected instruction: fixed instructions go through
//      collapseRegPairs, all others through splitInstr.
//   3. For each register: replace remaining subregister uses via
//      replaceSubregUses.
//   4. Process the instructions gathered in Erase.
1096 bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
1097 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
1098 typedef std::set<MachineInstr*> MISet;
1099 bool Changed = false;
1101 DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI);
1107 for (unsigned DR : Part) {
1108 MachineInstr *DefI = MRI->getVRegDef(DR);
1109 SplitIns.insert(DefI);
1111 // Collect all instructions, including fixed ones. We won't split them,
1112 // but we need to visit them again to insert the REG_SEQUENCE instructions.
1113 for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
1115 SplitIns.insert(U->getParent());
// Allocate the two 32-bit replacement registers for DR.
1117 unsigned LoR = MRI->createVirtualRegister(IntRC);
1118 unsigned HiR = MRI->createVirtualRegister(IntRC);
1119 DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> "
1120 << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n');
1121 PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
1125 for (auto MI : SplitIns) {
1126 if (isFixedInstr(MI)) {
1127 collapseRegPairs(MI, PairMap);
1129 bool Done = splitInstr(MI, PairMap);
1136 for (unsigned DR : Part) {
1137 // Before erasing "double" instructions, revisit all uses of the double
1138 // registers in this partition, and replace all uses of them with subre-
1139 // gisters, with the corresponding single registers.
1141 for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
1143 Uses.insert(U->getParent());
1145 replaceSubregUses(M, PairMap);
1148 for (auto MI : Erase) {
1149 MachineBasicBlock *B = MI->getParent();
// Pass entry point. After the skipFunction check it caches the subtarget's
// register and instruction info, the function's MachineRegisterInfo and the
// MachineLoopInfo analysis. It then collects induction registers, partitions
// the double registers, and calls splitPartition on each partition that
// isProfitable accepts, accumulating the results in Changed.
1156 bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
1157 DEBUG(dbgs() << "Splitting double registers in function: "
1158 << MF.getName() << '\n');
1160 if (skipFunction(*MF.getFunction()))
1163 auto &ST = MF.getSubtarget<HexagonSubtarget>();
1164 TRI = ST.getRegisterInfo();
1165 TII = ST.getInstrInfo();
1166 MRI = &MF.getRegInfo();
1167 MLI = &getAnalysis<MachineLoopInfo>();
1172 collectIndRegs(IRM);
1173 partitionRegisters(P2Rs);
1176 dbgs() << "Register partitioning: (partition #0 is fixed)\n";
1177 for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
1178 dbgs() << '#' << I->first << " -> ";
1179 dump_partition(dbgs(), I->second, *TRI);
1184 bool Changed = false;
1185 int Limit = MaxHSDR;
1187 for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
// The max-hsdr limit is only tested when it is non-negative; it is
// compared against the static Counter.
1190 if (Limit >= 0 && Counter >= Limit)
1192 USet &Part = I->second;
1193 DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
1194 if (!isProfitable(Part, IRM))
1197 Changed |= splitPartition(Part);
// Factory: returns a newly allocated instance of the pass.
1203 FunctionPass *llvm::createHexagonSplitDoubleRegs() {
1204 return new HexagonSplitDoubleRegs();