1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
13 //===----------------------------------------------------------------------===//
15 #include "HexagonMachineScheduler.h"
16 #include "HexagonSubtarget.h"
17 #include "llvm/CodeGen/MachineLoopInfo.h"
18 #include "llvm/CodeGen/ScheduleDAGMutation.h"
19 #include "llvm/IR/Function.h"
// Hidden developer command-line knobs for tuning the Hexagon VLIW scheduler
// heuristics. Defaults reflect the shipping behavior.

// When set, SchedulingCost() skips the register-pressure penalty terms.
24 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
25 cl::Hidden, cl::ZeroOrMore, cl::init(false));

// Keep a call-feeding A2_tfrpi adjacent to its call via an artificial
// barrier dependence (see HexagonCallMutation::apply).
27 static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
28 cl::Hidden, cl::ZeroOrMore, cl::init(true));

// Verbosity threshold for scheduler debug dumps; >1 enables the very
// detailed readyQueueVerboseDump output.
30 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
31 cl::Hidden, cl::ZeroOrMore, cl::init(1));

// Timing-class tie-breaking direction for the top/bottom queues: when set,
// prefer the shorter-latency instruction on a cost tie (see
// pickNodeFromQueue); when clear, prefer the longer-latency one.
33 static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
34 cl::Hidden, cl::ZeroOrMore, cl::init(false));

36 static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
37 cl::Hidden, cl::ZeroOrMore, cl::init(false));

// Presumably disables the timing-class tie breaker entirely — the guard is
// not visible in this view; confirm against pickNodeFromQueue.
39 static cl::opt<bool> DisableTCTie("disable-tc-tie",
40 cl::Hidden, cl::ZeroOrMore, cl::init(false));

// Insert barrier deps so the copy of a call's return register and its last
// use stay ordered before the next redefinition of that register (see
// HexagonCallMutation::apply).
42 static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
43 cl::Hidden, cl::ZeroOrMore, cl::init(true));

45 // Check if the scheduler should penalize instructions that are available
46 // too early due to a zero-latency dependence.
47 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
48 cl::ZeroOrMore, cl::init(true));
52 #define DEBUG_TYPE "misched"
// DAG mutation that inserts artificial Barrier dependences around calls to
// improve packetization and register allocation; applied from
// ConvergingVLIWScheduler::initialize().
55 class HexagonCallMutation : public ScheduleDAGMutation {
// Walks the SUnits and adds the barrier edges (see definition below).
57 void apply(ScheduleDAGInstrs *DAG) override;
// Returns whether an A2_tfrpi and the following instruction should keep
// scheduling affinity with a call (see definition below).
59 bool shouldTFRICallBind(const HexagonInstrInfo &HII,
60 const SUnit &Inst1, const SUnit &Inst2) const;
62 } // end anonymous namespace
64 // Check if a call and subsequent A2_tfrpi instructions should maintain
65 // scheduling affinity. We are looking for the TFRI to be consumed in
66 // the next instruction. This should help reduce the instances of
67 // double register pairs being allocated and scheduled before a call
68 // when not used until after the call. This situation is exacerbated
69 // by the fact that we allocate the pair from the callee saves list,
70 // leading to excess spills and restores.
//
// \param Inst1 candidate transfer-immediate instruction (must be A2_tfrpi).
// \param Inst2 the instruction that follows Inst1 in SUnit order.
// NOTE(review): the return statements between these guards are not visible
// in this view — confirm the fall-through behavior against upstream.
71 bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
72 const SUnit &Inst1, const SUnit &Inst2) const {
// Only a transfer-immediate can start the call/TFRI pairing.
73 if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
76 // TypeXTYPE are 64 bit operations.
77 if (HII.getType(*Inst2.getInstr()) == HexagonII::TypeXTYPE)
// Walk the SUnits in program order and add artificial Barrier dependences:
//  1) a predicate-defining compare must not move above a preceding call,
//  2) a call-feeding A2_tfrpi stays glued to its neighbor (SchedPredsCloser),
//  3) the COPY of a call's return register (%R0/%V0) and its last use stay
//     ordered before the next redefinition (SchedRetvalOptimization).
82 void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
83 SUnit* LastSequentialCall = nullptr;
// Virtual register currently holding a call's return value, or 0 if none.
84 unsigned VRegHoldingRet = 0;
// NOTE(review): RetRegister is read below but its declaration is not visible
// in this view — confirm it is declared alongside VRegHoldingRet.
86 SUnit* LastUseOfRet = nullptr;
87 auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
88 auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
90 // Currently we only catch the situation when compare gets scheduled
91 // before preceding call.
92 for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
// Remember the most recent call; later compares are barriered against it.
94 if (DAG->SUnits[su].getInstr()->isCall())
95 LastSequentialCall = &DAG->SUnits[su];
96 // Look for a compare that defines a predicate.
97 else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
98 DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
99 // Look for call and tfri* instructions.
100 else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
101 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
102 DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
103 // Prevent redundant register copies between two calls, which are caused by
104 // both the return value and the argument for the next call being in %R0.
107 // 2: %VregX = COPY %R0
108 // 3: <use of %VregX>
111 // The scheduler would often swap 3 and 4, so an additional register is
112 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
113 // this. The same applies for %D0 and %V0/%W0, which are also handled.
114 else if (SchedRetvalOptimization) {
115 const MachineInstr *MI = DAG->SUnits[su].getInstr();
// A COPY out of the physical return register starts a new tracking window.
116 if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
117 MI->readsRegister(Hexagon::V0, &TRI))) {
119 VRegHoldingRet = MI->getOperand(0).getReg();
120 RetRegister = MI->getOperand(1).getReg();
121 LastUseOfRet = nullptr;
122 } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
// Track the last reader of the copied return value.
124 LastUseOfRet = &DAG->SUnits[su];
125 else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
// Redefinition of the return register must follow the last use.
127 DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
133 /// Save the last formed packet
// (used by the stall heuristic in SchedulingCost via OldPacket).
// NOTE(review): the body is not visible in this view — confirm it copies
// Packet into OldPacket.
134 void VLIWResourceModel::savePacket() {
138 /// Check if scheduling of this SU is possible
139 /// in the current packet.
140 /// It is _not_ precise (statefull), it is more like
141 /// another heuristic. Many corner cases are figured
/// empirically. Returns false for a null SU or one without an instruction.
143 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
144 if (!SU || !SU->getInstr())
147 // First see if the pipeline could receive this instruction
148 // in the current cycle.
149 switch (SU->getInstr()->getOpcode()) {
// Real instructions must fit the DFA reservation table; the pseudo
// opcodes below never consume pipeline resources.
151 if (!ResourcesModel->canReserveResources(*SU->getInstr()))
153 case TargetOpcode::EXTRACT_SUBREG:
154 case TargetOpcode::INSERT_SUBREG:
155 case TargetOpcode::SUBREG_TO_REG:
156 case TargetOpcode::REG_SEQUENCE:
157 case TargetOpcode::IMPLICIT_DEF:
158 case TargetOpcode::COPY:
159 case TargetOpcode::INLINEASM:
163 MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
164 auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
166 // Now see if there are no other dependencies to instructions already
// in the packet: a dependence from a packet member to SU would force a
// new cycle.
168 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
169 if (Packet[i]->Succs.size() == 0)
172 // Enable .cur formation.
173 if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
176 for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
177 E = Packet[i]->Succs.end(); I != E; ++I) {
178 // Since we do not add pseudos to packets, might as well
179 // ignore order dependencies.
183 if (I->getSUnit() == SU)
190 /// Keep track of available resources.
/// Reserves SU in the in-flight packet model and returns true when SU could
/// not join the current packet, i.e. the caller must start a new cycle.
191 bool VLIWResourceModel::reserveResources(SUnit *SU) {
192 bool startNewCycle = false;
193 // Artificially reset state.
// A null SU is the caller's way of forcing an end-of-packet / cycle advance.
195 ResourcesModel->clearResources();
201 // If this SU does not fit in the packet
// flush the current packet state and begin a fresh one.
203 if (!isResourceAvailable(SU)) {
204 ResourcesModel->clearResources();
208 startNewCycle = true;
// Pseudo opcodes below take no pipeline resources; everything else is
// reserved in the DFA model.
211 switch (SU->getInstr()->getOpcode()) {
213 ResourcesModel->reserveResources(*SU->getInstr());
215 case TargetOpcode::EXTRACT_SUBREG:
216 case TargetOpcode::INSERT_SUBREG:
217 case TargetOpcode::SUBREG_TO_REG:
218 case TargetOpcode::REG_SEQUENCE:
219 case TargetOpcode::IMPLICIT_DEF:
220 case TargetOpcode::KILL:
221 case TargetOpcode::CFI_INSTRUCTION:
222 case TargetOpcode::EH_LABEL:
223 case TargetOpcode::COPY:
224 case TargetOpcode::INLINEASM:
227 Packet.push_back(SU);
// Dump the packet contents for debugging.
230 DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
231 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
232 DEBUG(dbgs() << "\t[" << i << "] SU(");
233 DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
234 DEBUG(Packet[i]->getInstr()->dump());
238 // If packet is now full, reset the state so in the next cycle
// a fresh packet is started.
240 if (Packet.size() >= SchedModel->getIssueWidth()) {
241 ResourcesModel->clearResources();
245 startNewCycle = true;
248 return startNewCycle;
251 /// schedule - Called back from MachineScheduler::runOnMachineFunction
252 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
253 /// only includes instructions that have DAG nodes, not scheduling boundaries.
/// Builds the DAG with register pressure, initializes the strategy, then
/// repeatedly picks and schedules nodes until top and bottom zones meet.
254 void VLIWMachineScheduler::schedule() {
256 << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
257 << " " << BB->getName()
258 << " in_func " << BB->getParent()->getFunction()->getName()
259 << " at loop depth " << MLI->getLoopDepth(BB)
262 buildDAGWithRegPressure();
264 SmallVector<SUnit*, 8> TopRoots, BotRoots;
265 findRootsAndBiasEdges(TopRoots, BotRoots);
267 // Initialize the strategy before modifying the DAG.
268 SchedImpl->initialize(this);
// Debug-only DAG statistics: max height, max depth, and a full node dump.
270 DEBUG(unsigned maxH = 0;
271 for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
272 if (SUnits[su].getHeight() > maxH)
273 maxH = SUnits[su].getHeight();
274 dbgs() << "Max Height " << maxH << "\n";);
275 DEBUG(unsigned maxD = 0;
276 for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
277 if (SUnits[su].getDepth() > maxD)
278 maxD = SUnits[su].getDepth();
279 dbgs() << "Max Depth " << maxD << "\n";);
280 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
281 SUnits[su].dumpAll(this));
283 initQueues(TopRoots, BotRoots);
285 bool IsTopNode = false;
// Main scheduling loop: pick, schedule, update queues, notify strategy.
287 DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
288 SUnit *SU = SchedImpl->pickNode(IsTopNode);
291 if (!checkSchedLimit())
294 scheduleMI(SU, IsTopNode);
296 updateQueues(SU, IsTopNode);
298 // Notify the scheduling strategy after updating the DAG.
299 SchedImpl->schedNode(SU, IsTopNode);
301 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
306 unsigned BBNum = begin()->getParent()->getNumber();
307 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
// Wire the strategy to the DAG: set up both scheduling boundaries, their
// hazard recognizers and resource models, and register the Hexagon-specific
// DAG mutations.
313 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
314 DAG = static_cast<VLIWMachineScheduler*>(dag);
315 SchedModel = DAG->getSchedModel();
317 Top.init(DAG, SchedModel);
318 Bot.init(DAG, SchedModel);
320 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
321 // are disabled, then these HazardRecs will be disabled.
322 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
323 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
324 const TargetInstrInfo *TII = STI.getInstrInfo();
// Delete any recognizers/models left from a previous region before
// allocating fresh ones (initialize may be called per region).
325 delete Top.HazardRec;
326 delete Bot.HazardRec;
327 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
328 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
330 delete Top.ResourceModel;
331 delete Bot.ResourceModel;
332 Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
333 Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
335 assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
336 "-misched-topdown incompatible with -misched-bottomup");
338 DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
339 DAG->addMutation(make_unique<HexagonCallMutation>());
// A node has become ready at the top boundary: compute its earliest
// top-down ready cycle from each predecessor's ready cycle plus the edge
// latency, then hand it to the Top zone.
342 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
346 for (const SDep &PI : SU->Preds) {
347 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
348 unsigned MinLatency = PI.getLatency();
// Track the largest edge latency seen; used to bound hazard look-ahead.
350 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
352 if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
353 SU->TopReadyCycle = PredReadyCycle + MinLatency;
355 Top.releaseNode(SU, SU->TopReadyCycle);
// Mirror of releaseTopNode for the bottom boundary: earliest bottom-up
// ready cycle is the max over successors of their ready cycle plus latency.
358 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
362 assert(SU->getInstr() && "Scheduled SUnit must have instr");
364 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
366 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
367 unsigned MinLatency = I->getLatency();
// Track the largest edge latency seen; used to bound hazard look-ahead.
369 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
371 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
372 SU->BotReadyCycle = SuccReadyCycle + MinLatency;
374 Bot.releaseNode(SU, SU->BotReadyCycle);
377 /// Does this SU have a hazard within the current instruction group.
379 /// The scheduler supports two modes of hazard recognition. The first is the
380 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
381 /// supports highly complicated in-order reservation tables
382 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
384 /// The second is a streamlined mechanism that checks for hazards based on
385 /// simple counters that the scheduler itself maintains. It explicitly checks
386 /// for instruction dispatch limitations, including the number of micro-ops that
387 /// can dispatch per cycle.
389 /// TODO: Also check whether the SU must start a new group.
390 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
// Prefer the full recognizer when it is enabled for this target.
391 if (HazardRec->isEnabled())
392 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
// Fallback: a micro-op count that would overflow the issue width is a hazard.
394 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
395 if (IssueCount + uops > SchedModel->getIssueWidth())
// Accept a newly-ready node into this boundary. Nodes that are not yet
// issuable (future ready cycle, or a hazard) go to Pending rather than
// Available — the push targets are on lines not visible in this view.
401 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
402 unsigned ReadyCycle) {
403 if (ReadyCycle < MinReadyCycle)
404 MinReadyCycle = ReadyCycle;
406 // Check for interlocks first. For the purpose of other heuristics, an
407 // instruction that cannot issue appears as if it's not in the ReadyQueue.
408 if (ReadyCycle > CurrCycle || checkHazard(SU))
415 /// Move the boundary of scheduled code by one cycle.
416 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
// Carry any overflow of issued micro-ops into the next cycle.
417 unsigned Width = SchedModel->getIssueWidth();
418 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
420 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
// Jump directly to the earliest cycle at which anything becomes ready.
421 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
423 if (!HazardRec->isEnabled()) {
424 // Bypass HazardRec virtual calls.
425 CurrCycle = NextCycle;
427 // Bypass getHazardType calls in case of long latency.
428 for (; CurrCycle != NextCycle; ++CurrCycle) {
// Advance for top-down, recede for bottom-up scheduling.
430 HazardRec->AdvanceCycle();
432 HazardRec->RecedeCycle();
437 DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
438 << CurrCycle << '\n');
441 /// Move the boundary of scheduled code by one SUnit.
/// Records SU in the hazard recognizer and resource model, and bumps the
/// cycle when the packet/issue limit is reached.
442 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
443 bool startNewCycle = false;
445 // Update the reservation table.
446 if (HazardRec->isEnabled()) {
447 if (!isTop() && SU->isCall) {
448 // Calls are scheduled with their preceding instructions. For bottom-up
449 // scheduling, clear the pipeline state before emitting.
452 HazardRec->EmitInstruction(SU);
// Reserve the SU in the VLIW packet model; true means the packet closed.
456 startNewCycle = ResourceModel->reserveResources(SU);
458 // Check the instruction group dispatch limit.
459 // TODO: Check if this SU must end a dispatch group.
460 IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
462 DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
466 DEBUG(dbgs() << "*** IssueCount " << IssueCount
467 << " at cycle " << CurrCycle << '\n');
470 /// Release pending ready nodes in to the available queue. This makes them
471 /// visible to heuristics.
472 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
473 // If the available queue is empty, it is safe to reset MinReadyCycle.
474 if (Available.empty())
475 MinReadyCycle = UINT_MAX;
477 // Check to see if any of the pending instructions are ready to issue. If
478 // so, add them to the available queue.
479 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
480 SUnit *SU = *(Pending.begin()+i);
481 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
// Keep MinReadyCycle accurate for nodes that stay pending.
483 if (ReadyCycle < MinReadyCycle)
484 MinReadyCycle = ReadyCycle;
// Not ready yet in the current cycle — leave it in Pending.
486 if (ReadyCycle > CurrCycle)
493 Pending.remove(Pending.begin()+i);
496 CheckPending = false;
499 /// Remove SU from the ready set for this boundary.
/// SU must be in exactly one of Available or Pending.
500 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
501 if (Available.isInQueue(SU))
502 Available.remove(Available.find(SU));
504 assert(Pending.isInQueue(SU) && "bad ready count");
505 Pending.remove(Pending.find(SU));
509 /// If this queue only has one ready candidate, return it. As a side effect,
510 /// advance the cycle until at least one node is ready. If multiple instructions
511 /// are ready, return NULL.
512 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
// Keep bumping cycles until something becomes available; the assert guards
// against an unbreakable ("permanent") hazard.
516 for (unsigned i = 0; Available.empty(); ++i) {
517 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
518 "permanent hazard"); (void)i;
// Passing nullptr forces the resource model to close the current packet.
519 ResourceModel->reserveResources(nullptr);
523 if (Available.size() == 1)
524 return *Available.begin();
// Debug helper: print a one-line summary of a scheduling candidate — label,
// queue name, optional pressure-set delta, and its cost.
529 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
530 const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
531 dbgs() << Label << " " << Q.getName() << " ";
533 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
534 << P.getUnitInc() << " ";
537 dbgs() << "cost(" << Cost << ")\t";
541 // Very detailed queue dump, to be used with higher verbosity levels.
// For every SU in queue Q, prints its node number, its cost (via
// SchedulingCost in verbose mode), and the instruction itself.
542 void ConvergingVLIWScheduler::readyQueueVerboseDump(
543 const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
// getMaxPressureDelta temporarily modifies the tracker, hence the cast.
545 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
547 dbgs() << ">>> " << Q.getName() << "\n";
548 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
549 RegPressureDelta RPDelta;
550 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
551 DAG->getRegionCriticalPSets(),
552 DAG->getRegPressure().MaxSetPressure);
553 std::stringstream dbgstr;
554 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
555 dbgs() << dbgstr.str();
556 SchedulingCost(Q, *I, Candidate, RPDelta, true);
558 (*I)->getInstr()->dump();
564 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
565 /// of SU, return it, otherwise return null.
566 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
567 SUnit *OnlyAvailablePred = nullptr;
568 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
570 SUnit &Pred = *I->getSUnit();
571 if (!Pred.isScheduled) {
572 // We found an available, but not scheduled, predecessor. If it's the
573 // only one we have found, keep track of it... otherwise give up.
// A second distinct unscheduled predecessor means "not single".
574 if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
576 OnlyAvailablePred = &Pred;
579 return OnlyAvailablePred;
582 /// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
583 /// of SU, return it, otherwise return null.
584 static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
585 SUnit *OnlyAvailableSucc = nullptr;
586 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
588 SUnit &Succ = *I->getSUnit();
589 if (!Succ.isScheduled) {
590 // We found an available, but not scheduled, successor. If it's the
591 // only one we have found, keep track of it... otherwise give up.
// A second distinct unscheduled successor means "not single".
592 if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
594 OnlyAvailableSucc = &Succ;
597 return OnlyAvailableSucc;
600 // Constants used to denote relative importance of
601 // heuristic components for cost computation.
// PriorityOne: dominant bonus/penalty (forced priority, pressure excess,
// stalls, early availability). PriorityTwo/Three: smaller bonuses (.cur,
// zero-latency packet affinity, resource availability). ScaleTwo scales
// per-unit terms (height/depth/blocked nodes); FactorOne is a shift amount.
602 static const unsigned PriorityOne = 200;
603 static const unsigned PriorityTwo = 50;
604 static const unsigned PriorityThree = 75;
605 static const unsigned ScaleTwo = 10;
606 static const unsigned FactorOne = 2;
608 /// Single point to compute overall scheduling cost.
609 /// TODO: More heuristics will be used soon.
/// Combines, into a single signed score: critical-path position, number of
/// nodes this SU unblocks, register-pressure deltas, .cur-load bonuses,
/// zero-latency packet affinity, V60 stall penalties, and an early-
/// availability penalty. Higher is better; called per candidate from
/// pickNodeFromQueue. \p verbose only affects the debug trace.
610 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
611 SchedCandidate &Candidate,
612 RegPressureDelta &Delta,
614 // Initial trivial priority.
617 // Do not waste time on a node that is already scheduled.
618 if (!SU || SU->isScheduled)
621 MachineInstr &Instr = *SU->getInstr();
623 DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
624 // Forced priority is high.
625 if (SU->isScheduleHigh) {
626 ResCount += PriorityOne;
627 DEBUG(dbgs() << "H|");
630 // Critical path first.
// Top queue favors tall nodes (height); bottom queue favors deep nodes.
631 if (Q.getID() == TopQID) {
632 ResCount += (SU->getHeight() * ScaleTwo);
635 std::stringstream dbgstr;
636 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
637 dbgs() << dbgstr.str();
640 // If resources are available for it, multiply the
641 // chance of scheduling.
642 if (Top.ResourceModel->isResourceAvailable(SU)) {
643 ResCount <<= FactorOne;
644 ResCount += PriorityThree;
645 DEBUG(if (verbose) dbgs() << "A|");
647 DEBUG(if (verbose) dbgs() << " |");
649 ResCount += (SU->getDepth() * ScaleTwo);
652 std::stringstream dbgstr;
653 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
654 dbgs() << dbgstr.str();
657 // If resources are available for it, multiply the
658 // chance of scheduling.
659 if (Bot.ResourceModel->isResourceAvailable(SU)) {
660 ResCount <<= FactorOne;
661 ResCount += PriorityThree;
662 DEBUG(if (verbose) dbgs() << "A|");
664 DEBUG(if (verbose) dbgs() << " |");
// Reward nodes that are the sole blocker of many other nodes.
667 unsigned NumNodesBlocking = 0;
668 if (Q.getID() == TopQID) {
669 // How many SUs does it block from scheduling?
670 // Look at all of the successors of this node.
671 // Count the number of nodes that
672 // this node is the sole unscheduled node for.
673 for (const SDep &SI : SU->Succs)
674 if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
677 // How many unscheduled predecessors block this node?
678 for (const SDep &PI : SU->Preds)
679 if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
682 ResCount += (NumNodesBlocking * ScaleTwo);
685 std::stringstream dbgstr;
686 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
687 dbgs() << dbgstr.str();
690 // Factor in reg pressure as a heuristic.
691 if (!IgnoreBBRegPressure) {
692 // Decrease priority by the amount that register pressure exceeds the limit.
693 ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
694 // Decrease priority if register pressure exceeds the limit.
695 ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
696 // Decrease priority slightly if register pressure would increase over the
// current maximum for the region.
698 ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
700 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
701 << Delta.CriticalMax.getUnitInc() <<"/"
702 << Delta.CurrentMax.getUnitInc() << ")|";
706 // Give a little extra priority to a .cur instruction if there is a resource
// available for it in the current packet.
708 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
709 auto &QII = *QST.getInstrInfo();
710 if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
711 if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
712 ResCount += PriorityTwo;
713 DEBUG(if (verbose) dbgs() << "C|");
714 } else if (Q.getID() == BotQID &&
715 Bot.ResourceModel->isResourceAvailable(SU)) {
716 ResCount += PriorityTwo;
717 DEBUG(if (verbose) dbgs() << "C|");
721 // Give preference to a zero latency instruction if the dependent
722 // instruction is in the current packet.
723 if (Q.getID() == TopQID) {
724 for (const SDep &PI : SU->Preds) {
725 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
726 PI.getLatency() == 0 &&
727 Top.ResourceModel->isInPacket(PI.getSUnit())) {
728 ResCount += PriorityThree;
729 DEBUG(if (verbose) dbgs() << "Z|");
733 for (const SDep &SI : SU->Succs) {
734 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
735 SI.getLatency() == 0 &&
736 Bot.ResourceModel->isInPacket(SI.getSUnit())) {
737 ResCount += PriorityThree;
738 DEBUG(if (verbose) dbgs() << "Z|");
743 // Give less preference to an instruction that will cause a stall with
744 // an instruction in the previous packet.
745 if (QII.isV60VectorInstruction(Instr)) {
746 // Check for stalls in the previous packet.
// Note the producer/consumer order flips between top and bottom queues.
747 if (Q.getID() == TopQID) {
748 for (auto J : Top.ResourceModel->OldPacket)
749 if (QII.producesStall(*J->getInstr(), Instr))
750 ResCount -= PriorityOne;
752 for (auto J : Bot.ResourceModel->OldPacket)
753 if (QII.producesStall(Instr, *J->getInstr()))
754 ResCount -= PriorityOne;
758 // If the instruction has a non-zero latency dependence with an instruction in
759 // the current packet, then it should not be scheduled yet. The case occurs
760 // when the dependent instruction is scheduled in a new packet, so the
761 // scheduler updates the current cycle and pending instructions become
// available.
763 if (CheckEarlyAvail) {
764 if (Q.getID() == TopQID) {
765 for (const auto &PI : SU->Preds) {
766 if (PI.getLatency() > 0 &&
767 Top.ResourceModel->isInPacket(PI.getSUnit())) {
768 ResCount -= PriorityOne;
769 DEBUG(if (verbose) dbgs() << "D|");
773 for (const auto &SI : SU->Succs) {
774 if (SI.getLatency() > 0 &&
775 Bot.ResourceModel->isInPacket(SI.getSUnit())) {
776 ResCount -= PriorityOne;
777 DEBUG(if (verbose) dbgs() << "D|");
784 std::stringstream dbgstr;
785 dbgstr << "Total " << std::setw(4) << ResCount << ")";
786 dbgs() << dbgstr.str();
792 /// Pick the best candidate from the top queue.
794 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
795 /// DAG building. To adjust for the current scheduling location we need to
796 /// maintain the number of vreg uses remaining to be top-scheduled.
/// Scans every SU in Q, scoring each with SchedulingCost and keeping the
/// best in \p Candidate. Ties are broken first by timing-class latency
/// (Top/BotUseShorterTie), then by successor/predecessor counts, then by
/// original node order. Returns how the winner was chosen (NoCand if Q
/// produced nothing better than the incoming Candidate).
797 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
798 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
799 SchedCandidate &Candidate) {
800 DEBUG(if (SchedDebugVerboseLevel > 1)
801 readyQueueVerboseDump(RPTracker, Candidate, Q);
804 // getMaxPressureDelta temporarily modifies the tracker.
805 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
807 // BestSU remains NULL if no top candidates beat the best existing candidate.
808 CandResult FoundCandidate = NoCand;
809 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
810 RegPressureDelta RPDelta;
811 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
812 DAG->getRegionCriticalPSets(),
813 DAG->getRegPressure().MaxSetPressure);
815 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
817 // Initialize the candidate if needed.
819 DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
821 Candidate.RPDelta = RPDelta;
822 Candidate.SCost = CurrentCost;
823 FoundCandidate = NodeOrder;
// Strictly better cost always wins.
828 if (CurrentCost > Candidate.SCost) {
829 DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
831 Candidate.RPDelta = RPDelta;
832 Candidate.SCost = CurrentCost;
833 FoundCandidate = BestCost;
837 // Tie breaker using Timing Class.
839 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
840 auto &QII = *QST.getInstrInfo();
842 const MachineInstr *MI = (*I)->getInstr();
843 const MachineInstr *CandI = Candidate.SU->getInstr();
844 const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
846 unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
847 unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
848 DEBUG(dbgs() << "TC Tie Breaker Cand: "
849 << CandLatency << " Instr:" << InstrLatency << "\n"
850 << *MI << *CandI << "\n");
// Direction of the latency preference is per-queue and configurable.
851 if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
852 if (InstrLatency < CandLatency && TopUseShorterTie) {
854 Candidate.RPDelta = RPDelta;
855 Candidate.SCost = CurrentCost;
856 FoundCandidate = BestCost;
857 DEBUG(dbgs() << "Used top shorter tie breaker\n");
859 } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
861 Candidate.RPDelta = RPDelta;
862 Candidate.SCost = CurrentCost;
863 FoundCandidate = BestCost;
864 DEBUG(dbgs() << "Used top longer tie breaker\n");
867 } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
868 if (InstrLatency < CandLatency && BotUseShorterTie) {
870 Candidate.RPDelta = RPDelta;
871 Candidate.SCost = CurrentCost;
872 FoundCandidate = BestCost;
873 DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
875 } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
877 Candidate.RPDelta = RPDelta;
878 Candidate.SCost = CurrentCost;
879 FoundCandidate = BestCost;
880 DEBUG(dbgs() << "Used Bot longer tie breaker\n");
// Secondary tie breaker: prefer the node that unblocks more (top) or
// depends on less (bottom).
886 if (CurrentCost == Candidate.SCost) {
887 if ((Q.getID() == TopQID &&
888 (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
889 (Q.getID() == BotQID &&
890 (*I)->Preds.size() < Candidate.SU->Preds.size())) {
891 DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
893 Candidate.RPDelta = RPDelta;
894 Candidate.SCost = CurrentCost;
895 FoundCandidate = BestCost;
900 // Fall through to original instruction order.
901 // Only consider node order if Candidate was chosen from this Q.
902 if (FoundCandidate == NoCand)
905 return FoundCandidate;
908 /// Pick the best candidate node from either the top or bottom queue.
/// Preference order: forced single choices, then single-candidate pressure
/// winners (SingleExcess/SingleCritical/SingleMax), then raw cost, and
/// finally bottom-queue node order. Sets \p IsTopNode accordingly.
/// (Name keeps the historical "Bidrectional" spelling — it is the public
/// interface; renaming would break callers/overrides elsewhere.)
909 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
910 // Schedule as far as possible in the direction of no choice. This is most
911 // efficient, but also provides the best heuristics for CriticalPSets.
912 if (SUnit *SU = Bot.pickOnlyChoice()) {
913 DEBUG(dbgs() << "Picked only Bottom\n");
917 if (SUnit *SU = Top.pickOnlyChoice()) {
918 DEBUG(dbgs() << "Picked only Top\n");
922 SchedCandidate BotCand;
923 // Prefer bottom scheduling when heuristics are silent.
924 CandResult BotResult = pickNodeFromQueue(Bot.Available,
925 DAG->getBotRPTracker(), BotCand);
926 assert(BotResult != NoCand && "failed to find the first candidate");
928 // If either Q has a single candidate that provides the least increase in
929 // Excess pressure, we can immediately schedule from that Q.
931 // RegionCriticalPSets summarizes the pressure within the scheduled region and
932 // affects picking from either Q. If scheduling in one direction must
933 // increase pressure for one of the excess PSets, then schedule in that
934 // direction first to provide more freedom in the other direction.
935 if (BotResult == SingleExcess || BotResult == SingleCritical) {
936 DEBUG(dbgs() << "Prefered Bottom Node\n");
940 // Check if the top Q has a better candidate.
941 SchedCandidate TopCand;
942 CandResult TopResult = pickNodeFromQueue(Top.Available,
943 DAG->getTopRPTracker(), TopCand);
944 assert(TopResult != NoCand && "failed to find the first candidate");
946 if (TopResult == SingleExcess || TopResult == SingleCritical) {
947 DEBUG(dbgs() << "Prefered Top Node\n");
951 // If either Q has a single candidate that minimizes pressure above the
952 // original region's pressure pick it.
953 if (BotResult == SingleMax) {
954 DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
958 if (TopResult == SingleMax) {
959 DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
// With no pressure-based winner, plain cost decides.
963 if (TopCand.SCost > BotCand.SCost) {
964 DEBUG(dbgs() << "Prefered Top Node Cost\n");
968 // Otherwise prefer the bottom candidate in node order.
969 DEBUG(dbgs() << "Prefered Bottom in Node order\n");
974 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
/// Honors -misched-topdown / -misched-bottomup when forced; otherwise
/// delegates to pickNodeBidrectional. Removes the chosen SU from both
/// boundaries' ready sets before returning it.
975 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
// Empty region: both zones must have drained.
976 if (DAG->top() == DAG->bottom()) {
977 assert(Top.Available.empty() && Top.Pending.empty() &&
978 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
982 if (llvm::ForceTopDown) {
983 SU = Top.pickOnlyChoice();
985 SchedCandidate TopCand;
986 CandResult TopResult =
987 pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
988 assert(TopResult != NoCand && "failed to find the first candidate");
993 } else if (llvm::ForceBottomUp) {
994 SU = Bot.pickOnlyChoice();
996 SchedCandidate BotCand;
997 CandResult BotResult =
998 pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
999 assert(BotResult != NoCand && "failed to find the first candidate");
1005 SU = pickNodeBidrectional(IsTopNode);
1007 if (SU->isTopReady())
1008 Top.removeReady(SU);
1009 if (SU->isBottomReady())
1010 Bot.removeReady(SU);
1012 DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
1013 << " Scheduling Instruction in cycle "
1014 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
1019 /// Update the scheduler's state after scheduling a node. This is the same node
1020 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
1021 /// to update it's state based on the current cycle before MachineSchedStrategy
/// does. Stamps the SU with the boundary's current cycle and bumps that
/// boundary (bumpNode calls are on lines not visible in this view).
1023 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1025 SU->TopReadyCycle = Top.CurrCycle;
1028 SU->BotReadyCycle = Bot.CurrCycle;