//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//
#include "HexagonMachineScheduler.h"
#include "HexagonSubtarget.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <climits>
#include <iomanip>
#include <sstream>

using namespace llvm;
24 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
25 cl::Hidden, cl::ZeroOrMore, cl::init(false));
27 static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
28 cl::Hidden, cl::ZeroOrMore, cl::init(true));
30 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
31 cl::Hidden, cl::ZeroOrMore, cl::init(1));
33 static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
34 cl::Hidden, cl::ZeroOrMore, cl::init(false));
36 static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
37 cl::Hidden, cl::ZeroOrMore, cl::init(false));
39 static cl::opt<bool> DisableTCTie("disable-tc-tie",
40 cl::Hidden, cl::ZeroOrMore, cl::init(false));
42 static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
43 cl::Hidden, cl::ZeroOrMore, cl::init(true));
45 // Check if the scheduler should penalize instructions that are available to
46 // early due to a zero-latency dependence.
47 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
48 cl::ZeroOrMore, cl::init(true));
52 #define DEBUG_TYPE "misched"
55 class HexagonCallMutation : public ScheduleDAGMutation {
57 void apply(ScheduleDAGInstrs *DAG) override;
59 bool shouldTFRICallBind(const HexagonInstrInfo &HII,
60 const SUnit &Inst1, const SUnit &Inst2) const;
62 } // end anonymous namespace
64 // Check if a call and subsequent A2_tfrpi instructions should maintain
65 // scheduling affinity. We are looking for the TFRI to be consumed in
66 // the next instruction. This should help reduce the instances of
67 // double register pairs being allocated and scheduled before a call
68 // when not used until after the call. This situation is exacerbated
69 // by the fact that we allocate the pair from the callee saves list,
70 // leading to excess spills and restores.
71 bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
72 const SUnit &Inst1, const SUnit &Inst2) const {
73 if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
76 // TypeXTYPE are 64 bit operations.
77 unsigned Type = HII.getType(*Inst2.getInstr());
78 if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
79 Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM)
84 void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
85 SUnit* LastSequentialCall = nullptr;
86 unsigned VRegHoldingRet = 0;
88 SUnit* LastUseOfRet = nullptr;
89 auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
90 auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
92 // Currently we only catch the situation when compare gets scheduled
93 // before preceding call.
94 for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
96 if (DAG->SUnits[su].getInstr()->isCall())
97 LastSequentialCall = &DAG->SUnits[su];
98 // Look for a compare that defines a predicate.
99 else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
100 DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
101 // Look for call and tfri* instructions.
102 else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
103 shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
104 DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
105 // Prevent redundant register copies between two calls, which are caused by
106 // both the return value and the argument for the next call being in %R0.
109 // 2: %VregX = COPY %R0
110 // 3: <use of %VregX>
113 // The scheduler would often swap 3 and 4, so an additional register is
114 // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
115 // this. The same applies for %D0 and %V0/%W0, which are also handled.
116 else if (SchedRetvalOptimization) {
117 const MachineInstr *MI = DAG->SUnits[su].getInstr();
118 if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
119 MI->readsRegister(Hexagon::V0, &TRI))) {
121 VRegHoldingRet = MI->getOperand(0).getReg();
122 RetRegister = MI->getOperand(1).getReg();
123 LastUseOfRet = nullptr;
124 } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
126 LastUseOfRet = &DAG->SUnits[su];
127 else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
129 DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
135 /// Save the last formed packet
136 void VLIWResourceModel::savePacket() {
140 /// Check if scheduling of this SU is possible
141 /// in the current packet.
142 /// It is _not_ precise (statefull), it is more like
143 /// another heuristic. Many corner cases are figured
145 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
146 if (!SU || !SU->getInstr())
149 // First see if the pipeline could receive this instruction
150 // in the current cycle.
151 switch (SU->getInstr()->getOpcode()) {
153 if (!ResourcesModel->canReserveResources(*SU->getInstr()))
155 case TargetOpcode::EXTRACT_SUBREG:
156 case TargetOpcode::INSERT_SUBREG:
157 case TargetOpcode::SUBREG_TO_REG:
158 case TargetOpcode::REG_SEQUENCE:
159 case TargetOpcode::IMPLICIT_DEF:
160 case TargetOpcode::COPY:
161 case TargetOpcode::INLINEASM:
165 MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
166 auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
168 // Now see if there are no other dependencies to instructions already
170 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
171 if (Packet[i]->Succs.size() == 0)
174 // Enable .cur formation.
175 if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
178 for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
179 E = Packet[i]->Succs.end(); I != E; ++I) {
180 // Since we do not add pseudos to packets, might as well
181 // ignore order dependencies.
185 if (I->getSUnit() == SU)
192 /// Keep track of available resources.
193 bool VLIWResourceModel::reserveResources(SUnit *SU) {
194 bool startNewCycle = false;
195 // Artificially reset state.
197 ResourcesModel->clearResources();
203 // If this SU does not fit in the packet
205 if (!isResourceAvailable(SU)) {
206 ResourcesModel->clearResources();
210 startNewCycle = true;
213 switch (SU->getInstr()->getOpcode()) {
215 ResourcesModel->reserveResources(*SU->getInstr());
217 case TargetOpcode::EXTRACT_SUBREG:
218 case TargetOpcode::INSERT_SUBREG:
219 case TargetOpcode::SUBREG_TO_REG:
220 case TargetOpcode::REG_SEQUENCE:
221 case TargetOpcode::IMPLICIT_DEF:
222 case TargetOpcode::KILL:
223 case TargetOpcode::CFI_INSTRUCTION:
224 case TargetOpcode::EH_LABEL:
225 case TargetOpcode::COPY:
226 case TargetOpcode::INLINEASM:
229 Packet.push_back(SU);
232 DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
233 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
234 DEBUG(dbgs() << "\t[" << i << "] SU(");
235 DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
236 DEBUG(Packet[i]->getInstr()->dump());
240 // If packet is now full, reset the state so in the next cycle
242 if (Packet.size() >= SchedModel->getIssueWidth()) {
243 ResourcesModel->clearResources();
247 startNewCycle = true;
250 return startNewCycle;
253 /// schedule - Called back from MachineScheduler::runOnMachineFunction
254 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
255 /// only includes instructions that have DAG nodes, not scheduling boundaries.
256 void VLIWMachineScheduler::schedule() {
258 << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
259 << " " << BB->getName()
260 << " in_func " << BB->getParent()->getFunction()->getName()
261 << " at loop depth " << MLI->getLoopDepth(BB)
264 buildDAGWithRegPressure();
266 SmallVector<SUnit*, 8> TopRoots, BotRoots;
267 findRootsAndBiasEdges(TopRoots, BotRoots);
269 // Initialize the strategy before modifying the DAG.
270 SchedImpl->initialize(this);
272 DEBUG(unsigned maxH = 0;
273 for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
274 if (SUnits[su].getHeight() > maxH)
275 maxH = SUnits[su].getHeight();
276 dbgs() << "Max Height " << maxH << "\n";);
277 DEBUG(unsigned maxD = 0;
278 for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
279 if (SUnits[su].getDepth() > maxD)
280 maxD = SUnits[su].getDepth();
281 dbgs() << "Max Depth " << maxD << "\n";);
282 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
283 SUnits[su].dumpAll(this));
285 initQueues(TopRoots, BotRoots);
287 bool IsTopNode = false;
289 DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
290 SUnit *SU = SchedImpl->pickNode(IsTopNode);
293 if (!checkSchedLimit())
296 scheduleMI(SU, IsTopNode);
298 updateQueues(SU, IsTopNode);
300 // Notify the scheduling strategy after updating the DAG.
301 SchedImpl->schedNode(SU, IsTopNode);
303 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
308 unsigned BBNum = begin()->getParent()->getNumber();
309 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
315 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
316 DAG = static_cast<VLIWMachineScheduler*>(dag);
317 SchedModel = DAG->getSchedModel();
319 Top.init(DAG, SchedModel);
320 Bot.init(DAG, SchedModel);
322 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
323 // are disabled, then these HazardRecs will be disabled.
324 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
325 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
326 const TargetInstrInfo *TII = STI.getInstrInfo();
327 delete Top.HazardRec;
328 delete Bot.HazardRec;
329 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
330 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
332 delete Top.ResourceModel;
333 delete Bot.ResourceModel;
334 Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
335 Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
337 assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
338 "-misched-topdown incompatible with -misched-bottomup");
340 DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
341 DAG->addMutation(make_unique<HexagonCallMutation>());
344 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
348 for (const SDep &PI : SU->Preds) {
349 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
350 unsigned MinLatency = PI.getLatency();
352 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
354 if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
355 SU->TopReadyCycle = PredReadyCycle + MinLatency;
357 Top.releaseNode(SU, SU->TopReadyCycle);
360 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
364 assert(SU->getInstr() && "Scheduled SUnit must have instr");
366 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
368 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
369 unsigned MinLatency = I->getLatency();
371 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
373 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
374 SU->BotReadyCycle = SuccReadyCycle + MinLatency;
376 Bot.releaseNode(SU, SU->BotReadyCycle);
379 /// Does this SU have a hazard within the current instruction group.
381 /// The scheduler supports two modes of hazard recognition. The first is the
382 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
383 /// supports highly complicated in-order reservation tables
384 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
386 /// The second is a streamlined mechanism that checks for hazards based on
387 /// simple counters that the scheduler itself maintains. It explicitly checks
388 /// for instruction dispatch limitations, including the number of micro-ops that
389 /// can dispatch per cycle.
391 /// TODO: Also check whether the SU must start a new group.
392 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
393 if (HazardRec->isEnabled())
394 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
396 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
397 if (IssueCount + uops > SchedModel->getIssueWidth())
403 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
404 unsigned ReadyCycle) {
405 if (ReadyCycle < MinReadyCycle)
406 MinReadyCycle = ReadyCycle;
408 // Check for interlocks first. For the purpose of other heuristics, an
409 // instruction that cannot issue appears as if it's not in the ReadyQueue.
410 if (ReadyCycle > CurrCycle || checkHazard(SU))
417 /// Move the boundary of scheduled code by one cycle.
418 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
419 unsigned Width = SchedModel->getIssueWidth();
420 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
422 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
423 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
425 if (!HazardRec->isEnabled()) {
426 // Bypass HazardRec virtual calls.
427 CurrCycle = NextCycle;
429 // Bypass getHazardType calls in case of long latency.
430 for (; CurrCycle != NextCycle; ++CurrCycle) {
432 HazardRec->AdvanceCycle();
434 HazardRec->RecedeCycle();
439 DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
440 << CurrCycle << '\n');
443 /// Move the boundary of scheduled code by one SUnit.
444 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
445 bool startNewCycle = false;
447 // Update the reservation table.
448 if (HazardRec->isEnabled()) {
449 if (!isTop() && SU->isCall) {
450 // Calls are scheduled with their preceding instructions. For bottom-up
451 // scheduling, clear the pipeline state before emitting.
454 HazardRec->EmitInstruction(SU);
458 startNewCycle = ResourceModel->reserveResources(SU);
460 // Check the instruction group dispatch limit.
461 // TODO: Check if this SU must end a dispatch group.
462 IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
464 DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
468 DEBUG(dbgs() << "*** IssueCount " << IssueCount
469 << " at cycle " << CurrCycle << '\n');
472 /// Release pending ready nodes in to the available queue. This makes them
473 /// visible to heuristics.
474 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
475 // If the available queue is empty, it is safe to reset MinReadyCycle.
476 if (Available.empty())
477 MinReadyCycle = UINT_MAX;
479 // Check to see if any of the pending instructions are ready to issue. If
480 // so, add them to the available queue.
481 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
482 SUnit *SU = *(Pending.begin()+i);
483 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
485 if (ReadyCycle < MinReadyCycle)
486 MinReadyCycle = ReadyCycle;
488 if (ReadyCycle > CurrCycle)
495 Pending.remove(Pending.begin()+i);
498 CheckPending = false;
501 /// Remove SU from the ready set for this boundary.
502 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
503 if (Available.isInQueue(SU))
504 Available.remove(Available.find(SU));
506 assert(Pending.isInQueue(SU) && "bad ready count");
507 Pending.remove(Pending.find(SU));
511 /// If this queue only has one ready candidate, return it. As a side effect,
512 /// advance the cycle until at least one node is ready. If multiple instructions
513 /// are ready, return NULL.
514 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
518 for (unsigned i = 0; Available.empty(); ++i) {
519 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
520 "permanent hazard"); (void)i;
521 ResourceModel->reserveResources(nullptr);
525 if (Available.size() == 1)
526 return *Available.begin();
531 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
532 const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
533 dbgs() << Label << " " << Q.getName() << " ";
535 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
536 << P.getUnitInc() << " ";
539 dbgs() << "cost(" << Cost << ")\t";
543 // Very detailed queue dump, to be used with higher verbosity levels.
544 void ConvergingVLIWScheduler::readyQueueVerboseDump(
545 const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
547 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
549 dbgs() << ">>> " << Q.getName() << "\n";
550 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
551 RegPressureDelta RPDelta;
552 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
553 DAG->getRegionCriticalPSets(),
554 DAG->getRegPressure().MaxSetPressure);
555 std::stringstream dbgstr;
556 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
557 dbgs() << dbgstr.str();
558 SchedulingCost(Q, *I, Candidate, RPDelta, true);
560 (*I)->getInstr()->dump();
566 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
567 /// of SU, return it, otherwise return null.
568 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
569 SUnit *OnlyAvailablePred = nullptr;
570 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
572 SUnit &Pred = *I->getSUnit();
573 if (!Pred.isScheduled) {
574 // We found an available, but not scheduled, predecessor. If it's the
575 // only one we have found, keep track of it... otherwise give up.
576 if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
578 OnlyAvailablePred = &Pred;
581 return OnlyAvailablePred;
584 /// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
585 /// of SU, return it, otherwise return null.
586 static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
587 SUnit *OnlyAvailableSucc = nullptr;
588 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
590 SUnit &Succ = *I->getSUnit();
591 if (!Succ.isScheduled) {
592 // We found an available, but not scheduled, successor. If it's the
593 // only one we have found, keep track of it... otherwise give up.
594 if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
596 OnlyAvailableSucc = &Succ;
599 return OnlyAvailableSucc;
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;  // Dominant terms: forced priority,
                                          // pressure excess, stall penalties.
static const unsigned PriorityTwo = 50;   // Minor bonuses/penalties.
static const unsigned PriorityThree = 75; // Resource-availability bonus.
static const unsigned ScaleTwo = 10;      // Per-unit scale for height/depth
                                          // and blocked-node counts.
static const unsigned FactorOne = 2;      // Shift amount used to multiply the
                                          // cost when resources are available.
610 /// Single point to compute overall scheduling cost.
611 /// TODO: More heuristics will be used soon.
612 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
613 SchedCandidate &Candidate,
614 RegPressureDelta &Delta,
616 // Initial trivial priority.
619 // Do not waste time on a node that is already scheduled.
620 if (!SU || SU->isScheduled)
623 MachineInstr &Instr = *SU->getInstr();
625 DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
626 // Forced priority is high.
627 if (SU->isScheduleHigh) {
628 ResCount += PriorityOne;
629 DEBUG(dbgs() << "H|");
632 // Critical path first.
633 if (Q.getID() == TopQID) {
634 ResCount += (SU->getHeight() * ScaleTwo);
637 std::stringstream dbgstr;
638 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
639 dbgs() << dbgstr.str();
642 // If resources are available for it, multiply the
643 // chance of scheduling.
644 if (Top.ResourceModel->isResourceAvailable(SU)) {
645 ResCount <<= FactorOne;
646 ResCount += PriorityThree;
647 DEBUG(if (verbose) dbgs() << "A|");
649 DEBUG(if (verbose) dbgs() << " |");
651 ResCount += (SU->getDepth() * ScaleTwo);
654 std::stringstream dbgstr;
655 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
656 dbgs() << dbgstr.str();
659 // If resources are available for it, multiply the
660 // chance of scheduling.
661 if (Bot.ResourceModel->isResourceAvailable(SU)) {
662 ResCount <<= FactorOne;
663 ResCount += PriorityThree;
664 DEBUG(if (verbose) dbgs() << "A|");
666 DEBUG(if (verbose) dbgs() << " |");
669 unsigned NumNodesBlocking = 0;
670 if (Q.getID() == TopQID) {
671 // How many SUs does it block from scheduling?
672 // Look at all of the successors of this node.
673 // Count the number of nodes that
674 // this node is the sole unscheduled node for.
675 for (const SDep &SI : SU->Succs)
676 if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
679 // How many unscheduled predecessors block this node?
680 for (const SDep &PI : SU->Preds)
681 if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
684 ResCount += (NumNodesBlocking * ScaleTwo);
687 std::stringstream dbgstr;
688 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
689 dbgs() << dbgstr.str();
692 // Factor in reg pressure as a heuristic.
693 if (!IgnoreBBRegPressure) {
694 // Decrease priority by the amount that register pressure exceeds the limit.
695 ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
696 // Decrease priority if register pressure exceeds the limit.
697 ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
698 // Decrease priority slightly if register pressure would increase over the
700 ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
702 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
703 << Delta.CriticalMax.getUnitInc() <<"/"
704 << Delta.CurrentMax.getUnitInc() << ")|";
708 // Give a little extra priority to a .cur instruction if there is a resource
710 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
711 auto &QII = *QST.getInstrInfo();
712 if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
713 if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
714 ResCount += PriorityTwo;
715 DEBUG(if (verbose) dbgs() << "C|");
716 } else if (Q.getID() == BotQID &&
717 Bot.ResourceModel->isResourceAvailable(SU)) {
718 ResCount += PriorityTwo;
719 DEBUG(if (verbose) dbgs() << "C|");
723 // Give preference to a zero latency instruction if the dependent
724 // instruction is in the current packet.
725 if (Q.getID() == TopQID) {
726 for (const SDep &PI : SU->Preds) {
727 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
728 PI.getLatency() == 0 &&
729 Top.ResourceModel->isInPacket(PI.getSUnit())) {
730 ResCount += PriorityThree;
731 DEBUG(if (verbose) dbgs() << "Z|");
735 for (const SDep &SI : SU->Succs) {
736 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
737 SI.getLatency() == 0 &&
738 Bot.ResourceModel->isInPacket(SI.getSUnit())) {
739 ResCount += PriorityThree;
740 DEBUG(if (verbose) dbgs() << "Z|");
745 // Give less preference to an instruction that will cause a stall with
746 // an instruction in the previous packet.
747 if (QII.isHVXVec(Instr)) {
748 // Check for stalls in the previous packet.
749 if (Q.getID() == TopQID) {
750 for (auto J : Top.ResourceModel->OldPacket)
751 if (QII.producesStall(*J->getInstr(), Instr))
752 ResCount -= PriorityOne;
754 for (auto J : Bot.ResourceModel->OldPacket)
755 if (QII.producesStall(Instr, *J->getInstr()))
756 ResCount -= PriorityOne;
760 // If the instruction has a non-zero latency dependence with an instruction in
761 // the current packet, then it should not be scheduled yet. The case occurs
762 // when the dependent instruction is scheduled in a new packet, so the
763 // scheduler updates the current cycle and pending instructions become
765 if (CheckEarlyAvail) {
766 if (Q.getID() == TopQID) {
767 for (const auto &PI : SU->Preds) {
768 if (PI.getLatency() > 0 &&
769 Top.ResourceModel->isInPacket(PI.getSUnit())) {
770 ResCount -= PriorityOne;
771 DEBUG(if (verbose) dbgs() << "D|");
775 for (const auto &SI : SU->Succs) {
776 if (SI.getLatency() > 0 &&
777 Bot.ResourceModel->isInPacket(SI.getSUnit())) {
778 ResCount -= PriorityOne;
779 DEBUG(if (verbose) dbgs() << "D|");
786 std::stringstream dbgstr;
787 dbgstr << "Total " << std::setw(4) << ResCount << ")";
788 dbgs() << dbgstr.str();
794 /// Pick the best candidate from the top queue.
796 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
797 /// DAG building. To adjust for the current scheduling location we need to
798 /// maintain the number of vreg uses remaining to be top-scheduled.
799 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
800 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
801 SchedCandidate &Candidate) {
802 DEBUG(if (SchedDebugVerboseLevel > 1)
803 readyQueueVerboseDump(RPTracker, Candidate, Q);
806 // getMaxPressureDelta temporarily modifies the tracker.
807 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
809 // BestSU remains NULL if no top candidates beat the best existing candidate.
810 CandResult FoundCandidate = NoCand;
811 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
812 RegPressureDelta RPDelta;
813 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
814 DAG->getRegionCriticalPSets(),
815 DAG->getRegPressure().MaxSetPressure);
817 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
819 // Initialize the candidate if needed.
821 DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
823 Candidate.RPDelta = RPDelta;
824 Candidate.SCost = CurrentCost;
825 FoundCandidate = NodeOrder;
830 if (CurrentCost > Candidate.SCost) {
831 DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
833 Candidate.RPDelta = RPDelta;
834 Candidate.SCost = CurrentCost;
835 FoundCandidate = BestCost;
839 // Tie breaker using Timing Class.
841 auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
842 auto &QII = *QST.getInstrInfo();
844 const MachineInstr *MI = (*I)->getInstr();
845 const MachineInstr *CandI = Candidate.SU->getInstr();
846 const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
848 unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
849 unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
850 DEBUG(dbgs() << "TC Tie Breaker Cand: "
851 << CandLatency << " Instr:" << InstrLatency << "\n"
852 << *MI << *CandI << "\n");
853 if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
854 if (InstrLatency < CandLatency && TopUseShorterTie) {
856 Candidate.RPDelta = RPDelta;
857 Candidate.SCost = CurrentCost;
858 FoundCandidate = BestCost;
859 DEBUG(dbgs() << "Used top shorter tie breaker\n");
861 } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
863 Candidate.RPDelta = RPDelta;
864 Candidate.SCost = CurrentCost;
865 FoundCandidate = BestCost;
866 DEBUG(dbgs() << "Used top longer tie breaker\n");
869 } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
870 if (InstrLatency < CandLatency && BotUseShorterTie) {
872 Candidate.RPDelta = RPDelta;
873 Candidate.SCost = CurrentCost;
874 FoundCandidate = BestCost;
875 DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
877 } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
879 Candidate.RPDelta = RPDelta;
880 Candidate.SCost = CurrentCost;
881 FoundCandidate = BestCost;
882 DEBUG(dbgs() << "Used Bot longer tie breaker\n");
888 if (CurrentCost == Candidate.SCost) {
889 if ((Q.getID() == TopQID &&
890 (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
891 (Q.getID() == BotQID &&
892 (*I)->Preds.size() < Candidate.SU->Preds.size())) {
893 DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
895 Candidate.RPDelta = RPDelta;
896 Candidate.SCost = CurrentCost;
897 FoundCandidate = BestCost;
902 // Fall through to original instruction order.
903 // Only consider node order if Candidate was chosen from this Q.
904 if (FoundCandidate == NoCand)
907 return FoundCandidate;
910 /// Pick the best candidate node from either the top or bottom queue.
911 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
912 // Schedule as far as possible in the direction of no choice. This is most
913 // efficient, but also provides the best heuristics for CriticalPSets.
914 if (SUnit *SU = Bot.pickOnlyChoice()) {
915 DEBUG(dbgs() << "Picked only Bottom\n");
919 if (SUnit *SU = Top.pickOnlyChoice()) {
920 DEBUG(dbgs() << "Picked only Top\n");
924 SchedCandidate BotCand;
925 // Prefer bottom scheduling when heuristics are silent.
926 CandResult BotResult = pickNodeFromQueue(Bot.Available,
927 DAG->getBotRPTracker(), BotCand);
928 assert(BotResult != NoCand && "failed to find the first candidate");
930 // If either Q has a single candidate that provides the least increase in
931 // Excess pressure, we can immediately schedule from that Q.
933 // RegionCriticalPSets summarizes the pressure within the scheduled region and
934 // affects picking from either Q. If scheduling in one direction must
935 // increase pressure for one of the excess PSets, then schedule in that
936 // direction first to provide more freedom in the other direction.
937 if (BotResult == SingleExcess || BotResult == SingleCritical) {
938 DEBUG(dbgs() << "Prefered Bottom Node\n");
942 // Check if the top Q has a better candidate.
943 SchedCandidate TopCand;
944 CandResult TopResult = pickNodeFromQueue(Top.Available,
945 DAG->getTopRPTracker(), TopCand);
946 assert(TopResult != NoCand && "failed to find the first candidate");
948 if (TopResult == SingleExcess || TopResult == SingleCritical) {
949 DEBUG(dbgs() << "Prefered Top Node\n");
953 // If either Q has a single candidate that minimizes pressure above the
954 // original region's pressure pick it.
955 if (BotResult == SingleMax) {
956 DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
960 if (TopResult == SingleMax) {
961 DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
965 if (TopCand.SCost > BotCand.SCost) {
966 DEBUG(dbgs() << "Prefered Top Node Cost\n");
970 // Otherwise prefer the bottom candidate in node order.
971 DEBUG(dbgs() << "Prefered Bottom in Node order\n");
976 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
977 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
978 if (DAG->top() == DAG->bottom()) {
979 assert(Top.Available.empty() && Top.Pending.empty() &&
980 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
984 if (llvm::ForceTopDown) {
985 SU = Top.pickOnlyChoice();
987 SchedCandidate TopCand;
988 CandResult TopResult =
989 pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
990 assert(TopResult != NoCand && "failed to find the first candidate");
995 } else if (llvm::ForceBottomUp) {
996 SU = Bot.pickOnlyChoice();
998 SchedCandidate BotCand;
999 CandResult BotResult =
1000 pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
1001 assert(BotResult != NoCand && "failed to find the first candidate");
1007 SU = pickNodeBidrectional(IsTopNode);
1009 if (SU->isTopReady())
1010 Top.removeReady(SU);
1011 if (SU->isBottomReady())
1012 Bot.removeReady(SU);
1014 DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
1015 << " Scheduling Instruction in cycle "
1016 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
1025 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1027 SU->TopReadyCycle = Top.CurrCycle;
1030 SU->BotReadyCycle = Bot.CurrCycle;