contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp

   1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "HexagonMachineScheduler.h"
  16 #include "HexagonInstrInfo.h"
  17 #include "HexagonSubtarget.h"
  18 #include "llvm/ADT/SmallVector.h"
  19 #include "llvm/CodeGen/DFAPacketizer.h"
  20 #include "llvm/CodeGen/MachineBasicBlock.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstr.h"
  23 #include "llvm/CodeGen/MachineLoopInfo.h"
  24 #include "llvm/CodeGen/RegisterPressure.h"
  25 #include "llvm/CodeGen/ScheduleDAG.h"
  26 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  27 #include "llvm/CodeGen/TargetInstrInfo.h"
  28 #include "llvm/CodeGen/TargetOpcodes.h"
  29 #include "llvm/CodeGen/TargetRegisterInfo.h"
  30 #include "llvm/CodeGen/TargetSchedule.h"
  31 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  32 #include "llvm/IR/Function.h"
  33 #include "llvm/Support/CommandLine.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/raw_ostream.h"
  36 #include <algorithm>
  37 #include <cassert>
  38 #include <iomanip>
  39 #include <limits>
  40 #include <memory>
  41 #include <sstream>
  42
  43 using namespace llvm;
  44
  45 #define DEBUG_TYPE "machine-scheduler"
  46
  47 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
  48     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  49
  50 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
  51     cl::Hidden, cl::ZeroOrMore, cl::init(1));
  52
  53 static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
  54     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  55
  56 static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
  57     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  58
  59 static cl::opt<bool> DisableTCTie("disable-tc-tie",
  60     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  61
  62 // Check if the scheduler should penalize instructions that are available to
  63 // early due to a zero-latency dependence.
  64 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
  65     cl::ZeroOrMore, cl::init(true));
  66
  67 /// Save the last formed packet
  68 void VLIWResourceModel::savePacket() {
  69   OldPacket = Packet;
  70 }
  71
  72 /// Check if scheduling of this SU is possible
  73 /// in the current packet.
  74 /// It is _not_ precise (statefull), it is more like
  75 /// another heuristic. Many corner cases are figured
  76 /// empirically.
  77 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
  78   if (!SU || !SU->getInstr())
  79     return false;
  80
  81   // First see if the pipeline could receive this instruction
  82   // in the current cycle.
  83   switch (SU->getInstr()->getOpcode()) {
  84   default:
  85     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
  86       return false;
  87   case TargetOpcode::EXTRACT_SUBREG:
  88   case TargetOpcode::INSERT_SUBREG:
  89   case TargetOpcode::SUBREG_TO_REG:
  90   case TargetOpcode::REG_SEQUENCE:
  91   case TargetOpcode::IMPLICIT_DEF:
  92   case TargetOpcode::COPY:
  93   case TargetOpcode::INLINEASM:
  94     break;
  95   }
  96
  97   MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
  98   auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
  99
 100   // Now see if there are no other dependencies to instructions already
 101   // in the packet.
 102   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 103     if (Packet[i]->Succs.size() == 0)
 104       continue;
 105
 106     // Enable .cur formation.
 107     if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
 108       continue;
 109
 110     for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
 111          E = Packet[i]->Succs.end(); I != E; ++I) {
 112       // Since we do not add pseudos to packets, might as well
 113       // ignore order dependencies.
 114       if (I->isCtrl())
 115         continue;
 116
 117       if (I->getSUnit() == SU)
 118         return false;
 119     }
 120   }
 121   return true;
 122 }
 123
 124 /// Keep track of available resources.
 125 bool VLIWResourceModel::reserveResources(SUnit *SU) {
 126   bool startNewCycle = false;
 127   // Artificially reset state.
 128   if (!SU) {
 129     ResourcesModel->clearResources();
 130     savePacket();
 131     Packet.clear();
 132     TotalPackets++;
 133     return false;
 134   }
 135   // If this SU does not fit in the packet
 136   // start a new one.
 137   if (!isResourceAvailable(SU)) {
 138     ResourcesModel->clearResources();
 139     savePacket();
 140     Packet.clear();
 141     TotalPackets++;
 142     startNewCycle = true;
 143   }
 144
 145   switch (SU->getInstr()->getOpcode()) {
 146   default:
 147     ResourcesModel->reserveResources(*SU->getInstr());
 148     break;
 149   case TargetOpcode::EXTRACT_SUBREG:
 150   case TargetOpcode::INSERT_SUBREG:
 151   case TargetOpcode::SUBREG_TO_REG:
 152   case TargetOpcode::REG_SEQUENCE:
 153   case TargetOpcode::IMPLICIT_DEF:
 154   case TargetOpcode::KILL:
 155   case TargetOpcode::CFI_INSTRUCTION:
 156   case TargetOpcode::EH_LABEL:
 157   case TargetOpcode::COPY:
 158   case TargetOpcode::INLINEASM:
 159     break;
 160   }
 161   Packet.push_back(SU);
 162
 163 #ifndef NDEBUG
 164   DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
 165   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 166     DEBUG(dbgs() << "\t[" << i << "] SU(");
 167     DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
 168     DEBUG(Packet[i]->getInstr()->dump());
 169   }
 170 #endif
 171
 172   // If packet is now full, reset the state so in the next cycle
 173   // we start fresh.
 174   if (Packet.size() >= SchedModel->getIssueWidth()) {
 175     ResourcesModel->clearResources();
 176     savePacket();
 177     Packet.clear();
 178     TotalPackets++;
 179     startNewCycle = true;
 180   }
 181
 182   return startNewCycle;
 183 }
 184
 185 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 186 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 187 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 188 void VLIWMachineScheduler::schedule() {
 189   DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
 190                << printMBBReference(*BB) << " " << BB->getName() << " in_func "
 191                << BB->getParent()->getName() << " at loop depth "
 192                << MLI->getLoopDepth(BB) << " \n");
 193
 194   buildDAGWithRegPressure();
 195
 196   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 197   findRootsAndBiasEdges(TopRoots, BotRoots);
 198
 199   // Initialize the strategy before modifying the DAG.
 200   SchedImpl->initialize(this);
 201
 202   DEBUG(unsigned maxH = 0;
 203         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 204           if (SUnits[su].getHeight() > maxH)
 205             maxH = SUnits[su].getHeight();
 206         dbgs() << "Max Height " << maxH << "\n";);
 207   DEBUG(unsigned maxD = 0;
 208         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 209           if (SUnits[su].getDepth() > maxD)
 210             maxD = SUnits[su].getDepth();
 211         dbgs() << "Max Depth " << maxD << "\n";);
 212   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 213           SUnits[su].dumpAll(this));
 214
 215   initQueues(TopRoots, BotRoots);
 216
 217   bool IsTopNode = false;
 218   while (true) {
 219     DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
 220     SUnit *SU = SchedImpl->pickNode(IsTopNode);
 221     if (!SU) break;
 222
 223     if (!checkSchedLimit())
 224       break;
 225
 226     scheduleMI(SU, IsTopNode);
 227
 228     updateQueues(SU, IsTopNode);
 229
 230     // Notify the scheduling strategy after updating the DAG.
 231     SchedImpl->schedNode(SU, IsTopNode);
 232   }
 233   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 234
 235   placeDebugValues();
 236
 237   DEBUG({
 238     dbgs() << "*** Final schedule for "
 239            << printMBBReference(*begin()->getParent()) << " ***\n";
 240     dumpSchedule();
 241     dbgs() << '\n';
 242   });
 243 }
 244
 245 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
 246   DAG = static_cast<VLIWMachineScheduler*>(dag);
 247   SchedModel = DAG->getSchedModel();
 248
 249   Top.init(DAG, SchedModel);
 250   Bot.init(DAG, SchedModel);
 251
 252   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
 253   // are disabled, then these HazardRecs will be disabled.
 254   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
 255   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
 256   const TargetInstrInfo *TII = STI.getInstrInfo();
 257   delete Top.HazardRec;
 258   delete Bot.HazardRec;
 259   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 260   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 261
 262   delete Top.ResourceModel;
 263   delete Bot.ResourceModel;
 264   Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
 265   Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
 266
 267   assert((!ForceTopDown || !ForceBottomUp) &&
 268          "-misched-topdown incompatible with -misched-bottomup");
 269 }
 270
 271 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
 272   if (SU->isScheduled)
 273     return;
 274
 275   for (const SDep &PI : SU->Preds) {
 276     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
 277     unsigned MinLatency = PI.getLatency();
 278 #ifndef NDEBUG
 279     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
 280 #endif
 281     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
 282       SU->TopReadyCycle = PredReadyCycle + MinLatency;
 283   }
 284   Top.releaseNode(SU, SU->TopReadyCycle);
 285 }
 286
 287 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
 288   if (SU->isScheduled)
 289     return;
 290
 291   assert(SU->getInstr() && "Scheduled SUnit must have instr");
 292
 293   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 294        I != E; ++I) {
 295     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
 296     unsigned MinLatency = I->getLatency();
 297 #ifndef NDEBUG
 298     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
 299 #endif
 300     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
 301       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
 302   }
 303   Bot.releaseNode(SU, SU->BotReadyCycle);
 304 }
 305
 306 /// Does this SU have a hazard within the current instruction group.
 307 ///
 308 /// The scheduler supports two modes of hazard recognition. The first is the
 309 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
 310 /// supports highly complicated in-order reservation tables
 311 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
 312 ///
 313 /// The second is a streamlined mechanism that checks for hazards based on
 314 /// simple counters that the scheduler itself maintains. It explicitly checks
 315 /// for instruction dispatch limitations, including the number of micro-ops that
 316 /// can dispatch per cycle.
 317 ///
 318 /// TODO: Also check whether the SU must start a new group.
 319 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
 320   if (HazardRec->isEnabled())
 321     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
 322
 323   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
 324   if (IssueCount + uops > SchedModel->getIssueWidth())
 325     return true;
 326
 327   return false;
 328 }
 329
 330 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
 331                                                      unsigned ReadyCycle) {
 332   if (ReadyCycle < MinReadyCycle)
 333     MinReadyCycle = ReadyCycle;
 334
 335   // Check for interlocks first. For the purpose of other heuristics, an
 336   // instruction that cannot issue appears as if it's not in the ReadyQueue.
 337   if (ReadyCycle > CurrCycle || checkHazard(SU))
 338
 339     Pending.push(SU);
 340   else
 341     Available.push(SU);
 342 }
 343
 344 /// Move the boundary of scheduled code by one cycle.
 345 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
 346   unsigned Width = SchedModel->getIssueWidth();
 347   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
 348
 349   assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
 350          "MinReadyCycle uninitialized");
 351   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
 352
 353   if (!HazardRec->isEnabled()) {
 354     // Bypass HazardRec virtual calls.
 355     CurrCycle = NextCycle;
 356   } else {
 357     // Bypass getHazardType calls in case of long latency.
 358     for (; CurrCycle != NextCycle; ++CurrCycle) {
 359       if (isTop())
 360         HazardRec->AdvanceCycle();
 361       else
 362         HazardRec->RecedeCycle();
 363     }
 364   }
 365   CheckPending = true;
 366
 367   DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
 368                << CurrCycle << '\n');
 369 }
 370
 371 /// Move the boundary of scheduled code by one SUnit.
 372 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
 373   bool startNewCycle = false;
 374
 375   // Update the reservation table.
 376   if (HazardRec->isEnabled()) {
 377     if (!isTop() && SU->isCall) {
 378       // Calls are scheduled with their preceding instructions. For bottom-up
 379       // scheduling, clear the pipeline state before emitting.
 380       HazardRec->Reset();
 381     }
 382     HazardRec->EmitInstruction(SU);
 383   }
 384
 385   // Update DFA model.
 386   startNewCycle = ResourceModel->reserveResources(SU);
 387
 388   // Check the instruction group dispatch limit.
 389   // TODO: Check if this SU must end a dispatch group.
 390   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
 391   if (startNewCycle) {
 392     DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
 393     bumpCycle();
 394   }
 395   else
 396     DEBUG(dbgs() << "*** IssueCount " << IssueCount
 397           << " at cycle " << CurrCycle << '\n');
 398 }
 399
 400 /// Release pending ready nodes in to the available queue. This makes them
 401 /// visible to heuristics.
 402 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
 403   // If the available queue is empty, it is safe to reset MinReadyCycle.
 404   if (Available.empty())
 405     MinReadyCycle = std::numeric_limits<unsigned>::max();
 406
 407   // Check to see if any of the pending instructions are ready to issue.  If
 408   // so, add them to the available queue.
 409   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
 410     SUnit *SU = *(Pending.begin()+i);
 411     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
 412
 413     if (ReadyCycle < MinReadyCycle)
 414       MinReadyCycle = ReadyCycle;
 415
 416     if (ReadyCycle > CurrCycle)
 417       continue;
 418
 419     if (checkHazard(SU))
 420       continue;
 421
 422     Available.push(SU);
 423     Pending.remove(Pending.begin()+i);
 424     --i; --e;
 425   }
 426   CheckPending = false;
 427 }
 428
 429 /// Remove SU from the ready set for this boundary.
 430 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
 431   if (Available.isInQueue(SU))
 432     Available.remove(Available.find(SU));
 433   else {
 434     assert(Pending.isInQueue(SU) && "bad ready count");
 435     Pending.remove(Pending.find(SU));
 436   }
 437 }
 438
 439 /// If this queue only has one ready candidate, return it. As a side effect,
 440 /// advance the cycle until at least one node is ready. If multiple instructions
 441 /// are ready, return NULL.
 442 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
 443   if (CheckPending)
 444     releasePending();
 445
 446   for (unsigned i = 0; Available.empty(); ++i) {
 447     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
 448            "permanent hazard"); (void)i;
 449     ResourceModel->reserveResources(nullptr);
 450     bumpCycle();
 451     releasePending();
 452   }
 453   if (Available.size() == 1)
 454     return *Available.begin();
 455   return nullptr;
 456 }
 457
 458 #ifndef NDEBUG
 459 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
 460       const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
 461   dbgs() << Label << " " << Q.getName() << " ";
 462   if (P.isValid())
 463     dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
 464            << P.getUnitInc() << " ";
 465   else
 466     dbgs() << "     ";
 467   dbgs() << "cost(" << Cost << ")\t";
 468   SU->dump(DAG);
 469 }
 470
 471 // Very detailed queue dump, to be used with higher verbosity levels.
 472 void ConvergingVLIWScheduler::readyQueueVerboseDump(
 473       const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
 474       ReadyQueue &Q) {
 475   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
 476
 477   dbgs() << ">>> " << Q.getName() << "\n";
 478   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 479     RegPressureDelta RPDelta;
 480     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 481                                     DAG->getRegionCriticalPSets(),
 482                                     DAG->getRegPressure().MaxSetPressure);
 483     std::stringstream dbgstr;
 484     dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
 485     dbgs() << dbgstr.str();
 486     SchedulingCost(Q, *I, Candidate, RPDelta, true);
 487     dbgs() << "\t";
 488     (*I)->getInstr()->dump();
 489   }
 490   dbgs() << "\n";
 491 }
 492 #endif
 493
 494 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
 495 /// of SU, return true (we may have duplicates)
 496 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
 497   if (SU->NumPredsLeft == 0)
 498     return false;
 499
 500   for (auto &Pred : SU->Preds) {
 501     // We found an available, but not scheduled, predecessor.
 502     if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
 503       return false;
 504   }
 505
 506   return true;
 507 }
 508
 509 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
 510 /// of SU, return true (we may have duplicates)
 511 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
 512   if (SU->NumSuccsLeft == 0)
 513     return false;
 514
 515   for (auto &Succ : SU->Succs) {
 516     // We found an available, but not scheduled, successor.
 517     if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
 518       return false;
 519   }
 520   return true;
 521 }
 522
 523 // Constants used to denote relative importance of
 524 // heuristic components for cost computation.
 525 static const unsigned PriorityOne = 200;
 526 static const unsigned PriorityTwo = 50;
 527 static const unsigned PriorityThree = 75;
 528 static const unsigned ScaleTwo = 10;
 529 static const unsigned FactorOne = 2;
 530
 531 /// Single point to compute overall scheduling cost.
 532 /// TODO: More heuristics will be used soon.
 533 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
 534                                             SchedCandidate &Candidate,
 535                                             RegPressureDelta &Delta,
 536                                             bool verbose) {
 537   // Initial trivial priority.
 538   int ResCount = 1;
 539
 540   // Do not waste time on a node that is already scheduled.
 541   if (!SU || SU->isScheduled)
 542     return ResCount;
 543
 544   MachineInstr &Instr = *SU->getInstr();
 545
 546   DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
 547   // Forced priority is high.
 548   if (SU->isScheduleHigh) {
 549     ResCount += PriorityOne;
 550     DEBUG(dbgs() << "H|");
 551   }
 552
 553   // Critical path first.
 554   if (Q.getID() == TopQID) {
 555     ResCount += (SU->getHeight() * ScaleTwo);
 556
 557     DEBUG(if (verbose) {
 558       std::stringstream dbgstr;
 559       dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
 560       dbgs() << dbgstr.str();
 561     });
 562
 563     // If resources are available for it, multiply the
 564     // chance of scheduling.
 565     if (Top.ResourceModel->isResourceAvailable(SU)) {
 566       ResCount <<= FactorOne;
 567       ResCount += PriorityThree;
 568       DEBUG(if (verbose) dbgs() << "A|");
 569     } else
 570       DEBUG(if (verbose) dbgs() << " |");
 571   } else {
 572     ResCount += (SU->getDepth() * ScaleTwo);
 573
 574     DEBUG(if (verbose) {
 575       std::stringstream dbgstr;
 576       dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
 577       dbgs() << dbgstr.str();
 578     });
 579
 580     // If resources are available for it, multiply the
 581     // chance of scheduling.
 582     if (Bot.ResourceModel->isResourceAvailable(SU)) {
 583       ResCount <<= FactorOne;
 584       ResCount += PriorityThree;
 585       DEBUG(if (verbose) dbgs() << "A|");
 586     } else
 587       DEBUG(if (verbose) dbgs() << " |");
 588   }
 589
 590   unsigned NumNodesBlocking = 0;
 591   if (Q.getID() == TopQID) {
 592     // How many SUs does it block from scheduling?
 593     // Look at all of the successors of this node.
 594     // Count the number of nodes that
 595     // this node is the sole unscheduled node for.
 596     for (const SDep &SI : SU->Succs)
 597       if (isSingleUnscheduledPred(SI.getSUnit(), SU))
 598         ++NumNodesBlocking;
 599   } else {
 600     // How many unscheduled predecessors block this node?
 601     for (const SDep &PI : SU->Preds)
 602       if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
 603         ++NumNodesBlocking;
 604   }
 605   ResCount += (NumNodesBlocking * ScaleTwo);
 606
 607   DEBUG(if (verbose) {
 608     std::stringstream dbgstr;
 609     dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
 610     dbgs() << dbgstr.str();
 611   });
 612
 613   // Factor in reg pressure as a heuristic.
 614   if (!IgnoreBBRegPressure) {
 615     // Decrease priority by the amount that register pressure exceeds the limit.
 616     ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
 617     // Decrease priority if register pressure exceeds the limit.
 618     ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
 619     // Decrease priority slightly if register pressure would increase over the
 620     // current maximum.
 621     ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
 622     DEBUG(if (verbose) {
 623         dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
 624                << Delta.CriticalMax.getUnitInc() <<"/"
 625                << Delta.CurrentMax.getUnitInc() << ")|";
 626     });
 627   }
 628
 629   // Give a little extra priority to a .cur instruction if there is a resource
 630   // available for it.
 631   auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
 632   auto &QII = *QST.getInstrInfo();
 633   if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
 634     if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
 635       ResCount += PriorityTwo;
 636       DEBUG(if (verbose) dbgs() << "C|");
 637     } else if (Q.getID() == BotQID &&
 638                Bot.ResourceModel->isResourceAvailable(SU)) {
 639       ResCount += PriorityTwo;
 640       DEBUG(if (verbose) dbgs() << "C|");
 641     }
 642   }
 643
 644   // Give preference to a zero latency instruction if the dependent
 645   // instruction is in the current packet.
 646   if (Q.getID() == TopQID) {
 647     for (const SDep &PI : SU->Preds) {
 648       if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
 649           PI.getLatency() == 0 &&
 650           Top.ResourceModel->isInPacket(PI.getSUnit())) {
 651         ResCount += PriorityThree;
 652         DEBUG(if (verbose) dbgs() << "Z|");
 653       }
 654     }
 655   } else {
 656     for (const SDep &SI : SU->Succs) {
 657       if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
 658           SI.getLatency() == 0 &&
 659           Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 660         ResCount += PriorityThree;
 661         DEBUG(if (verbose) dbgs() << "Z|");
 662       }
 663     }
 664   }
 665
 666   // Give less preference to an instruction that will cause a stall with
 667   // an instruction in the previous packet.
 668   if (QII.isHVXVec(Instr)) {
 669     // Check for stalls in the previous packet.
 670     if (Q.getID() == TopQID) {
 671       for (auto J : Top.ResourceModel->OldPacket)
 672         if (QII.producesStall(*J->getInstr(), Instr))
 673           ResCount -= PriorityOne;
 674     } else {
 675       for (auto J : Bot.ResourceModel->OldPacket)
 676         if (QII.producesStall(Instr, *J->getInstr()))
 677           ResCount -= PriorityOne;
 678     }
 679   }
 680
 681   // If the instruction has a non-zero latency dependence with an instruction in
 682   // the current packet, then it should not be scheduled yet. The case occurs
 683   // when the dependent instruction is scheduled in a new packet, so the
 684   // scheduler updates the current cycle and pending instructions become
 685   // available.
 686   if (CheckEarlyAvail) {
 687     if (Q.getID() == TopQID) {
 688       for (const auto &PI : SU->Preds) {
 689         if (PI.getLatency() > 0 &&
 690             Top.ResourceModel->isInPacket(PI.getSUnit())) {
 691           ResCount -= PriorityOne;
 692           DEBUG(if (verbose) dbgs() << "D|");
 693         }
 694       }
 695     } else {
 696       for (const auto &SI : SU->Succs) {
 697         if (SI.getLatency() > 0 &&
 698             Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 699           ResCount -= PriorityOne;
 700           DEBUG(if (verbose) dbgs() << "D|");
 701         }
 702       }
 703     }
 704   }
 705
 706   DEBUG(if (verbose) {
 707     std::stringstream dbgstr;
 708     dbgstr << "Total " << std::setw(4) << ResCount << ")";
 709     dbgs() << dbgstr.str();
 710   });
 711
 712   return ResCount;
 713 }
 714
 715 /// Pick the best candidate from the top queue.
 716 ///
 717 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
 718 /// DAG building. To adjust for the current scheduling location we need to
 719 /// maintain the number of vreg uses remaining to be top-scheduled.
 720 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
 721 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
 722                   SchedCandidate &Candidate) {
 723   DEBUG(if (SchedDebugVerboseLevel > 1)
 724         readyQueueVerboseDump(RPTracker, Candidate, Q);
 725         else Q.dump(););
 726
 727   // getMaxPressureDelta temporarily modifies the tracker.
 728   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
 729
 730   // BestSU remains NULL if no top candidates beat the best existing candidate.
 731   CandResult FoundCandidate = NoCand;
 732   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 733     RegPressureDelta RPDelta;
 734     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 735                                     DAG->getRegionCriticalPSets(),
 736                                     DAG->getRegPressure().MaxSetPressure);
 737
 738     int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
 739
 740     // Initialize the candidate if needed.
 741     if (!Candidate.SU) {
 742       DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
 743       Candidate.SU = *I;
 744       Candidate.RPDelta = RPDelta;
 745       Candidate.SCost = CurrentCost;
 746       FoundCandidate = NodeOrder;
 747       continue;
 748     }
 749
 750     // Best cost.
 751     if (CurrentCost > Candidate.SCost) {
 752       DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
 753       Candidate.SU = *I;
 754       Candidate.RPDelta = RPDelta;
 755       Candidate.SCost = CurrentCost;
 756       FoundCandidate = BestCost;
 757       continue;
 758     }
 759
 760     // Tie breaker using Timing Class.
 761     if (!DisableTCTie) {
 762       auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
 763       auto &QII = *QST.getInstrInfo();
 764
 765       const MachineInstr *MI = (*I)->getInstr();
 766       const MachineInstr *CandI = Candidate.SU->getInstr();
 767       const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
 768
 769       unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
 770       unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
 771       DEBUG(dbgs() << "TC Tie Breaker Cand: "
 772                    << CandLatency << " Instr:" << InstrLatency << "\n"
 773                    << *MI << *CandI << "\n");
 774       if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
 775         if (InstrLatency < CandLatency && TopUseShorterTie) {
 776           Candidate.SU = *I;
 777           Candidate.RPDelta = RPDelta;
 778           Candidate.SCost = CurrentCost;
 779           FoundCandidate = BestCost;
 780           DEBUG(dbgs() << "Used top shorter tie breaker\n");
 781           continue;
 782         } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
 783           Candidate.SU = *I;
 784           Candidate.RPDelta = RPDelta;
 785           Candidate.SCost = CurrentCost;
 786           FoundCandidate = BestCost;
 787           DEBUG(dbgs() << "Used top longer tie breaker\n");
 788           continue;
 789         }
 790       } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
 791         if (InstrLatency < CandLatency && BotUseShorterTie) {
 792           Candidate.SU = *I;
 793           Candidate.RPDelta = RPDelta;
 794           Candidate.SCost = CurrentCost;
 795           FoundCandidate = BestCost;
 796           DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
 797           continue;
 798         } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
 799           Candidate.SU = *I;
 800           Candidate.RPDelta = RPDelta;
 801           Candidate.SCost = CurrentCost;
 802           FoundCandidate = BestCost;
 803           DEBUG(dbgs() << "Used Bot longer tie breaker\n");
 804           continue;
 805         }
 806       }
 807     }
 808
 809     if (CurrentCost == Candidate.SCost) {
 810       if ((Q.getID() == TopQID &&
 811            (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
 812           (Q.getID() == BotQID &&
 813            (*I)->Preds.size() < Candidate.SU->Preds.size())) {
 814         DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
 815         Candidate.SU = *I;
 816         Candidate.RPDelta = RPDelta;
 817         Candidate.SCost = CurrentCost;
 818         FoundCandidate = BestCost;
 819         continue;
 820       }
 821     }
 822
 823     // Fall through to original instruction order.
 824     // Only consider node order if Candidate was chosen from this Q.
 825     if (FoundCandidate == NoCand)
 826       continue;
 827   }
 828   return FoundCandidate;
 829 }
 830
 831 /// Pick the best candidate node from either the top or bottom queue.
 832 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
 833   // Schedule as far as possible in the direction of no choice. This is most
 834   // efficient, but also provides the best heuristics for CriticalPSets.
 835   if (SUnit *SU = Bot.pickOnlyChoice()) {
 836     DEBUG(dbgs() << "Picked only Bottom\n");
 837     IsTopNode = false;
 838     return SU;
 839   }
 840   if (SUnit *SU = Top.pickOnlyChoice()) {
 841     DEBUG(dbgs() << "Picked only Top\n");
 842     IsTopNode = true;
 843     return SU;
 844   }
 845   SchedCandidate BotCand;
 846   // Prefer bottom scheduling when heuristics are silent.
 847   CandResult BotResult = pickNodeFromQueue(Bot.Available,
 848                                            DAG->getBotRPTracker(), BotCand);
 849   assert(BotResult != NoCand && "failed to find the first candidate");
 850
 851   // If either Q has a single candidate that provides the least increase in
 852   // Excess pressure, we can immediately schedule from that Q.
 853   //
 854   // RegionCriticalPSets summarizes the pressure within the scheduled region and
 855   // affects picking from either Q. If scheduling in one direction must
 856   // increase pressure for one of the excess PSets, then schedule in that
 857   // direction first to provide more freedom in the other direction.
 858   if (BotResult == SingleExcess || BotResult == SingleCritical) {
 859     DEBUG(dbgs() << "Prefered Bottom Node\n");
 860     IsTopNode = false;
 861     return BotCand.SU;
 862   }
 863   // Check if the top Q has a better candidate.
 864   SchedCandidate TopCand;
 865   CandResult TopResult = pickNodeFromQueue(Top.Available,
 866                                            DAG->getTopRPTracker(), TopCand);
 867   assert(TopResult != NoCand && "failed to find the first candidate");
 868
 869   if (TopResult == SingleExcess || TopResult == SingleCritical) {
 870     DEBUG(dbgs() << "Prefered Top Node\n");
 871     IsTopNode = true;
 872     return TopCand.SU;
 873   }
 874   // If either Q has a single candidate that minimizes pressure above the
 875   // original region's pressure pick it.
 876   if (BotResult == SingleMax) {
 877     DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
 878     IsTopNode = false;
 879     return BotCand.SU;
 880   }
 881   if (TopResult == SingleMax) {
 882     DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
 883     IsTopNode = true;
 884     return TopCand.SU;
 885   }
 886   if (TopCand.SCost > BotCand.SCost) {
 887     DEBUG(dbgs() << "Prefered Top Node Cost\n");
 888     IsTopNode = true;
 889     return TopCand.SU;
 890   }
 891   // Otherwise prefer the bottom candidate in node order.
 892   DEBUG(dbgs() << "Prefered Bottom in Node order\n");
 893   IsTopNode = false;
 894   return BotCand.SU;
 895 }
 896
 897 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
 898 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
 899   if (DAG->top() == DAG->bottom()) {
 900     assert(Top.Available.empty() && Top.Pending.empty() &&
 901            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
 902     return nullptr;
 903   }
 904   SUnit *SU;
 905   if (ForceTopDown) {
 906     SU = Top.pickOnlyChoice();
 907     if (!SU) {
 908       SchedCandidate TopCand;
 909       CandResult TopResult =
 910         pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
 911       assert(TopResult != NoCand && "failed to find the first candidate");
 912       (void)TopResult;
 913       SU = TopCand.SU;
 914     }
 915     IsTopNode = true;
 916   } else if (ForceBottomUp) {
 917     SU = Bot.pickOnlyChoice();
 918     if (!SU) {
 919       SchedCandidate BotCand;
 920       CandResult BotResult =
 921         pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
 922       assert(BotResult != NoCand && "failed to find the first candidate");
 923       (void)BotResult;
 924       SU = BotCand.SU;
 925     }
 926     IsTopNode = false;
 927   } else {
 928     SU = pickNodeBidrectional(IsTopNode);
 929   }
 930   if (SU->isTopReady())
 931     Top.removeReady(SU);
 932   if (SU->isBottomReady())
 933     Bot.removeReady(SU);
 934
 935   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
 936         << " Scheduling Instruction in cycle "
 937         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
 938         SU->dump(DAG));
 939   return SU;
 940 }
 941
 942 /// Update the scheduler's state after scheduling a node. This is the same node
 943 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
 944 /// to update it's state based on the current cycle before MachineSchedStrategy
 945 /// does.
 946 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 947   if (IsTopNode) {
 948     SU->TopReadyCycle = Top.CurrCycle;
 949     Top.bumpNode(SU);
 950   } else {
 951     SU->BotReadyCycle = Bot.CurrCycle;
 952     Bot.bumpNode(SU);
 953   }
 954 }