contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp

   1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "HexagonMachineScheduler.h"
  16 #include "HexagonSubtarget.h"
  17 #include "llvm/CodeGen/MachineLoopInfo.h"
  18 #include "llvm/CodeGen/ScheduleDAGMutation.h"
  19 #include "llvm/IR/Function.h"
  20
  21 #include <iomanip>
  22 #include <sstream>
  23
  24 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
  25     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  26
  27 static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
  28     cl::Hidden, cl::ZeroOrMore, cl::init(true));
  29
  30 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
  31     cl::Hidden, cl::ZeroOrMore, cl::init(1));
  32
  33 static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
  34     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  35
  36 static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
  37     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  38
  39 static cl::opt<bool> DisableTCTie("disable-tc-tie",
  40     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  41
  42 static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
  43     cl::Hidden, cl::ZeroOrMore, cl::init(true));
  44
  45 // Check if the scheduler should penalize instructions that are available to
  46 // early due to a zero-latency dependence.
  47 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
  48     cl::ZeroOrMore, cl::init(true));
  49
  50 using namespace llvm;
  51
  52 #define DEBUG_TYPE "misched"
  53
  54 namespace {
  55 class HexagonCallMutation : public ScheduleDAGMutation {
  56 public:
  57   void apply(ScheduleDAGInstrs *DAG) override;
  58 private:
  59   bool shouldTFRICallBind(const HexagonInstrInfo &HII,
  60                           const SUnit &Inst1, const SUnit &Inst2) const;
  61 };
  62 } // end anonymous namespace
  63
  64 // Check if a call and subsequent A2_tfrpi instructions should maintain
  65 // scheduling affinity. We are looking for the TFRI to be consumed in
  66 // the next instruction. This should help reduce the instances of
  67 // double register pairs being allocated and scheduled before a call
  68 // when not used until after the call. This situation is exacerbated
  69 // by the fact that we allocate the pair from the callee saves list,
  70 // leading to excess spills and restores.
  71 bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
  72       const SUnit &Inst1, const SUnit &Inst2) const {
  73   if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
  74     return false;
  75
  76   // TypeXTYPE are 64 bit operations.
  77   unsigned Type = HII.getType(*Inst2.getInstr());
  78   if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
  79     Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM)
  80     return true;
  81   return false;
  82 }
  83
  84 void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
  85   SUnit* LastSequentialCall = nullptr;
  86   unsigned VRegHoldingRet = 0;
  87   unsigned RetRegister;
  88   SUnit* LastUseOfRet = nullptr;
  89   auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  90   auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
  91
  92   // Currently we only catch the situation when compare gets scheduled
  93   // before preceding call.
  94   for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
  95     // Remember the call.
  96     if (DAG->SUnits[su].getInstr()->isCall())
  97       LastSequentialCall = &DAG->SUnits[su];
  98     // Look for a compare that defines a predicate.
  99     else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
 100       DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
 101     // Look for call and tfri* instructions.
 102     else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
 103              shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
 104       DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
 105     // Prevent redundant register copies between two calls, which are caused by
 106     // both the return value and the argument for the next call being in %R0.
 107     // Example:
 108     //   1: <call1>
 109     //   2: %VregX = COPY %R0
 110     //   3: <use of %VregX>
 111     //   4: %R0 = ...
 112     //   5: <call2>
 113     // The scheduler would often swap 3 and 4, so an additional register is
 114     // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
 115     // this. The same applies for %D0 and %V0/%W0, which are also handled.
 116     else if (SchedRetvalOptimization) {
 117       const MachineInstr *MI = DAG->SUnits[su].getInstr();
 118       if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
 119                            MI->readsRegister(Hexagon::V0, &TRI)))  {
 120         // %vregX = COPY %R0
 121         VRegHoldingRet = MI->getOperand(0).getReg();
 122         RetRegister = MI->getOperand(1).getReg();
 123         LastUseOfRet = nullptr;
 124       } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
 125         // <use of %vregX>
 126         LastUseOfRet = &DAG->SUnits[su];
 127       else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
 128         // %R0 = ...
 129         DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
 130     }
 131   }
 132 }
 133
 134
 135 /// Save the last formed packet
 136 void VLIWResourceModel::savePacket() {
 137   OldPacket = Packet;
 138 }
 139
 140 /// Check if scheduling of this SU is possible
 141 /// in the current packet.
 142 /// It is _not_ precise (statefull), it is more like
 143 /// another heuristic. Many corner cases are figured
 144 /// empirically.
 145 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
 146   if (!SU || !SU->getInstr())
 147     return false;
 148
 149   // First see if the pipeline could receive this instruction
 150   // in the current cycle.
 151   switch (SU->getInstr()->getOpcode()) {
 152   default:
 153     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
 154       return false;
 155   case TargetOpcode::EXTRACT_SUBREG:
 156   case TargetOpcode::INSERT_SUBREG:
 157   case TargetOpcode::SUBREG_TO_REG:
 158   case TargetOpcode::REG_SEQUENCE:
 159   case TargetOpcode::IMPLICIT_DEF:
 160   case TargetOpcode::COPY:
 161   case TargetOpcode::INLINEASM:
 162     break;
 163   }
 164
 165   MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
 166   auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
 167
 168   // Now see if there are no other dependencies to instructions already
 169   // in the packet.
 170   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 171     if (Packet[i]->Succs.size() == 0)
 172       continue;
 173
 174     // Enable .cur formation.
 175     if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
 176       continue;
 177
 178     for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
 179          E = Packet[i]->Succs.end(); I != E; ++I) {
 180       // Since we do not add pseudos to packets, might as well
 181       // ignore order dependencies.
 182       if (I->isCtrl())
 183         continue;
 184
 185       if (I->getSUnit() == SU)
 186         return false;
 187     }
 188   }
 189   return true;
 190 }
 191
 192 /// Keep track of available resources.
 193 bool VLIWResourceModel::reserveResources(SUnit *SU) {
 194   bool startNewCycle = false;
 195   // Artificially reset state.
 196   if (!SU) {
 197     ResourcesModel->clearResources();
 198     savePacket();
 199     Packet.clear();
 200     TotalPackets++;
 201     return false;
 202   }
 203   // If this SU does not fit in the packet
 204   // start a new one.
 205   if (!isResourceAvailable(SU)) {
 206     ResourcesModel->clearResources();
 207     savePacket();
 208     Packet.clear();
 209     TotalPackets++;
 210     startNewCycle = true;
 211   }
 212
 213   switch (SU->getInstr()->getOpcode()) {
 214   default:
 215     ResourcesModel->reserveResources(*SU->getInstr());
 216     break;
 217   case TargetOpcode::EXTRACT_SUBREG:
 218   case TargetOpcode::INSERT_SUBREG:
 219   case TargetOpcode::SUBREG_TO_REG:
 220   case TargetOpcode::REG_SEQUENCE:
 221   case TargetOpcode::IMPLICIT_DEF:
 222   case TargetOpcode::KILL:
 223   case TargetOpcode::CFI_INSTRUCTION:
 224   case TargetOpcode::EH_LABEL:
 225   case TargetOpcode::COPY:
 226   case TargetOpcode::INLINEASM:
 227     break;
 228   }
 229   Packet.push_back(SU);
 230
 231 #ifndef NDEBUG
 232   DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
 233   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 234     DEBUG(dbgs() << "\t[" << i << "] SU(");
 235     DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
 236     DEBUG(Packet[i]->getInstr()->dump());
 237   }
 238 #endif
 239
 240   // If packet is now full, reset the state so in the next cycle
 241   // we start fresh.
 242   if (Packet.size() >= SchedModel->getIssueWidth()) {
 243     ResourcesModel->clearResources();
 244     savePacket();
 245     Packet.clear();
 246     TotalPackets++;
 247     startNewCycle = true;
 248   }
 249
 250   return startNewCycle;
 251 }
 252
 253 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 254 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 255 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 256 void VLIWMachineScheduler::schedule() {
 257   DEBUG(dbgs()
 258         << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
 259         << " " << BB->getName()
 260         << " in_func " << BB->getParent()->getFunction()->getName()
 261         << " at loop depth "  << MLI->getLoopDepth(BB)
 262         << " \n");
 263
 264   buildDAGWithRegPressure();
 265
 266   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 267   findRootsAndBiasEdges(TopRoots, BotRoots);
 268
 269   // Initialize the strategy before modifying the DAG.
 270   SchedImpl->initialize(this);
 271
 272   DEBUG(unsigned maxH = 0;
 273         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 274           if (SUnits[su].getHeight() > maxH)
 275             maxH = SUnits[su].getHeight();
 276         dbgs() << "Max Height " << maxH << "\n";);
 277   DEBUG(unsigned maxD = 0;
 278         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 279           if (SUnits[su].getDepth() > maxD)
 280             maxD = SUnits[su].getDepth();
 281         dbgs() << "Max Depth " << maxD << "\n";);
 282   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 283           SUnits[su].dumpAll(this));
 284
 285   initQueues(TopRoots, BotRoots);
 286
 287   bool IsTopNode = false;
 288   while (true) {
 289     DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
 290     SUnit *SU = SchedImpl->pickNode(IsTopNode);
 291     if (!SU) break;
 292
 293     if (!checkSchedLimit())
 294       break;
 295
 296     scheduleMI(SU, IsTopNode);
 297
 298     updateQueues(SU, IsTopNode);
 299
 300     // Notify the scheduling strategy after updating the DAG.
 301     SchedImpl->schedNode(SU, IsTopNode);
 302   }
 303   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 304
 305   placeDebugValues();
 306
 307   DEBUG({
 308     unsigned BBNum = begin()->getParent()->getNumber();
 309     dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 310     dumpSchedule();
 311     dbgs() << '\n';
 312   });
 313 }
 314
 315 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
 316   DAG = static_cast<VLIWMachineScheduler*>(dag);
 317   SchedModel = DAG->getSchedModel();
 318
 319   Top.init(DAG, SchedModel);
 320   Bot.init(DAG, SchedModel);
 321
 322   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
 323   // are disabled, then these HazardRecs will be disabled.
 324   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
 325   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
 326   const TargetInstrInfo *TII = STI.getInstrInfo();
 327   delete Top.HazardRec;
 328   delete Bot.HazardRec;
 329   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 330   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 331
 332   delete Top.ResourceModel;
 333   delete Bot.ResourceModel;
 334   Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
 335   Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
 336
 337   assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
 338          "-misched-topdown incompatible with -misched-bottomup");
 339
 340   DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
 341   DAG->addMutation(make_unique<HexagonCallMutation>());
 342 }
 343
 344 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
 345   if (SU->isScheduled)
 346     return;
 347
 348   for (const SDep &PI : SU->Preds) {
 349     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
 350     unsigned MinLatency = PI.getLatency();
 351 #ifndef NDEBUG
 352     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
 353 #endif
 354     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
 355       SU->TopReadyCycle = PredReadyCycle + MinLatency;
 356   }
 357   Top.releaseNode(SU, SU->TopReadyCycle);
 358 }
 359
 360 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
 361   if (SU->isScheduled)
 362     return;
 363
 364   assert(SU->getInstr() && "Scheduled SUnit must have instr");
 365
 366   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 367        I != E; ++I) {
 368     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
 369     unsigned MinLatency = I->getLatency();
 370 #ifndef NDEBUG
 371     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
 372 #endif
 373     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
 374       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
 375   }
 376   Bot.releaseNode(SU, SU->BotReadyCycle);
 377 }
 378
 379 /// Does this SU have a hazard within the current instruction group.
 380 ///
 381 /// The scheduler supports two modes of hazard recognition. The first is the
 382 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
 383 /// supports highly complicated in-order reservation tables
 384 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
 385 ///
 386 /// The second is a streamlined mechanism that checks for hazards based on
 387 /// simple counters that the scheduler itself maintains. It explicitly checks
 388 /// for instruction dispatch limitations, including the number of micro-ops that
 389 /// can dispatch per cycle.
 390 ///
 391 /// TODO: Also check whether the SU must start a new group.
 392 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
 393   if (HazardRec->isEnabled())
 394     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
 395
 396   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
 397   if (IssueCount + uops > SchedModel->getIssueWidth())
 398     return true;
 399
 400   return false;
 401 }
 402
 403 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
 404                                                      unsigned ReadyCycle) {
 405   if (ReadyCycle < MinReadyCycle)
 406     MinReadyCycle = ReadyCycle;
 407
 408   // Check for interlocks first. For the purpose of other heuristics, an
 409   // instruction that cannot issue appears as if it's not in the ReadyQueue.
 410   if (ReadyCycle > CurrCycle || checkHazard(SU))
 411
 412     Pending.push(SU);
 413   else
 414     Available.push(SU);
 415 }
 416
 417 /// Move the boundary of scheduled code by one cycle.
 418 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
 419   unsigned Width = SchedModel->getIssueWidth();
 420   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
 421
 422   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
 423   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
 424
 425   if (!HazardRec->isEnabled()) {
 426     // Bypass HazardRec virtual calls.
 427     CurrCycle = NextCycle;
 428   } else {
 429     // Bypass getHazardType calls in case of long latency.
 430     for (; CurrCycle != NextCycle; ++CurrCycle) {
 431       if (isTop())
 432         HazardRec->AdvanceCycle();
 433       else
 434         HazardRec->RecedeCycle();
 435     }
 436   }
 437   CheckPending = true;
 438
 439   DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
 440                << CurrCycle << '\n');
 441 }
 442
 443 /// Move the boundary of scheduled code by one SUnit.
 444 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
 445   bool startNewCycle = false;
 446
 447   // Update the reservation table.
 448   if (HazardRec->isEnabled()) {
 449     if (!isTop() && SU->isCall) {
 450       // Calls are scheduled with their preceding instructions. For bottom-up
 451       // scheduling, clear the pipeline state before emitting.
 452       HazardRec->Reset();
 453     }
 454     HazardRec->EmitInstruction(SU);
 455   }
 456
 457   // Update DFA model.
 458   startNewCycle = ResourceModel->reserveResources(SU);
 459
 460   // Check the instruction group dispatch limit.
 461   // TODO: Check if this SU must end a dispatch group.
 462   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
 463   if (startNewCycle) {
 464     DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
 465     bumpCycle();
 466   }
 467   else
 468     DEBUG(dbgs() << "*** IssueCount " << IssueCount
 469           << " at cycle " << CurrCycle << '\n');
 470 }
 471
 472 /// Release pending ready nodes in to the available queue. This makes them
 473 /// visible to heuristics.
 474 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
 475   // If the available queue is empty, it is safe to reset MinReadyCycle.
 476   if (Available.empty())
 477     MinReadyCycle = UINT_MAX;
 478
 479   // Check to see if any of the pending instructions are ready to issue.  If
 480   // so, add them to the available queue.
 481   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
 482     SUnit *SU = *(Pending.begin()+i);
 483     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
 484
 485     if (ReadyCycle < MinReadyCycle)
 486       MinReadyCycle = ReadyCycle;
 487
 488     if (ReadyCycle > CurrCycle)
 489       continue;
 490
 491     if (checkHazard(SU))
 492       continue;
 493
 494     Available.push(SU);
 495     Pending.remove(Pending.begin()+i);
 496     --i; --e;
 497   }
 498   CheckPending = false;
 499 }
 500
 501 /// Remove SU from the ready set for this boundary.
 502 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
 503   if (Available.isInQueue(SU))
 504     Available.remove(Available.find(SU));
 505   else {
 506     assert(Pending.isInQueue(SU) && "bad ready count");
 507     Pending.remove(Pending.find(SU));
 508   }
 509 }
 510
 511 /// If this queue only has one ready candidate, return it. As a side effect,
 512 /// advance the cycle until at least one node is ready. If multiple instructions
 513 /// are ready, return NULL.
 514 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
 515   if (CheckPending)
 516     releasePending();
 517
 518   for (unsigned i = 0; Available.empty(); ++i) {
 519     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
 520            "permanent hazard"); (void)i;
 521     ResourceModel->reserveResources(nullptr);
 522     bumpCycle();
 523     releasePending();
 524   }
 525   if (Available.size() == 1)
 526     return *Available.begin();
 527   return nullptr;
 528 }
 529
 530 #ifndef NDEBUG
 531 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
 532       const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
 533   dbgs() << Label << " " << Q.getName() << " ";
 534   if (P.isValid())
 535     dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
 536            << P.getUnitInc() << " ";
 537   else
 538     dbgs() << "     ";
 539   dbgs() << "cost(" << Cost << ")\t";
 540   SU->dump(DAG);
 541 }
 542
 543 // Very detailed queue dump, to be used with higher verbosity levels.
 544 void ConvergingVLIWScheduler::readyQueueVerboseDump(
 545       const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
 546       ReadyQueue &Q) {
 547   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
 548
 549   dbgs() << ">>> " << Q.getName() << "\n";
 550   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 551     RegPressureDelta RPDelta;
 552     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 553                                     DAG->getRegionCriticalPSets(),
 554                                     DAG->getRegPressure().MaxSetPressure);
 555     std::stringstream dbgstr;
 556     dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
 557     dbgs() << dbgstr.str();
 558     SchedulingCost(Q, *I, Candidate, RPDelta, true);
 559     dbgs() << "\t";
 560     (*I)->getInstr()->dump();
 561   }
 562   dbgs() << "\n";
 563 }
 564 #endif
 565
 566 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
 567 /// of SU, return it, otherwise return null.
 568 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
 569   SUnit *OnlyAvailablePred = nullptr;
 570   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 571        I != E; ++I) {
 572     SUnit &Pred = *I->getSUnit();
 573     if (!Pred.isScheduled) {
 574       // We found an available, but not scheduled, predecessor.  If it's the
 575       // only one we have found, keep track of it... otherwise give up.
 576       if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
 577         return nullptr;
 578       OnlyAvailablePred = &Pred;
 579     }
 580   }
 581   return OnlyAvailablePred;
 582 }
 583
 584 /// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
 585 /// of SU, return it, otherwise return null.
 586 static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
 587   SUnit *OnlyAvailableSucc = nullptr;
 588   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 589        I != E; ++I) {
 590     SUnit &Succ = *I->getSUnit();
 591     if (!Succ.isScheduled) {
 592       // We found an available, but not scheduled, successor.  If it's the
 593       // only one we have found, keep track of it... otherwise give up.
 594       if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
 595         return nullptr;
 596       OnlyAvailableSucc = &Succ;
 597     }
 598   }
 599   return OnlyAvailableSucc;
 600 }
 601
 602 // Constants used to denote relative importance of
 603 // heuristic components for cost computation.
 604 static const unsigned PriorityOne = 200;
 605 static const unsigned PriorityTwo = 50;
 606 static const unsigned PriorityThree = 75;
 607 static const unsigned ScaleTwo = 10;
 608 static const unsigned FactorOne = 2;
 609
 610 /// Single point to compute overall scheduling cost.
 611 /// TODO: More heuristics will be used soon.
 612 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
 613                                             SchedCandidate &Candidate,
 614                                             RegPressureDelta &Delta,
 615                                             bool verbose) {
 616   // Initial trivial priority.
 617   int ResCount = 1;
 618
 619   // Do not waste time on a node that is already scheduled.
 620   if (!SU || SU->isScheduled)
 621     return ResCount;
 622
 623   MachineInstr &Instr = *SU->getInstr();
 624
 625   DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
 626   // Forced priority is high.
 627   if (SU->isScheduleHigh) {
 628     ResCount += PriorityOne;
 629     DEBUG(dbgs() << "H|");
 630   }
 631
 632   // Critical path first.
 633   if (Q.getID() == TopQID) {
 634     ResCount += (SU->getHeight() * ScaleTwo);
 635
 636     DEBUG(if (verbose) {
 637       std::stringstream dbgstr;
 638       dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
 639       dbgs() << dbgstr.str();
 640     });
 641
 642     // If resources are available for it, multiply the
 643     // chance of scheduling.
 644     if (Top.ResourceModel->isResourceAvailable(SU)) {
 645       ResCount <<= FactorOne;
 646       ResCount += PriorityThree;
 647       DEBUG(if (verbose) dbgs() << "A|");
 648     } else
 649       DEBUG(if (verbose) dbgs() << " |");
 650   } else {
 651     ResCount += (SU->getDepth() * ScaleTwo);
 652
 653     DEBUG(if (verbose) {
 654       std::stringstream dbgstr;
 655       dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
 656       dbgs() << dbgstr.str();
 657     });
 658
 659     // If resources are available for it, multiply the
 660     // chance of scheduling.
 661     if (Bot.ResourceModel->isResourceAvailable(SU)) {
 662       ResCount <<= FactorOne;
 663       ResCount += PriorityThree;
 664       DEBUG(if (verbose) dbgs() << "A|");
 665     } else
 666       DEBUG(if (verbose) dbgs() << " |");
 667   }
 668
 669   unsigned NumNodesBlocking = 0;
 670   if (Q.getID() == TopQID) {
 671     // How many SUs does it block from scheduling?
 672     // Look at all of the successors of this node.
 673     // Count the number of nodes that
 674     // this node is the sole unscheduled node for.
 675     for (const SDep &SI : SU->Succs)
 676       if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
 677         ++NumNodesBlocking;
 678   } else {
 679     // How many unscheduled predecessors block this node?
 680     for (const SDep &PI : SU->Preds)
 681       if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
 682         ++NumNodesBlocking;
 683   }
 684   ResCount += (NumNodesBlocking * ScaleTwo);
 685
 686   DEBUG(if (verbose) {
 687     std::stringstream dbgstr;
 688     dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
 689     dbgs() << dbgstr.str();
 690   });
 691
 692   // Factor in reg pressure as a heuristic.
 693   if (!IgnoreBBRegPressure) {
 694     // Decrease priority by the amount that register pressure exceeds the limit.
 695     ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
 696     // Decrease priority if register pressure exceeds the limit.
 697     ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
 698     // Decrease priority slightly if register pressure would increase over the
 699     // current maximum.
 700     ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
 701     DEBUG(if (verbose) {
 702         dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
 703                << Delta.CriticalMax.getUnitInc() <<"/"
 704                << Delta.CurrentMax.getUnitInc() << ")|";
 705     });
 706   }
 707
 708   // Give a little extra priority to a .cur instruction if there is a resource
 709   // available for it.
 710   auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
 711   auto &QII = *QST.getInstrInfo();
 712   if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
 713     if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
 714       ResCount += PriorityTwo;
 715       DEBUG(if (verbose) dbgs() << "C|");
 716     } else if (Q.getID() == BotQID &&
 717                Bot.ResourceModel->isResourceAvailable(SU)) {
 718       ResCount += PriorityTwo;
 719       DEBUG(if (verbose) dbgs() << "C|");
 720     }
 721   }
 722
 723   // Give preference to a zero latency instruction if the dependent
 724   // instruction is in the current packet.
 725   if (Q.getID() == TopQID) {
 726     for (const SDep &PI : SU->Preds) {
 727       if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
 728           PI.getLatency() == 0 &&
 729           Top.ResourceModel->isInPacket(PI.getSUnit())) {
 730         ResCount += PriorityThree;
 731         DEBUG(if (verbose) dbgs() << "Z|");
 732       }
 733     }
 734   } else {
 735     for (const SDep &SI : SU->Succs) {
 736       if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
 737           SI.getLatency() == 0 &&
 738           Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 739         ResCount += PriorityThree;
 740         DEBUG(if (verbose) dbgs() << "Z|");
 741       }
 742     }
 743   }
 744
 745   // Give less preference to an instruction that will cause a stall with
 746   // an instruction in the previous packet.
 747   if (QII.isHVXVec(Instr)) {
 748     // Check for stalls in the previous packet.
 749     if (Q.getID() == TopQID) {
 750       for (auto J : Top.ResourceModel->OldPacket)
 751         if (QII.producesStall(*J->getInstr(), Instr))
 752           ResCount -= PriorityOne;
 753     } else {
 754       for (auto J : Bot.ResourceModel->OldPacket)
 755         if (QII.producesStall(Instr, *J->getInstr()))
 756           ResCount -= PriorityOne;
 757     }
 758   }
 759
 760   // If the instruction has a non-zero latency dependence with an instruction in
 761   // the current packet, then it should not be scheduled yet. The case occurs
 762   // when the dependent instruction is scheduled in a new packet, so the
 763   // scheduler updates the current cycle and pending instructions become
 764   // available.
 765   if (CheckEarlyAvail) {
 766     if (Q.getID() == TopQID) {
 767       for (const auto &PI : SU->Preds) {
 768         if (PI.getLatency() > 0 &&
 769             Top.ResourceModel->isInPacket(PI.getSUnit())) {
 770           ResCount -= PriorityOne;
 771           DEBUG(if (verbose) dbgs() << "D|");
 772         }
 773       }
 774     } else {
 775       for (const auto &SI : SU->Succs) {
 776         if (SI.getLatency() > 0 &&
 777             Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 778           ResCount -= PriorityOne;
 779           DEBUG(if (verbose) dbgs() << "D|");
 780         }
 781       }
 782     }
 783   }
 784
 785   DEBUG(if (verbose) {
 786     std::stringstream dbgstr;
 787     dbgstr << "Total " << std::setw(4) << ResCount << ")";
 788     dbgs() << dbgstr.str();
 789   });
 790
 791   return ResCount;
 792 }
 793
 794 /// Pick the best candidate from the top queue.
 795 ///
 796 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
 797 /// DAG building. To adjust for the current scheduling location we need to
 798 /// maintain the number of vreg uses remaining to be top-scheduled.
 799 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
 800 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
 801                   SchedCandidate &Candidate) {
 802   DEBUG(if (SchedDebugVerboseLevel > 1)
 803         readyQueueVerboseDump(RPTracker, Candidate, Q);
 804         else Q.dump(););
 805
 806   // getMaxPressureDelta temporarily modifies the tracker.
 807   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
 808
 809   // BestSU remains NULL if no top candidates beat the best existing candidate.
 810   CandResult FoundCandidate = NoCand;
 811   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 812     RegPressureDelta RPDelta;
 813     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 814                                     DAG->getRegionCriticalPSets(),
 815                                     DAG->getRegPressure().MaxSetPressure);
 816
 817     int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
 818
 819     // Initialize the candidate if needed.
 820     if (!Candidate.SU) {
 821       DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
 822       Candidate.SU = *I;
 823       Candidate.RPDelta = RPDelta;
 824       Candidate.SCost = CurrentCost;
 825       FoundCandidate = NodeOrder;
 826       continue;
 827     }
 828
 829     // Best cost.
 830     if (CurrentCost > Candidate.SCost) {
 831       DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
 832       Candidate.SU = *I;
 833       Candidate.RPDelta = RPDelta;
 834       Candidate.SCost = CurrentCost;
 835       FoundCandidate = BestCost;
 836       continue;
 837     }
 838
 839     // Tie breaker using Timing Class.
 840     if (!DisableTCTie) {
 841       auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
 842       auto &QII = *QST.getInstrInfo();
 843
 844       const MachineInstr *MI = (*I)->getInstr();
 845       const MachineInstr *CandI = Candidate.SU->getInstr();
 846       const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
 847
 848       unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
 849       unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
 850       DEBUG(dbgs() << "TC Tie Breaker Cand: "
 851                    << CandLatency << " Instr:" << InstrLatency << "\n"
 852                    << *MI << *CandI << "\n");
 853       if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
 854         if (InstrLatency < CandLatency && TopUseShorterTie) {
 855           Candidate.SU = *I;
 856           Candidate.RPDelta = RPDelta;
 857           Candidate.SCost = CurrentCost;
 858           FoundCandidate = BestCost;
 859           DEBUG(dbgs() << "Used top shorter tie breaker\n");
 860           continue;
 861         } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
 862           Candidate.SU = *I;
 863           Candidate.RPDelta = RPDelta;
 864           Candidate.SCost = CurrentCost;
 865           FoundCandidate = BestCost;
 866           DEBUG(dbgs() << "Used top longer tie breaker\n");
 867           continue;
 868         }
 869       } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
 870         if (InstrLatency < CandLatency && BotUseShorterTie) {
 871           Candidate.SU = *I;
 872           Candidate.RPDelta = RPDelta;
 873           Candidate.SCost = CurrentCost;
 874           FoundCandidate = BestCost;
 875           DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
 876           continue;
 877         } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
 878           Candidate.SU = *I;
 879           Candidate.RPDelta = RPDelta;
 880           Candidate.SCost = CurrentCost;
 881           FoundCandidate = BestCost;
 882           DEBUG(dbgs() << "Used Bot longer tie breaker\n");
 883           continue;
 884         }
 885       }
 886     }
 887
 888     if (CurrentCost == Candidate.SCost) {
 889       if ((Q.getID() == TopQID &&
 890            (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
 891           (Q.getID() == BotQID &&
 892            (*I)->Preds.size() < Candidate.SU->Preds.size())) {
 893         DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
 894         Candidate.SU = *I;
 895         Candidate.RPDelta = RPDelta;
 896         Candidate.SCost = CurrentCost;
 897         FoundCandidate = BestCost;
 898         continue;
 899       }
 900     }
 901
 902     // Fall through to original instruction order.
 903     // Only consider node order if Candidate was chosen from this Q.
 904     if (FoundCandidate == NoCand)
 905       continue;
 906   }
 907   return FoundCandidate;
 908 }
 909
 910 /// Pick the best candidate node from either the top or bottom queue.
 911 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
 912   // Schedule as far as possible in the direction of no choice. This is most
 913   // efficient, but also provides the best heuristics for CriticalPSets.
 914   if (SUnit *SU = Bot.pickOnlyChoice()) {
 915     DEBUG(dbgs() << "Picked only Bottom\n");
 916     IsTopNode = false;
 917     return SU;
 918   }
 919   if (SUnit *SU = Top.pickOnlyChoice()) {
 920     DEBUG(dbgs() << "Picked only Top\n");
 921     IsTopNode = true;
 922     return SU;
 923   }
 924   SchedCandidate BotCand;
 925   // Prefer bottom scheduling when heuristics are silent.
 926   CandResult BotResult = pickNodeFromQueue(Bot.Available,
 927                                            DAG->getBotRPTracker(), BotCand);
 928   assert(BotResult != NoCand && "failed to find the first candidate");
 929
 930   // If either Q has a single candidate that provides the least increase in
 931   // Excess pressure, we can immediately schedule from that Q.
 932   //
 933   // RegionCriticalPSets summarizes the pressure within the scheduled region and
 934   // affects picking from either Q. If scheduling in one direction must
 935   // increase pressure for one of the excess PSets, then schedule in that
 936   // direction first to provide more freedom in the other direction.
 937   if (BotResult == SingleExcess || BotResult == SingleCritical) {
 938     DEBUG(dbgs() << "Prefered Bottom Node\n");
 939     IsTopNode = false;
 940     return BotCand.SU;
 941   }
 942   // Check if the top Q has a better candidate.
 943   SchedCandidate TopCand;
 944   CandResult TopResult = pickNodeFromQueue(Top.Available,
 945                                            DAG->getTopRPTracker(), TopCand);
 946   assert(TopResult != NoCand && "failed to find the first candidate");
 947
 948   if (TopResult == SingleExcess || TopResult == SingleCritical) {
 949     DEBUG(dbgs() << "Prefered Top Node\n");
 950     IsTopNode = true;
 951     return TopCand.SU;
 952   }
 953   // If either Q has a single candidate that minimizes pressure above the
 954   // original region's pressure pick it.
 955   if (BotResult == SingleMax) {
 956     DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
 957     IsTopNode = false;
 958     return BotCand.SU;
 959   }
 960   if (TopResult == SingleMax) {
 961     DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
 962     IsTopNode = true;
 963     return TopCand.SU;
 964   }
 965   if (TopCand.SCost > BotCand.SCost) {
 966     DEBUG(dbgs() << "Prefered Top Node Cost\n");
 967     IsTopNode = true;
 968     return TopCand.SU;
 969   }
 970   // Otherwise prefer the bottom candidate in node order.
 971   DEBUG(dbgs() << "Prefered Bottom in Node order\n");
 972   IsTopNode = false;
 973   return BotCand.SU;
 974 }
 975
 976 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
 977 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
 978   if (DAG->top() == DAG->bottom()) {
 979     assert(Top.Available.empty() && Top.Pending.empty() &&
 980            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
 981     return nullptr;
 982   }
 983   SUnit *SU;
 984   if (llvm::ForceTopDown) {
 985     SU = Top.pickOnlyChoice();
 986     if (!SU) {
 987       SchedCandidate TopCand;
 988       CandResult TopResult =
 989         pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
 990       assert(TopResult != NoCand && "failed to find the first candidate");
 991       (void)TopResult;
 992       SU = TopCand.SU;
 993     }
 994     IsTopNode = true;
 995   } else if (llvm::ForceBottomUp) {
 996     SU = Bot.pickOnlyChoice();
 997     if (!SU) {
 998       SchedCandidate BotCand;
 999       CandResult BotResult =
1000         pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
1001       assert(BotResult != NoCand && "failed to find the first candidate");
1002       (void)BotResult;
1003       SU = BotCand.SU;
1004     }
1005     IsTopNode = false;
1006   } else {
1007     SU = pickNodeBidrectional(IsTopNode);
1008   }
1009   if (SU->isTopReady())
1010     Top.removeReady(SU);
1011   if (SU->isBottomReady())
1012     Bot.removeReady(SU);
1013
1014   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
1015         << " Scheduling Instruction in cycle "
1016         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
1017         SU->dump(DAG));
1018   return SU;
1019 }
1020
1021 /// Update the scheduler's state after scheduling a node. This is the same node
1022 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
1023 /// to update it's state based on the current cycle before MachineSchedStrategy
1024 /// does.
1025 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1026   if (IsTopNode) {
1027     SU->TopReadyCycle = Top.CurrCycle;
1028     Top.bumpNode(SU);
1029   } else {
1030     SU->BotReadyCycle = Bot.CurrCycle;
1031     Bot.bumpNode(SU);
1032   }
1033 }