contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp

   1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "HexagonMachineScheduler.h"
  16 #include "HexagonSubtarget.h"
  17 #include "llvm/CodeGen/MachineLoopInfo.h"
  18 #include "llvm/CodeGen/ScheduleDAGMutation.h"
  19 #include "llvm/IR/Function.h"
  20
  21 #include <iomanip>
  22 #include <sstream>
  23
  24 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
  25     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  26
  27 static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
  28     cl::Hidden, cl::ZeroOrMore, cl::init(true));
  29
  30 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
  31     cl::Hidden, cl::ZeroOrMore, cl::init(1));
  32
  33 static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
  34     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  35
  36 static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
  37     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  38
  39 static cl::opt<bool> DisableTCTie("disable-tc-tie",
  40     cl::Hidden, cl::ZeroOrMore, cl::init(false));
  41
  42 static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
  43     cl::Hidden, cl::ZeroOrMore, cl::init(true));
  44
  45 // Check if the scheduler should penalize instructions that are available to
  46 // early due to a zero-latency dependence.
  47 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
  48     cl::ZeroOrMore, cl::init(true));
  49
  50 using namespace llvm;
  51
  52 #define DEBUG_TYPE "machine-scheduler"
  53
  54 namespace {
  55 class HexagonCallMutation : public ScheduleDAGMutation {
  56 public:
  57   void apply(ScheduleDAGInstrs *DAG) override;
  58 private:
  59   bool shouldTFRICallBind(const HexagonInstrInfo &HII,
  60                           const SUnit &Inst1, const SUnit &Inst2) const;
  61 };
  62 } // end anonymous namespace
  63
  64 // Check if a call and subsequent A2_tfrpi instructions should maintain
  65 // scheduling affinity. We are looking for the TFRI to be consumed in
  66 // the next instruction. This should help reduce the instances of
  67 // double register pairs being allocated and scheduled before a call
  68 // when not used until after the call. This situation is exacerbated
  69 // by the fact that we allocate the pair from the callee saves list,
  70 // leading to excess spills and restores.
  71 bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
  72       const SUnit &Inst1, const SUnit &Inst2) const {
  73   if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
  74     return false;
  75
  76   // TypeXTYPE are 64 bit operations.
  77   unsigned Type = HII.getType(*Inst2.getInstr());
  78   if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
  79     Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM)
  80     return true;
  81   return false;
  82 }
  83
  84 void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
  85   SUnit* LastSequentialCall = nullptr;
  86   unsigned VRegHoldingRet = 0;
  87   unsigned RetRegister;
  88   SUnit* LastUseOfRet = nullptr;
  89   auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  90   auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
  91
  92   // Currently we only catch the situation when compare gets scheduled
  93   // before preceding call.
  94   for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
  95     // Remember the call.
  96     if (DAG->SUnits[su].getInstr()->isCall())
  97       LastSequentialCall = &DAG->SUnits[su];
  98     // Look for a compare that defines a predicate.
  99     else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
 100       DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
 101     // Look for call and tfri* instructions.
 102     else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
 103              shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
 104       DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
 105     // Prevent redundant register copies between two calls, which are caused by
 106     // both the return value and the argument for the next call being in %R0.
 107     // Example:
 108     //   1: <call1>
 109     //   2: %VregX = COPY %R0
 110     //   3: <use of %VregX>
 111     //   4: %R0 = ...
 112     //   5: <call2>
 113     // The scheduler would often swap 3 and 4, so an additional register is
 114     // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
 115     // this. The same applies for %D0 and %V0/%W0, which are also handled.
 116     else if (SchedRetvalOptimization) {
 117       const MachineInstr *MI = DAG->SUnits[su].getInstr();
 118       if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
 119                            MI->readsRegister(Hexagon::V0, &TRI)))  {
 120         // %vregX = COPY %R0
 121         VRegHoldingRet = MI->getOperand(0).getReg();
 122         RetRegister = MI->getOperand(1).getReg();
 123         LastUseOfRet = nullptr;
 124       } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
 125         // <use of %vregX>
 126         LastUseOfRet = &DAG->SUnits[su];
 127       else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
 128         // %R0 = ...
 129         DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
 130     }
 131   }
 132 }
 133
 134
 135 /// Save the last formed packet
 136 void VLIWResourceModel::savePacket() {
 137   OldPacket = Packet;
 138 }
 139
 140 /// Check if scheduling of this SU is possible
 141 /// in the current packet.
 142 /// It is _not_ precise (statefull), it is more like
 143 /// another heuristic. Many corner cases are figured
 144 /// empirically.
 145 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
 146   if (!SU || !SU->getInstr())
 147     return false;
 148
 149   // First see if the pipeline could receive this instruction
 150   // in the current cycle.
 151   switch (SU->getInstr()->getOpcode()) {
 152   default:
 153     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
 154       return false;
 155   case TargetOpcode::EXTRACT_SUBREG:
 156   case TargetOpcode::INSERT_SUBREG:
 157   case TargetOpcode::SUBREG_TO_REG:
 158   case TargetOpcode::REG_SEQUENCE:
 159   case TargetOpcode::IMPLICIT_DEF:
 160   case TargetOpcode::COPY:
 161   case TargetOpcode::INLINEASM:
 162     break;
 163   }
 164
 165   MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
 166   auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
 167
 168   // Now see if there are no other dependencies to instructions already
 169   // in the packet.
 170   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 171     if (Packet[i]->Succs.size() == 0)
 172       continue;
 173
 174     // Enable .cur formation.
 175     if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
 176       continue;
 177
 178     for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
 179          E = Packet[i]->Succs.end(); I != E; ++I) {
 180       // Since we do not add pseudos to packets, might as well
 181       // ignore order dependencies.
 182       if (I->isCtrl())
 183         continue;
 184
 185       if (I->getSUnit() == SU)
 186         return false;
 187     }
 188   }
 189   return true;
 190 }
 191
 192 /// Keep track of available resources.
 193 bool VLIWResourceModel::reserveResources(SUnit *SU) {
 194   bool startNewCycle = false;
 195   // Artificially reset state.
 196   if (!SU) {
 197     ResourcesModel->clearResources();
 198     savePacket();
 199     Packet.clear();
 200     TotalPackets++;
 201     return false;
 202   }
 203   // If this SU does not fit in the packet
 204   // start a new one.
 205   if (!isResourceAvailable(SU)) {
 206     ResourcesModel->clearResources();
 207     savePacket();
 208     Packet.clear();
 209     TotalPackets++;
 210     startNewCycle = true;
 211   }
 212
 213   switch (SU->getInstr()->getOpcode()) {
 214   default:
 215     ResourcesModel->reserveResources(*SU->getInstr());
 216     break;
 217   case TargetOpcode::EXTRACT_SUBREG:
 218   case TargetOpcode::INSERT_SUBREG:
 219   case TargetOpcode::SUBREG_TO_REG:
 220   case TargetOpcode::REG_SEQUENCE:
 221   case TargetOpcode::IMPLICIT_DEF:
 222   case TargetOpcode::KILL:
 223   case TargetOpcode::CFI_INSTRUCTION:
 224   case TargetOpcode::EH_LABEL:
 225   case TargetOpcode::COPY:
 226   case TargetOpcode::INLINEASM:
 227     break;
 228   }
 229   Packet.push_back(SU);
 230
 231 #ifndef NDEBUG
 232   DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
 233   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
 234     DEBUG(dbgs() << "\t[" << i << "] SU(");
 235     DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
 236     DEBUG(Packet[i]->getInstr()->dump());
 237   }
 238 #endif
 239
 240   // If packet is now full, reset the state so in the next cycle
 241   // we start fresh.
 242   if (Packet.size() >= SchedModel->getIssueWidth()) {
 243     ResourcesModel->clearResources();
 244     savePacket();
 245     Packet.clear();
 246     TotalPackets++;
 247     startNewCycle = true;
 248   }
 249
 250   return startNewCycle;
 251 }
 252
 253 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 254 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 255 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 256 void VLIWMachineScheduler::schedule() {
 257   DEBUG(dbgs()
 258         << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
 259         << " " << BB->getName()
 260         << " in_func " << BB->getParent()->getFunction()->getName()
 261         << " at loop depth "  << MLI->getLoopDepth(BB)
 262         << " \n");
 263
 264   buildDAGWithRegPressure();
 265
 266   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 267   findRootsAndBiasEdges(TopRoots, BotRoots);
 268
 269   // Initialize the strategy before modifying the DAG.
 270   SchedImpl->initialize(this);
 271
 272   DEBUG(unsigned maxH = 0;
 273         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 274           if (SUnits[su].getHeight() > maxH)
 275             maxH = SUnits[su].getHeight();
 276         dbgs() << "Max Height " << maxH << "\n";);
 277   DEBUG(unsigned maxD = 0;
 278         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 279           if (SUnits[su].getDepth() > maxD)
 280             maxD = SUnits[su].getDepth();
 281         dbgs() << "Max Depth " << maxD << "\n";);
 282   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 283           SUnits[su].dumpAll(this));
 284
 285   initQueues(TopRoots, BotRoots);
 286
 287   bool IsTopNode = false;
 288   while (true) {
 289     DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
 290     SUnit *SU = SchedImpl->pickNode(IsTopNode);
 291     if (!SU) break;
 292
 293     if (!checkSchedLimit())
 294       break;
 295
 296     scheduleMI(SU, IsTopNode);
 297
 298     updateQueues(SU, IsTopNode);
 299
 300     // Notify the scheduling strategy after updating the DAG.
 301     SchedImpl->schedNode(SU, IsTopNode);
 302   }
 303   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 304
 305   placeDebugValues();
 306
 307   DEBUG({
 308     unsigned BBNum = begin()->getParent()->getNumber();
 309     dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 310     dumpSchedule();
 311     dbgs() << '\n';
 312   });
 313 }
 314
 315 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
 316   DAG = static_cast<VLIWMachineScheduler*>(dag);
 317   SchedModel = DAG->getSchedModel();
 318
 319   Top.init(DAG, SchedModel);
 320   Bot.init(DAG, SchedModel);
 321
 322   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
 323   // are disabled, then these HazardRecs will be disabled.
 324   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
 325   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
 326   const TargetInstrInfo *TII = STI.getInstrInfo();
 327   delete Top.HazardRec;
 328   delete Bot.HazardRec;
 329   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 330   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
 331
 332   delete Top.ResourceModel;
 333   delete Bot.ResourceModel;
 334   Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
 335   Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
 336
 337   assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
 338          "-misched-topdown incompatible with -misched-bottomup");
 339
 340   DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
 341   DAG->addMutation(make_unique<HexagonCallMutation>());
 342 }
 343
 344 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
 345   if (SU->isScheduled)
 346     return;
 347
 348   for (const SDep &PI : SU->Preds) {
 349     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
 350     unsigned MinLatency = PI.getLatency();
 351 #ifndef NDEBUG
 352     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
 353 #endif
 354     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
 355       SU->TopReadyCycle = PredReadyCycle + MinLatency;
 356   }
 357   Top.releaseNode(SU, SU->TopReadyCycle);
 358 }
 359
 360 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
 361   if (SU->isScheduled)
 362     return;
 363
 364   assert(SU->getInstr() && "Scheduled SUnit must have instr");
 365
 366   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 367        I != E; ++I) {
 368     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
 369     unsigned MinLatency = I->getLatency();
 370 #ifndef NDEBUG
 371     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
 372 #endif
 373     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
 374       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
 375   }
 376   Bot.releaseNode(SU, SU->BotReadyCycle);
 377 }
 378
 379 /// Does this SU have a hazard within the current instruction group.
 380 ///
 381 /// The scheduler supports two modes of hazard recognition. The first is the
 382 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
 383 /// supports highly complicated in-order reservation tables
 384 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
 385 ///
 386 /// The second is a streamlined mechanism that checks for hazards based on
 387 /// simple counters that the scheduler itself maintains. It explicitly checks
 388 /// for instruction dispatch limitations, including the number of micro-ops that
 389 /// can dispatch per cycle.
 390 ///
 391 /// TODO: Also check whether the SU must start a new group.
 392 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
 393   if (HazardRec->isEnabled())
 394     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
 395
 396   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
 397   if (IssueCount + uops > SchedModel->getIssueWidth())
 398     return true;
 399
 400   return false;
 401 }
 402
 403 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU,
 404                                                      unsigned ReadyCycle) {
 405   if (ReadyCycle < MinReadyCycle)
 406     MinReadyCycle = ReadyCycle;
 407
 408   // Check for interlocks first. For the purpose of other heuristics, an
 409   // instruction that cannot issue appears as if it's not in the ReadyQueue.
 410   if (ReadyCycle > CurrCycle || checkHazard(SU))
 411
 412     Pending.push(SU);
 413   else
 414     Available.push(SU);
 415 }
 416
 417 /// Move the boundary of scheduled code by one cycle.
 418 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
 419   unsigned Width = SchedModel->getIssueWidth();
 420   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
 421
 422   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
 423   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
 424
 425   if (!HazardRec->isEnabled()) {
 426     // Bypass HazardRec virtual calls.
 427     CurrCycle = NextCycle;
 428   } else {
 429     // Bypass getHazardType calls in case of long latency.
 430     for (; CurrCycle != NextCycle; ++CurrCycle) {
 431       if (isTop())
 432         HazardRec->AdvanceCycle();
 433       else
 434         HazardRec->RecedeCycle();
 435     }
 436   }
 437   CheckPending = true;
 438
 439   DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
 440                << CurrCycle << '\n');
 441 }
 442
 443 /// Move the boundary of scheduled code by one SUnit.
 444 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
 445   bool startNewCycle = false;
 446
 447   // Update the reservation table.
 448   if (HazardRec->isEnabled()) {
 449     if (!isTop() && SU->isCall) {
 450       // Calls are scheduled with their preceding instructions. For bottom-up
 451       // scheduling, clear the pipeline state before emitting.
 452       HazardRec->Reset();
 453     }
 454     HazardRec->EmitInstruction(SU);
 455   }
 456
 457   // Update DFA model.
 458   startNewCycle = ResourceModel->reserveResources(SU);
 459
 460   // Check the instruction group dispatch limit.
 461   // TODO: Check if this SU must end a dispatch group.
 462   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
 463   if (startNewCycle) {
 464     DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
 465     bumpCycle();
 466   }
 467   else
 468     DEBUG(dbgs() << "*** IssueCount " << IssueCount
 469           << " at cycle " << CurrCycle << '\n');
 470 }
 471
 472 /// Release pending ready nodes in to the available queue. This makes them
 473 /// visible to heuristics.
 474 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
 475   // If the available queue is empty, it is safe to reset MinReadyCycle.
 476   if (Available.empty())
 477     MinReadyCycle = UINT_MAX;
 478
 479   // Check to see if any of the pending instructions are ready to issue.  If
 480   // so, add them to the available queue.
 481   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
 482     SUnit *SU = *(Pending.begin()+i);
 483     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
 484
 485     if (ReadyCycle < MinReadyCycle)
 486       MinReadyCycle = ReadyCycle;
 487
 488     if (ReadyCycle > CurrCycle)
 489       continue;
 490
 491     if (checkHazard(SU))
 492       continue;
 493
 494     Available.push(SU);
 495     Pending.remove(Pending.begin()+i);
 496     --i; --e;
 497   }
 498   CheckPending = false;
 499 }
 500
 501 /// Remove SU from the ready set for this boundary.
 502 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
 503   if (Available.isInQueue(SU))
 504     Available.remove(Available.find(SU));
 505   else {
 506     assert(Pending.isInQueue(SU) && "bad ready count");
 507     Pending.remove(Pending.find(SU));
 508   }
 509 }
 510
 511 /// If this queue only has one ready candidate, return it. As a side effect,
 512 /// advance the cycle until at least one node is ready. If multiple instructions
 513 /// are ready, return NULL.
 514 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
 515   if (CheckPending)
 516     releasePending();
 517
 518   for (unsigned i = 0; Available.empty(); ++i) {
 519     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
 520            "permanent hazard"); (void)i;
 521     ResourceModel->reserveResources(nullptr);
 522     bumpCycle();
 523     releasePending();
 524   }
 525   if (Available.size() == 1)
 526     return *Available.begin();
 527   return nullptr;
 528 }
 529
 530 #ifndef NDEBUG
 531 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
 532       const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
 533   dbgs() << Label << " " << Q.getName() << " ";
 534   if (P.isValid())
 535     dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
 536            << P.getUnitInc() << " ";
 537   else
 538     dbgs() << "     ";
 539   dbgs() << "cost(" << Cost << ")\t";
 540   SU->dump(DAG);
 541 }
 542
 543 // Very detailed queue dump, to be used with higher verbosity levels.
 544 void ConvergingVLIWScheduler::readyQueueVerboseDump(
 545       const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
 546       ReadyQueue &Q) {
 547   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
 548
 549   dbgs() << ">>> " << Q.getName() << "\n";
 550   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 551     RegPressureDelta RPDelta;
 552     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 553                                     DAG->getRegionCriticalPSets(),
 554                                     DAG->getRegPressure().MaxSetPressure);
 555     std::stringstream dbgstr;
 556     dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
 557     dbgs() << dbgstr.str();
 558     SchedulingCost(Q, *I, Candidate, RPDelta, true);
 559     dbgs() << "\t";
 560     (*I)->getInstr()->dump();
 561   }
 562   dbgs() << "\n";
 563 }
 564 #endif
 565
 566 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
 567 /// of SU, return true (we may have duplicates)
 568 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
 569   if (SU->NumPredsLeft == 0)
 570     return false;
 571
 572   for (auto &Pred : SU->Preds) {
 573     // We found an available, but not scheduled, predecessor.
 574     if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
 575       return false;
 576   }
 577
 578   return true;
 579 }
 580
 581 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
 582 /// of SU, return true (we may have duplicates)
 583 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
 584   if (SU->NumSuccsLeft == 0)
 585     return false;
 586
 587   for (auto &Succ : SU->Succs) {
 588     // We found an available, but not scheduled, successor.
 589     if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
 590       return false;
 591   }
 592   return true;
 593 }
 594
 595 // Constants used to denote relative importance of
 596 // heuristic components for cost computation.
 597 static const unsigned PriorityOne = 200;
 598 static const unsigned PriorityTwo = 50;
 599 static const unsigned PriorityThree = 75;
 600 static const unsigned ScaleTwo = 10;
 601 static const unsigned FactorOne = 2;
 602
 603 /// Single point to compute overall scheduling cost.
 604 /// TODO: More heuristics will be used soon.
 605 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
 606                                             SchedCandidate &Candidate,
 607                                             RegPressureDelta &Delta,
 608                                             bool verbose) {
 609   // Initial trivial priority.
 610   int ResCount = 1;
 611
 612   // Do not waste time on a node that is already scheduled.
 613   if (!SU || SU->isScheduled)
 614     return ResCount;
 615
 616   MachineInstr &Instr = *SU->getInstr();
 617
 618   DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
 619   // Forced priority is high.
 620   if (SU->isScheduleHigh) {
 621     ResCount += PriorityOne;
 622     DEBUG(dbgs() << "H|");
 623   }
 624
 625   // Critical path first.
 626   if (Q.getID() == TopQID) {
 627     ResCount += (SU->getHeight() * ScaleTwo);
 628
 629     DEBUG(if (verbose) {
 630       std::stringstream dbgstr;
 631       dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
 632       dbgs() << dbgstr.str();
 633     });
 634
 635     // If resources are available for it, multiply the
 636     // chance of scheduling.
 637     if (Top.ResourceModel->isResourceAvailable(SU)) {
 638       ResCount <<= FactorOne;
 639       ResCount += PriorityThree;
 640       DEBUG(if (verbose) dbgs() << "A|");
 641     } else
 642       DEBUG(if (verbose) dbgs() << " |");
 643   } else {
 644     ResCount += (SU->getDepth() * ScaleTwo);
 645
 646     DEBUG(if (verbose) {
 647       std::stringstream dbgstr;
 648       dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
 649       dbgs() << dbgstr.str();
 650     });
 651
 652     // If resources are available for it, multiply the
 653     // chance of scheduling.
 654     if (Bot.ResourceModel->isResourceAvailable(SU)) {
 655       ResCount <<= FactorOne;
 656       ResCount += PriorityThree;
 657       DEBUG(if (verbose) dbgs() << "A|");
 658     } else
 659       DEBUG(if (verbose) dbgs() << " |");
 660   }
 661
 662   unsigned NumNodesBlocking = 0;
 663   if (Q.getID() == TopQID) {
 664     // How many SUs does it block from scheduling?
 665     // Look at all of the successors of this node.
 666     // Count the number of nodes that
 667     // this node is the sole unscheduled node for.
 668     for (const SDep &SI : SU->Succs)
 669       if (isSingleUnscheduledPred(SI.getSUnit(), SU))
 670         ++NumNodesBlocking;
 671   } else {
 672     // How many unscheduled predecessors block this node?
 673     for (const SDep &PI : SU->Preds)
 674       if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
 675         ++NumNodesBlocking;
 676   }
 677   ResCount += (NumNodesBlocking * ScaleTwo);
 678
 679   DEBUG(if (verbose) {
 680     std::stringstream dbgstr;
 681     dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
 682     dbgs() << dbgstr.str();
 683   });
 684
 685   // Factor in reg pressure as a heuristic.
 686   if (!IgnoreBBRegPressure) {
 687     // Decrease priority by the amount that register pressure exceeds the limit.
 688     ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
 689     // Decrease priority if register pressure exceeds the limit.
 690     ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
 691     // Decrease priority slightly if register pressure would increase over the
 692     // current maximum.
 693     ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
 694     DEBUG(if (verbose) {
 695         dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
 696                << Delta.CriticalMax.getUnitInc() <<"/"
 697                << Delta.CurrentMax.getUnitInc() << ")|";
 698     });
 699   }
 700
 701   // Give a little extra priority to a .cur instruction if there is a resource
 702   // available for it.
 703   auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
 704   auto &QII = *QST.getInstrInfo();
 705   if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
 706     if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
 707       ResCount += PriorityTwo;
 708       DEBUG(if (verbose) dbgs() << "C|");
 709     } else if (Q.getID() == BotQID &&
 710                Bot.ResourceModel->isResourceAvailable(SU)) {
 711       ResCount += PriorityTwo;
 712       DEBUG(if (verbose) dbgs() << "C|");
 713     }
 714   }
 715
 716   // Give preference to a zero latency instruction if the dependent
 717   // instruction is in the current packet.
 718   if (Q.getID() == TopQID) {
 719     for (const SDep &PI : SU->Preds) {
 720       if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
 721           PI.getLatency() == 0 &&
 722           Top.ResourceModel->isInPacket(PI.getSUnit())) {
 723         ResCount += PriorityThree;
 724         DEBUG(if (verbose) dbgs() << "Z|");
 725       }
 726     }
 727   } else {
 728     for (const SDep &SI : SU->Succs) {
 729       if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
 730           SI.getLatency() == 0 &&
 731           Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 732         ResCount += PriorityThree;
 733         DEBUG(if (verbose) dbgs() << "Z|");
 734       }
 735     }
 736   }
 737
 738   // Give less preference to an instruction that will cause a stall with
 739   // an instruction in the previous packet.
 740   if (QII.isHVXVec(Instr)) {
 741     // Check for stalls in the previous packet.
 742     if (Q.getID() == TopQID) {
 743       for (auto J : Top.ResourceModel->OldPacket)
 744         if (QII.producesStall(*J->getInstr(), Instr))
 745           ResCount -= PriorityOne;
 746     } else {
 747       for (auto J : Bot.ResourceModel->OldPacket)
 748         if (QII.producesStall(Instr, *J->getInstr()))
 749           ResCount -= PriorityOne;
 750     }
 751   }
 752
 753   // If the instruction has a non-zero latency dependence with an instruction in
 754   // the current packet, then it should not be scheduled yet. The case occurs
 755   // when the dependent instruction is scheduled in a new packet, so the
 756   // scheduler updates the current cycle and pending instructions become
 757   // available.
 758   if (CheckEarlyAvail) {
 759     if (Q.getID() == TopQID) {
 760       for (const auto &PI : SU->Preds) {
 761         if (PI.getLatency() > 0 &&
 762             Top.ResourceModel->isInPacket(PI.getSUnit())) {
 763           ResCount -= PriorityOne;
 764           DEBUG(if (verbose) dbgs() << "D|");
 765         }
 766       }
 767     } else {
 768       for (const auto &SI : SU->Succs) {
 769         if (SI.getLatency() > 0 &&
 770             Bot.ResourceModel->isInPacket(SI.getSUnit())) {
 771           ResCount -= PriorityOne;
 772           DEBUG(if (verbose) dbgs() << "D|");
 773         }
 774       }
 775     }
 776   }
 777
 778   DEBUG(if (verbose) {
 779     std::stringstream dbgstr;
 780     dbgstr << "Total " << std::setw(4) << ResCount << ")";
 781     dbgs() << dbgstr.str();
 782   });
 783
 784   return ResCount;
 785 }
 786
 787 /// Pick the best candidate from the top queue.
 788 ///
 789 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
 790 /// DAG building. To adjust for the current scheduling location we need to
 791 /// maintain the number of vreg uses remaining to be top-scheduled.
 792 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
 793 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
 794                   SchedCandidate &Candidate) {
 795   DEBUG(if (SchedDebugVerboseLevel > 1)
 796         readyQueueVerboseDump(RPTracker, Candidate, Q);
 797         else Q.dump(););
 798
 799   // getMaxPressureDelta temporarily modifies the tracker.
 800   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
 801
 802   // BestSU remains NULL if no top candidates beat the best existing candidate.
 803   CandResult FoundCandidate = NoCand;
 804   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
 805     RegPressureDelta RPDelta;
 806     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
 807                                     DAG->getRegionCriticalPSets(),
 808                                     DAG->getRegPressure().MaxSetPressure);
 809
 810     int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
 811
 812     // Initialize the candidate if needed.
 813     if (!Candidate.SU) {
 814       DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
 815       Candidate.SU = *I;
 816       Candidate.RPDelta = RPDelta;
 817       Candidate.SCost = CurrentCost;
 818       FoundCandidate = NodeOrder;
 819       continue;
 820     }
 821
 822     // Best cost.
 823     if (CurrentCost > Candidate.SCost) {
 824       DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
 825       Candidate.SU = *I;
 826       Candidate.RPDelta = RPDelta;
 827       Candidate.SCost = CurrentCost;
 828       FoundCandidate = BestCost;
 829       continue;
 830     }
 831
 832     // Tie breaker using Timing Class.
 833     if (!DisableTCTie) {
 834       auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
 835       auto &QII = *QST.getInstrInfo();
 836
 837       const MachineInstr *MI = (*I)->getInstr();
 838       const MachineInstr *CandI = Candidate.SU->getInstr();
 839       const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
 840
 841       unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
 842       unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
 843       DEBUG(dbgs() << "TC Tie Breaker Cand: "
 844                    << CandLatency << " Instr:" << InstrLatency << "\n"
 845                    << *MI << *CandI << "\n");
 846       if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
 847         if (InstrLatency < CandLatency && TopUseShorterTie) {
 848           Candidate.SU = *I;
 849           Candidate.RPDelta = RPDelta;
 850           Candidate.SCost = CurrentCost;
 851           FoundCandidate = BestCost;
 852           DEBUG(dbgs() << "Used top shorter tie breaker\n");
 853           continue;
 854         } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
 855           Candidate.SU = *I;
 856           Candidate.RPDelta = RPDelta;
 857           Candidate.SCost = CurrentCost;
 858           FoundCandidate = BestCost;
 859           DEBUG(dbgs() << "Used top longer tie breaker\n");
 860           continue;
 861         }
 862       } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
 863         if (InstrLatency < CandLatency && BotUseShorterTie) {
 864           Candidate.SU = *I;
 865           Candidate.RPDelta = RPDelta;
 866           Candidate.SCost = CurrentCost;
 867           FoundCandidate = BestCost;
 868           DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
 869           continue;
 870         } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
 871           Candidate.SU = *I;
 872           Candidate.RPDelta = RPDelta;
 873           Candidate.SCost = CurrentCost;
 874           FoundCandidate = BestCost;
 875           DEBUG(dbgs() << "Used Bot longer tie breaker\n");
 876           continue;
 877         }
 878       }
 879     }
 880
 881     if (CurrentCost == Candidate.SCost) {
 882       if ((Q.getID() == TopQID &&
 883            (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
 884           (Q.getID() == BotQID &&
 885            (*I)->Preds.size() < Candidate.SU->Preds.size())) {
 886         DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
 887         Candidate.SU = *I;
 888         Candidate.RPDelta = RPDelta;
 889         Candidate.SCost = CurrentCost;
 890         FoundCandidate = BestCost;
 891         continue;
 892       }
 893     }
 894
 895     // Fall through to original instruction order.
 896     // Only consider node order if Candidate was chosen from this Q.
 897     if (FoundCandidate == NoCand)
 898       continue;
 899   }
 900   return FoundCandidate;
 901 }
 902
 903 /// Pick the best candidate node from either the top or bottom queue.
 904 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
 905   // Schedule as far as possible in the direction of no choice. This is most
 906   // efficient, but also provides the best heuristics for CriticalPSets.
 907   if (SUnit *SU = Bot.pickOnlyChoice()) {
 908     DEBUG(dbgs() << "Picked only Bottom\n");
 909     IsTopNode = false;
 910     return SU;
 911   }
 912   if (SUnit *SU = Top.pickOnlyChoice()) {
 913     DEBUG(dbgs() << "Picked only Top\n");
 914     IsTopNode = true;
 915     return SU;
 916   }
 917   SchedCandidate BotCand;
 918   // Prefer bottom scheduling when heuristics are silent.
 919   CandResult BotResult = pickNodeFromQueue(Bot.Available,
 920                                            DAG->getBotRPTracker(), BotCand);
 921   assert(BotResult != NoCand && "failed to find the first candidate");
 922
 923   // If either Q has a single candidate that provides the least increase in
 924   // Excess pressure, we can immediately schedule from that Q.
 925   //
 926   // RegionCriticalPSets summarizes the pressure within the scheduled region and
 927   // affects picking from either Q. If scheduling in one direction must
 928   // increase pressure for one of the excess PSets, then schedule in that
 929   // direction first to provide more freedom in the other direction.
 930   if (BotResult == SingleExcess || BotResult == SingleCritical) {
 931     DEBUG(dbgs() << "Prefered Bottom Node\n");
 932     IsTopNode = false;
 933     return BotCand.SU;
 934   }
 935   // Check if the top Q has a better candidate.
 936   SchedCandidate TopCand;
 937   CandResult TopResult = pickNodeFromQueue(Top.Available,
 938                                            DAG->getTopRPTracker(), TopCand);
 939   assert(TopResult != NoCand && "failed to find the first candidate");
 940
 941   if (TopResult == SingleExcess || TopResult == SingleCritical) {
 942     DEBUG(dbgs() << "Prefered Top Node\n");
 943     IsTopNode = true;
 944     return TopCand.SU;
 945   }
 946   // If either Q has a single candidate that minimizes pressure above the
 947   // original region's pressure pick it.
 948   if (BotResult == SingleMax) {
 949     DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
 950     IsTopNode = false;
 951     return BotCand.SU;
 952   }
 953   if (TopResult == SingleMax) {
 954     DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
 955     IsTopNode = true;
 956     return TopCand.SU;
 957   }
 958   if (TopCand.SCost > BotCand.SCost) {
 959     DEBUG(dbgs() << "Prefered Top Node Cost\n");
 960     IsTopNode = true;
 961     return TopCand.SU;
 962   }
 963   // Otherwise prefer the bottom candidate in node order.
 964   DEBUG(dbgs() << "Prefered Bottom in Node order\n");
 965   IsTopNode = false;
 966   return BotCand.SU;
 967 }
 968
 969 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
 970 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
 971   if (DAG->top() == DAG->bottom()) {
 972     assert(Top.Available.empty() && Top.Pending.empty() &&
 973            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
 974     return nullptr;
 975   }
 976   SUnit *SU;
 977   if (llvm::ForceTopDown) {
 978     SU = Top.pickOnlyChoice();
 979     if (!SU) {
 980       SchedCandidate TopCand;
 981       CandResult TopResult =
 982         pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
 983       assert(TopResult != NoCand && "failed to find the first candidate");
 984       (void)TopResult;
 985       SU = TopCand.SU;
 986     }
 987     IsTopNode = true;
 988   } else if (llvm::ForceBottomUp) {
 989     SU = Bot.pickOnlyChoice();
 990     if (!SU) {
 991       SchedCandidate BotCand;
 992       CandResult BotResult =
 993         pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
 994       assert(BotResult != NoCand && "failed to find the first candidate");
 995       (void)BotResult;
 996       SU = BotCand.SU;
 997     }
 998     IsTopNode = false;
 999   } else {
1000     SU = pickNodeBidrectional(IsTopNode);
1001   }
1002   if (SU->isTopReady())
1003     Top.removeReady(SU);
1004   if (SU->isBottomReady())
1005     Bot.removeReady(SU);
1006
1007   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
1008         << " Scheduling Instruction in cycle "
1009         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
1010         SU->dump(DAG));
1011   return SU;
1012 }
1013
1014 /// Update the scheduler's state after scheduling a node. This is the same node
1015 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
1016 /// to update it's state based on the current cycle before MachineSchedStrategy
1017 /// does.
1018 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1019   if (IsTopNode) {
1020     SU->TopReadyCycle = Top.CurrCycle;
1021     Top.bumpNode(SU);
1022   } else {
1023     SU->BotReadyCycle = Bot.CurrCycle;
1024     Bot.bumpNode(SU);
1025   }
1026 }