contrib/llvm/lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/ADT/ArrayRef.h"
  16 #include "llvm/ADT/BitVector.h"
  17 #include "llvm/ADT/DenseMap.h"
  18 #include "llvm/ADT/iterator_range.h"
  19 #include "llvm/ADT/PriorityQueue.h"
  20 #include "llvm/ADT/SmallVector.h"
  21 #include "llvm/ADT/STLExtras.h"
  22 #include "llvm/Analysis/AliasAnalysis.h"
  23 #include "llvm/CodeGen/LiveInterval.h"
  24 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  25 #include "llvm/CodeGen/MachineBasicBlock.h"
  26 #include "llvm/CodeGen/MachineDominators.h"
  27 #include "llvm/CodeGen/MachineFunction.h"
  28 #include "llvm/CodeGen/MachineFunctionPass.h"
  29 #include "llvm/CodeGen/MachineInstr.h"
  30 #include "llvm/CodeGen/MachineLoopInfo.h"
  31 #include "llvm/CodeGen/MachineOperand.h"
  32 #include "llvm/CodeGen/MachinePassRegistry.h"
  33 #include "llvm/CodeGen/RegisterPressure.h"
  34 #include "llvm/CodeGen/MachineRegisterInfo.h"
  35 #include "llvm/CodeGen/MachineScheduler.h"
  36 #include "llvm/CodeGen/MachineValueType.h"
  37 #include "llvm/CodeGen/Passes.h"
  38 #include "llvm/CodeGen/RegisterClassInfo.h"
  39 #include "llvm/CodeGen/ScheduleDAG.h"
  40 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  41 #include "llvm/CodeGen/ScheduleDAGMutation.h"
  42 #include "llvm/CodeGen/ScheduleDFS.h"
  43 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  44 #include "llvm/CodeGen/SlotIndexes.h"
  45 #include "llvm/CodeGen/TargetPassConfig.h"
  46 #include "llvm/CodeGen/TargetSchedule.h"
  47 #include "llvm/MC/LaneBitmask.h"
  48 #include "llvm/Pass.h"
  49 #include "llvm/Support/CommandLine.h"
  50 #include "llvm/Support/Compiler.h"
  51 #include "llvm/Support/Debug.h"
  52 #include "llvm/Support/ErrorHandling.h"
  53 #include "llvm/Support/GraphWriter.h"
  54 #include "llvm/Support/raw_ostream.h"
  55 #include "llvm/Target/TargetInstrInfo.h"
  56 #include "llvm/Target/TargetLowering.h"
  57 #include "llvm/Target/TargetRegisterInfo.h"
  58 #include "llvm/Target/TargetSubtargetInfo.h"
  59 #include <algorithm>
  60 #include <cassert>
  61 #include <cstdint>
  62 #include <iterator>
  63 #include <limits>
  64 #include <memory>
  65 #include <string>
  66 #include <tuple>
  67 #include <utility>
  68 #include <vector>
  69
  70 using namespace llvm;
  71
  72 #define DEBUG_TYPE "misched"
  73
  74 namespace llvm {
  75
  76 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  77                            cl::desc("Force top-down list scheduling"));
  78 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  79                             cl::desc("Force bottom-up list scheduling"));
  80 cl::opt<bool>
  81 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
  82                        cl::desc("Print critical path length to stdout"));
  83
  84 } // end namespace llvm
  85
  86 #ifndef NDEBUG
  87 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  88   cl::desc("Pop up a window to show MISched dags after they are processed"));
  89
  90 /// In some situations a few uninteresting nodes depend on nearly all other
  91 /// nodes in the graph, provide a cutoff to hide them.
  92 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  93   cl::desc("Hide nodes with more predecessor/successor than cutoff"));
  94
  95 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  96   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  97
  98 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  99   cl::desc("Only schedule this function"));
 100 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
 101   cl::desc("Only schedule this MBB#"));
 102 #else
 103 static bool ViewMISchedDAGs = false;
 104 #endif // NDEBUG
 105
 106 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
 107 /// size of the ready lists.
 108 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
 109   cl::desc("Limit ready list to N instructions"), cl::init(256));
 110
 111 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
 112   cl::desc("Enable register pressure scheduling."), cl::init(true));
 113
 114 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
 115   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
 116
 117 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
 118                                         cl::desc("Enable memop clustering."),
 119                                         cl::init(true));
 120
 121 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
 122   cl::desc("Verify machine instrs before and after machine scheduling"));
 123
 124 // DAG subtrees must have at least this many nodes.
 125 static const unsigned MinSubtreeSize = 8;
 126
 127 // Pin the vtables to this file.
 128 void MachineSchedStrategy::anchor() {}
 129
 130 void ScheduleDAGMutation::anchor() {}
 131
 132 //===----------------------------------------------------------------------===//
 133 // Machine Instruction Scheduling Pass and Registry
 134 //===----------------------------------------------------------------------===//
 135
 136 MachineSchedContext::MachineSchedContext() {
 137   RegClassInfo = new RegisterClassInfo();
 138 }
 139
 140 MachineSchedContext::~MachineSchedContext() {
 141   delete RegClassInfo;
 142 }
 143
 144 namespace {
 145
 146 /// Base class for a machine scheduler class that can run at any point.
 147 class MachineSchedulerBase : public MachineSchedContext,
 148                              public MachineFunctionPass {
 149 public:
 150   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
 151
 152   void print(raw_ostream &O, const Module* = nullptr) const override;
 153
 154 protected:
 155   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
 156 };
 157
 158 /// MachineScheduler runs after coalescing and before register allocation.
 159 class MachineScheduler : public MachineSchedulerBase {
 160 public:
 161   MachineScheduler();
 162
 163   void getAnalysisUsage(AnalysisUsage &AU) const override;
 164
 165   bool runOnMachineFunction(MachineFunction&) override;
 166
 167   static char ID; // Class identification, replacement for typeinfo
 168
 169 protected:
 170   ScheduleDAGInstrs *createMachineScheduler();
 171 };
 172
 173 /// PostMachineScheduler runs after shortly before code emission.
 174 class PostMachineScheduler : public MachineSchedulerBase {
 175 public:
 176   PostMachineScheduler();
 177
 178   void getAnalysisUsage(AnalysisUsage &AU) const override;
 179
 180   bool runOnMachineFunction(MachineFunction&) override;
 181
 182   static char ID; // Class identification, replacement for typeinfo
 183
 184 protected:
 185   ScheduleDAGInstrs *createPostMachineScheduler();
 186 };
 187
 188 } // end anonymous namespace
 189
 190 char MachineScheduler::ID = 0;
 191
 192 char &llvm::MachineSchedulerID = MachineScheduler::ID;
 193
 194 INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
 195                       "Machine Instruction Scheduler", false, false)
 196 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 197 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 198 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 199 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 200 INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
 201                     "Machine Instruction Scheduler", false, false)
 202
 203 MachineScheduler::MachineScheduler()
 204 : MachineSchedulerBase(ID) {
 205   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 206 }
 207
 208 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 209   AU.setPreservesCFG();
 210   AU.addRequiredID(MachineDominatorsID);
 211   AU.addRequired<MachineLoopInfo>();
 212   AU.addRequired<AAResultsWrapperPass>();
 213   AU.addRequired<TargetPassConfig>();
 214   AU.addRequired<SlotIndexes>();
 215   AU.addPreserved<SlotIndexes>();
 216   AU.addRequired<LiveIntervals>();
 217   AU.addPreserved<LiveIntervals>();
 218   MachineFunctionPass::getAnalysisUsage(AU);
 219 }
 220
 221 char PostMachineScheduler::ID = 0;
 222
 223 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
 224
 225 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
 226                 "PostRA Machine Instruction Scheduler", false, false)
 227
 228 PostMachineScheduler::PostMachineScheduler()
 229 : MachineSchedulerBase(ID) {
 230   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
 231 }
 232
 233 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 234   AU.setPreservesCFG();
 235   AU.addRequiredID(MachineDominatorsID);
 236   AU.addRequired<MachineLoopInfo>();
 237   AU.addRequired<TargetPassConfig>();
 238   MachineFunctionPass::getAnalysisUsage(AU);
 239 }
 240
 241 MachinePassRegistry MachineSchedRegistry::Registry;
 242
 243 /// A dummy default scheduler factory indicates whether the scheduler
 244 /// is overridden on the command line.
 245 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 246   return nullptr;
 247 }
 248
 249 /// MachineSchedOpt allows command line selection of the scheduler.
 250 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 251                RegisterPassParser<MachineSchedRegistry>>
 252 MachineSchedOpt("misched",
 253                 cl::init(&useDefaultMachineSched), cl::Hidden,
 254                 cl::desc("Machine instruction scheduler to use"));
 255
 256 static MachineSchedRegistry
 257 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 258                      useDefaultMachineSched);
 259
 260 static cl::opt<bool> EnableMachineSched(
 261     "enable-misched",
 262     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
 263     cl::Hidden);
 264
 265 static cl::opt<bool> EnablePostRAMachineSched(
 266     "enable-post-misched",
 267     cl::desc("Enable the post-ra machine instruction scheduling pass."),
 268     cl::init(true), cl::Hidden);
 269
 270 /// Decrement this iterator until reaching the top or a non-debug instr.
 271 static MachineBasicBlock::const_iterator
 272 priorNonDebug(MachineBasicBlock::const_iterator I,
 273               MachineBasicBlock::const_iterator Beg) {
 274   assert(I != Beg && "reached the top of the region, cannot decrement");
 275   while (--I != Beg) {
 276     if (!I->isDebugValue())
 277       break;
 278   }
 279   return I;
 280 }
 281
 282 /// Non-const version.
 283 static MachineBasicBlock::iterator
 284 priorNonDebug(MachineBasicBlock::iterator I,
 285               MachineBasicBlock::const_iterator Beg) {
 286   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
 287       .getNonConstIterator();
 288 }
 289
 290 /// If this iterator is a debug value, increment until reaching the End or a
 291 /// non-debug instruction.
 292 static MachineBasicBlock::const_iterator
 293 nextIfDebug(MachineBasicBlock::const_iterator I,
 294             MachineBasicBlock::const_iterator End) {
 295   for(; I != End; ++I) {
 296     if (!I->isDebugValue())
 297       break;
 298   }
 299   return I;
 300 }
 301
 302 /// Non-const version.
 303 static MachineBasicBlock::iterator
 304 nextIfDebug(MachineBasicBlock::iterator I,
 305             MachineBasicBlock::const_iterator End) {
 306   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
 307       .getNonConstIterator();
 308 }
 309
 310 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
 311 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
 312   // Select the scheduler, or set the default.
 313   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
 314   if (Ctor != useDefaultMachineSched)
 315     return Ctor(this);
 316
 317   // Get the default scheduler set by the target for this function.
 318   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
 319   if (Scheduler)
 320     return Scheduler;
 321
 322   // Default to GenericScheduler.
 323   return createGenericSchedLive(this);
 324 }
 325
 326 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
 327 /// the caller. We don't have a command line option to override the postRA
 328 /// scheduler. The Target must configure it.
 329 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 330   // Get the postRA scheduler set by the target for this function.
 331   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
 332   if (Scheduler)
 333     return Scheduler;
 334
 335   // Default to GenericScheduler.
 336   return createGenericSchedPostRA(this);
 337 }
 338
 339 /// Top-level MachineScheduler pass driver.
 340 ///
 341 /// Visit blocks in function order. Divide each block into scheduling regions
 342 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 343 /// consistent with the DAG builder, which traverses the interior of the
 344 /// scheduling regions bottom-up.
 345 ///
 346 /// This design avoids exposing scheduling boundaries to the DAG builder,
 347 /// simplifying the DAG builder's support for "special" target instructions.
 348 /// At the same time the design allows target schedulers to operate across
 349 /// scheduling boundaries, for example to bundle the boudary instructions
 350 /// without reordering them. This creates complexity, because the target
 351 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 352 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 353 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 354 /// general bias against block splitting purely for implementation simplicity.
 355 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 356   if (skipFunction(*mf.getFunction()))
 357     return false;
 358
 359   if (EnableMachineSched.getNumOccurrences()) {
 360     if (!EnableMachineSched)
 361       return false;
 362   } else if (!mf.getSubtarget().enableMachineScheduler())
 363     return false;
 364
 365   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
 366
 367   // Initialize the context of the pass.
 368   MF = &mf;
 369   MLI = &getAnalysis<MachineLoopInfo>();
 370   MDT = &getAnalysis<MachineDominatorTree>();
 371   PassConfig = &getAnalysis<TargetPassConfig>();
 372   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 373
 374   LIS = &getAnalysis<LiveIntervals>();
 375
 376   if (VerifyScheduling) {
 377     DEBUG(LIS->dump());
 378     MF->verify(this, "Before machine scheduling.");
 379   }
 380   RegClassInfo->runOnMachineFunction(*MF);
 381
 382   // Instantiate the selected scheduler for this target, function, and
 383   // optimization level.
 384   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
 385   scheduleRegions(*Scheduler, false);
 386
 387   DEBUG(LIS->dump());
 388   if (VerifyScheduling)
 389     MF->verify(this, "After machine scheduling.");
 390   return true;
 391 }
 392
 393 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 394   if (skipFunction(*mf.getFunction()))
 395     return false;
 396
 397   if (EnablePostRAMachineSched.getNumOccurrences()) {
 398     if (!EnablePostRAMachineSched)
 399       return false;
 400   } else if (!mf.getSubtarget().enablePostRAScheduler()) {
 401     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
 402     return false;
 403   }
 404   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
 405
 406   // Initialize the context of the pass.
 407   MF = &mf;
 408   PassConfig = &getAnalysis<TargetPassConfig>();
 409
 410   if (VerifyScheduling)
 411     MF->verify(this, "Before post machine scheduling.");
 412
 413   // Instantiate the selected scheduler for this target, function, and
 414   // optimization level.
 415   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
 416   scheduleRegions(*Scheduler, true);
 417
 418   if (VerifyScheduling)
 419     MF->verify(this, "After post machine scheduling.");
 420   return true;
 421 }
 422
 423 /// Return true of the given instruction should not be included in a scheduling
 424 /// region.
 425 ///
 426 /// MachineScheduler does not currently support scheduling across calls. To
 427 /// handle calls, the DAG builder needs to be modified to create register
 428 /// anti/output dependencies on the registers clobbered by the call's regmask
 429 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
 430 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
 431 /// the boundary, but there would be no benefit to postRA scheduling across
 432 /// calls this late anyway.
 433 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
 434                             MachineBasicBlock *MBB,
 435                             MachineFunction *MF,
 436                             const TargetInstrInfo *TII) {
 437   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
 438 }
 439
 440 /// Main driver for both MachineScheduler and PostMachineScheduler.
 441 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
 442                                            bool FixKillFlags) {
 443   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 444
 445   // Visit all machine basic blocks.
 446   //
 447   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 448   // loop tree. Then we can optionally compute global RegPressure.
 449   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 450        MBB != MBBEnd; ++MBB) {
 451
 452     Scheduler.startBlock(&*MBB);
 453
 454 #ifndef NDEBUG
 455     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
 456       continue;
 457     if (SchedOnlyBlock.getNumOccurrences()
 458         && (int)SchedOnlyBlock != MBB->getNumber())
 459       continue;
 460 #endif
 461
 462     // Break the block into scheduling regions [I, RegionEnd), and schedule each
 463     // region as soon as it is discovered. RegionEnd points the scheduling
 464     // boundary at the bottom of the region. The DAG does not include RegionEnd,
 465     // but the region does (i.e. the next RegionEnd is above the previous
 466     // RegionBegin). If the current block has no terminator then RegionEnd ==
 467     // MBB->end() for the bottom region.
 468     //
 469     // The Scheduler may insert instructions during either schedule() or
 470     // exitRegion(), even for empty regions. So the local iterators 'I' and
 471     // 'RegionEnd' are invalid across these calls.
 472     //
 473     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
 474     // as a single instruction.
 475     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 476         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 477
 478       // Avoid decrementing RegionEnd for blocks with no terminator.
 479       if (RegionEnd != MBB->end() ||
 480           isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
 481         --RegionEnd;
 482       }
 483
 484       // The next region starts above the previous region. Look backward in the
 485       // instruction stream until we find the nearest boundary.
 486       unsigned NumRegionInstrs = 0;
 487       MachineBasicBlock::iterator I = RegionEnd;
 488       for (; I != MBB->begin(); --I) {
 489         MachineInstr &MI = *std::prev(I);
 490         if (isSchedBoundary(&MI, &*MBB, MF, TII))
 491           break;
 492         if (!MI.isDebugValue())
 493           ++NumRegionInstrs;
 494       }
 495       // Notify the scheduler of the region, even if we may skip scheduling
 496       // it. Perhaps it still needs to be bundled.
 497       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
 498
 499       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 500       if (I == RegionEnd || I == std::prev(RegionEnd)) {
 501         // Close the current region. Bundle the terminator if needed.
 502         // This invalidates 'RegionEnd' and 'I'.
 503         Scheduler.exitRegion();
 504         continue;
 505       }
 506       DEBUG(dbgs() << "********** MI Scheduling **********\n");
 507       DEBUG(dbgs() << MF->getName()
 508             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
 509             << "\n  From: " << *I << "    To: ";
 510             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 511             else dbgs() << "End";
 512             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
 513       if (DumpCriticalPathLength) {
 514         errs() << MF->getName();
 515         errs() << ":BB# " << MBB->getNumber();
 516         errs() << " " << MBB->getName() << " \n";
 517       }
 518
 519       // Schedule a region: possibly reorder instructions.
 520       // This invalidates 'RegionEnd' and 'I'.
 521       Scheduler.schedule();
 522
 523       // Close the current region.
 524       Scheduler.exitRegion();
 525
 526       // Scheduling has invalidated the current iterator 'I'. Ask the
 527       // scheduler for the top of it's scheduled region.
 528       RegionEnd = Scheduler.begin();
 529     }
 530     Scheduler.finishBlock();
 531     // FIXME: Ideally, no further passes should rely on kill flags. However,
 532     // thumb2 size reduction is currently an exception, so the PostMIScheduler
 533     // needs to do this.
 534     if (FixKillFlags)
 535         Scheduler.fixupKills(&*MBB);
 536   }
 537   Scheduler.finalizeSchedule();
 538 }
 539
 540 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
 541   // unimplemented
 542 }
 543
 544 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 545 LLVM_DUMP_METHOD void ReadyQueue::dump() {
 546   dbgs() << "Queue " << Name << ": ";
 547   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
 548     dbgs() << Queue[i]->NodeNum << " ";
 549   dbgs() << "\n";
 550 }
 551 #endif
 552
 553 //===----------------------------------------------------------------------===//
 554 // ScheduleDAGMI - Basic machine instruction scheduling. This is
 555 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
 556 // virtual registers.
 557 // ===----------------------------------------------------------------------===/
 558
 559 // Provide a vtable anchor.
 560 ScheduleDAGMI::~ScheduleDAGMI() = default;
 561
 562 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
 563   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
 564 }
 565
 566 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
 567   if (SuccSU != &ExitSU) {
 568     // Do not use WillCreateCycle, it assumes SD scheduling.
 569     // If Pred is reachable from Succ, then the edge creates a cycle.
 570     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 571       return false;
 572     Topo.AddPred(SuccSU, PredDep.getSUnit());
 573   }
 574   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 575   // Return true regardless of whether a new edge needed to be inserted.
 576   return true;
 577 }
 578
 579 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 580 /// NumPredsLeft reaches zero, release the successor node.
 581 ///
 582 /// FIXME: Adjust SuccSU height based on MinLatency.
 583 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 584   SUnit *SuccSU = SuccEdge->getSUnit();
 585
 586   if (SuccEdge->isWeak()) {
 587     --SuccSU->WeakPredsLeft;
 588     if (SuccEdge->isCluster())
 589       NextClusterSucc = SuccSU;
 590     return;
 591   }
 592 #ifndef NDEBUG
 593   if (SuccSU->NumPredsLeft == 0) {
 594     dbgs() << "*** Scheduling failed! ***\n";
 595     SuccSU->dump(this);
 596     dbgs() << " has been released too many times!\n";
 597     llvm_unreachable(nullptr);
 598   }
 599 #endif
 600   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
 601   // CurrCycle may have advanced since then.
 602   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
 603     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
 604
 605   --SuccSU->NumPredsLeft;
 606   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 607     SchedImpl->releaseTopNode(SuccSU);
 608 }
 609
 610 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 611 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 612   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 613        I != E; ++I) {
 614     releaseSucc(SU, &*I);
 615   }
 616 }
 617
 618 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 619 /// NumSuccsLeft reaches zero, release the predecessor node.
 620 ///
 621 /// FIXME: Adjust PredSU height based on MinLatency.
 622 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 623   SUnit *PredSU = PredEdge->getSUnit();
 624
 625   if (PredEdge->isWeak()) {
 626     --PredSU->WeakSuccsLeft;
 627     if (PredEdge->isCluster())
 628       NextClusterPred = PredSU;
 629     return;
 630   }
 631 #ifndef NDEBUG
 632   if (PredSU->NumSuccsLeft == 0) {
 633     dbgs() << "*** Scheduling failed! ***\n";
 634     PredSU->dump(this);
 635     dbgs() << " has been released too many times!\n";
 636     llvm_unreachable(nullptr);
 637   }
 638 #endif
 639   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
 640   // CurrCycle may have advanced since then.
 641   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
 642     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
 643
 644   --PredSU->NumSuccsLeft;
 645   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 646     SchedImpl->releaseBottomNode(PredSU);
 647 }
 648
 649 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 650 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 651   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 652        I != E; ++I) {
 653     releasePred(SU, &*I);
 654   }
 655 }
 656
 657 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 658 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 659 /// the region, including the boundary itself and single-instruction regions
 660 /// that don't get scheduled.
 661 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 662                                      MachineBasicBlock::iterator begin,
 663                                      MachineBasicBlock::iterator end,
 664                                      unsigned regioninstrs)
 665 {
 666   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
 667
 668   SchedImpl->initPolicy(begin, end, regioninstrs);
 669 }
 670
 671 /// This is normally called from the main scheduler loop but may also be invoked
 672 /// by the scheduling strategy to perform additional code motion.
 673 void ScheduleDAGMI::moveInstruction(
 674   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
 675   // Advance RegionBegin if the first instruction moves down.
 676   if (&*RegionBegin == MI)
 677     ++RegionBegin;
 678
 679   // Update the instruction stream.
 680   BB->splice(InsertPos, BB, MI);
 681
 682   // Update LiveIntervals
 683   if (LIS)
 684     LIS->handleMove(*MI, /*UpdateFlags=*/true);
 685
 686   // Recede RegionBegin if an instruction moves above the first.
 687   if (RegionBegin == InsertPos)
 688     RegionBegin = MI;
 689 }
 690
 691 bool ScheduleDAGMI::checkSchedLimit() {
 692 #ifndef NDEBUG
 693   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
 694     CurrentTop = CurrentBottom;
 695     return false;
 696   }
 697   ++NumInstrsScheduled;
 698 #endif
 699   return true;
 700 }
 701
 702 /// Per-region scheduling driver, called back from
 703 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
 704 /// does not consider liveness or register pressure. It is useful for PostRA
 705 /// scheduling and potentially other custom schedulers.
 706 void ScheduleDAGMI::schedule() {
 707   DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
 708   DEBUG(SchedImpl->dumpPolicy());
 709
 710   // Build the DAG.
 711   buildSchedGraph(AA);
 712
 713   Topo.InitDAGTopologicalSorting();
 714
 715   postprocessDAG();
 716
 717   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 718   findRootsAndBiasEdges(TopRoots, BotRoots);
 719
 720   // Initialize the strategy before modifying the DAG.
 721   // This may initialize a DFSResult to be used for queue priority.
 722   SchedImpl->initialize(this);
 723
 724   DEBUG(
 725     if (EntrySU.getInstr() != nullptr)
 726       EntrySU.dumpAll(this);
 727     for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 728       SUnits[su].dumpAll(this);
 729     if (ExitSU.getInstr() != nullptr)
 730       ExitSU.dumpAll(this);
 731   );
 732   if (ViewMISchedDAGs) viewGraph();
 733
 734   // Initialize ready queues now that the DAG and priority data are finalized.
 735   initQueues(TopRoots, BotRoots);
 736
 737   bool IsTopNode = false;
 738   while (true) {
 739     DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
 740     SUnit *SU = SchedImpl->pickNode(IsTopNode);
 741     if (!SU) break;
 742
 743     assert(!SU->isScheduled && "Node already scheduled");
 744     if (!checkSchedLimit())
 745       break;
 746
 747     MachineInstr *MI = SU->getInstr();
 748     if (IsTopNode) {
 749       assert(SU->isTopReady() && "node still has unscheduled dependencies");
 750       if (&*CurrentTop == MI)
 751         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 752       else
 753         moveInstruction(MI, CurrentTop);
 754     } else {
 755       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 756       MachineBasicBlock::iterator priorII =
 757         priorNonDebug(CurrentBottom, CurrentTop);
 758       if (&*priorII == MI)
 759         CurrentBottom = priorII;
 760       else {
 761         if (&*CurrentTop == MI)
 762           CurrentTop = nextIfDebug(++CurrentTop, priorII);
 763         moveInstruction(MI, CurrentBottom);
 764         CurrentBottom = MI;
 765       }
 766     }
 767     // Notify the scheduling strategy before updating the DAG.
 768     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
 769     // runs, it can then use the accurate ReadyCycle time to determine whether
 770     // newly released nodes can move to the readyQ.
 771     SchedImpl->schedNode(SU, IsTopNode);
 772
 773     updateQueues(SU, IsTopNode);
 774   }
 775   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 776
 777   placeDebugValues();
 778
 779   DEBUG({
 780       unsigned BBNum = begin()->getParent()->getNumber();
 781       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 782       dumpSchedule();
 783       dbgs() << '\n';
 784     });
 785 }
 786
 787 /// Apply each ScheduleDAGMutation step in order.
 788 void ScheduleDAGMI::postprocessDAG() {
 789   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
 790     Mutations[i]->apply(this);
 791   }
 792 }
 793
 794 void ScheduleDAGMI::
 795 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 796                       SmallVectorImpl<SUnit*> &BotRoots) {
 797   for (std::vector<SUnit>::iterator
 798          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
 799     SUnit *SU = &(*I);
 800     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
 801
 802     // Order predecessors so DFSResult follows the critical path.
 803     SU->biasCriticalPath();
 804
 805     // A SUnit is ready to top schedule if it has no predecessors.
 806     if (!I->NumPredsLeft)
 807       TopRoots.push_back(SU);
 808     // A SUnit is ready to bottom schedule if it has no successors.
 809     if (!I->NumSuccsLeft)
 810       BotRoots.push_back(SU);
 811   }
 812   ExitSU.biasCriticalPath();
 813 }
 814
 815 /// Identify DAG roots and setup scheduler queues.
 816 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 817                                ArrayRef<SUnit*> BotRoots) {
 818   NextClusterSucc = nullptr;
 819   NextClusterPred = nullptr;
 820
 821   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 822   //
 823   // Nodes with unreleased weak edges can still be roots.
 824   // Release top roots in forward order.
 825   for (SmallVectorImpl<SUnit*>::const_iterator
 826          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
 827     SchedImpl->releaseTopNode(*I);
 828   }
 829   // Release bottom roots in reverse order so the higher priority nodes appear
 830   // first. This is more natural and slightly more efficient.
 831   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 832          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 833     SchedImpl->releaseBottomNode(*I);
 834   }
 835
 836   releaseSuccessors(&EntrySU);
 837   releasePredecessors(&ExitSU);
 838
 839   SchedImpl->registerRoots();
 840
 841   // Advance past initial DebugValues.
 842   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 843   CurrentBottom = RegionEnd;
 844 }
 845
 846 /// Update scheduler queues after scheduling an instruction.
 847 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 848   // Release dependent instructions for scheduling.
 849   if (IsTopNode)
 850     releaseSuccessors(SU);
 851   else
 852     releasePredecessors(SU);
 853
 854   SU->isScheduled = true;
 855 }
 856
 857 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
 858 void ScheduleDAGMI::placeDebugValues() {
 859   // If first instruction was a DBG_VALUE then put it back.
 860   if (FirstDbgValue) {
 861     BB->splice(RegionBegin, BB, FirstDbgValue);
 862     RegionBegin = FirstDbgValue;
 863   }
 864
 865   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
 866          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 867     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
 868     MachineInstr *DbgValue = P.first;
 869     MachineBasicBlock::iterator OrigPrevMI = P.second;
 870     if (&*RegionBegin == DbgValue)
 871       ++RegionBegin;
 872     BB->splice(++OrigPrevMI, BB, DbgValue);
 873     if (OrigPrevMI == std::prev(RegionEnd))
 874       RegionEnd = DbgValue;
 875   }
 876   DbgValues.clear();
 877   FirstDbgValue = nullptr;
 878 }
 879
 880 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 881 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
 882   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 883     if (SUnit *SU = getSUnit(&(*MI)))
 884       SU->dump(this);
 885     else
 886       dbgs() << "Missing SUnit\n";
 887   }
 888 }
 889 #endif
 890
 891 //===----------------------------------------------------------------------===//
 892 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
 893 // preservation.
 894 //===----------------------------------------------------------------------===//
 895
 896 ScheduleDAGMILive::~ScheduleDAGMILive() {
 897   delete DFSResult;
 898 }
 899
 900 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
 901   const MachineInstr &MI = *SU.getInstr();
 902   for (const MachineOperand &MO : MI.operands()) {
 903     if (!MO.isReg())
 904       continue;
 905     if (!MO.readsReg())
 906       continue;
 907     if (TrackLaneMasks && !MO.isUse())
 908       continue;
 909
 910     unsigned Reg = MO.getReg();
 911     if (!TargetRegisterInfo::isVirtualRegister(Reg))
 912       continue;
 913
 914     // Ignore re-defs.
 915     if (TrackLaneMasks) {
 916       bool FoundDef = false;
 917       for (const MachineOperand &MO2 : MI.operands()) {
 918         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
 919           FoundDef = true;
 920           break;
 921         }
 922       }
 923       if (FoundDef)
 924         continue;
 925     }
 926
 927     // Record this local VReg use.
 928     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
 929     for (; UI != VRegUses.end(); ++UI) {
 930       if (UI->SU == &SU)
 931         break;
 932     }
 933     if (UI == VRegUses.end())
 934       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
 935   }
 936 }
 937
 938 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 939 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 940 /// the region, including the boundary itself and single-instruction regions
 941 /// that don't get scheduled.
 942 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
 943                                 MachineBasicBlock::iterator begin,
 944                                 MachineBasicBlock::iterator end,
 945                                 unsigned regioninstrs)
 946 {
 947   // ScheduleDAGMI initializes SchedImpl's per-region policy.
 948   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 949
 950   // For convenience remember the end of the liveness region.
 951   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
 952
 953   SUPressureDiffs.clear();
 954
 955   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 956   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
 957
 958   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
 959          "ShouldTrackLaneMasks requires ShouldTrackPressure");
 960 }
 961
 962 // Setup the register pressure trackers for the top scheduled top and bottom
 963 // scheduled regions.
 964 void ScheduleDAGMILive::initRegPressure() {
 965   VRegUses.clear();
 966   VRegUses.setUniverse(MRI.getNumVirtRegs());
 967   for (SUnit &SU : SUnits)
 968     collectVRegUses(SU);
 969
 970   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
 971                     ShouldTrackLaneMasks, false);
 972   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
 973                     ShouldTrackLaneMasks, false);
 974
 975   // Close the RPTracker to finalize live ins.
 976   RPTracker.closeRegion();
 977
 978   DEBUG(RPTracker.dump());
 979
 980   // Initialize the live ins and live outs.
 981   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
 982   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
 983
 984   // Close one end of the tracker so we can call
 985   // getMaxUpward/DownwardPressureDelta before advancing across any
 986   // instructions. This converts currently live regs into live ins/outs.
 987   TopRPTracker.closeTop();
 988   BotRPTracker.closeBottom();
 989
 990   BotRPTracker.initLiveThru(RPTracker);
 991   if (!BotRPTracker.getLiveThru().empty()) {
 992     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
 993     DEBUG(dbgs() << "Live Thru: ";
 994           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
 995   };
 996
 997   // For each live out vreg reduce the pressure change associated with other
 998   // uses of the same vreg below the live-out reaching def.
 999   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1000
1001   // Account for liveness generated by the region boundary.
1002   if (LiveRegionEnd != RegionEnd) {
1003     SmallVector<RegisterMaskPair, 8> LiveUses;
1004     BotRPTracker.recede(&LiveUses);
1005     updatePressureDiffs(LiveUses);
1006   }
1007
1008   DEBUG(
1009     dbgs() << "Top Pressure:\n";
1010     dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1011     dbgs() << "Bottom Pressure:\n";
1012     dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1013   );
1014
1015   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
1016
1017   // Cache the list of excess pressure sets in this region. This will also track
1018   // the max pressure in the scheduled code for these sets.
1019   RegionCriticalPSets.clear();
1020   const std::vector<unsigned> &RegionPressure =
1021     RPTracker.getPressure().MaxSetPressure;
1022   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1023     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1024     if (RegionPressure[i] > Limit) {
1025       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
1026             << " Limit " << Limit
1027             << " Actual " << RegionPressure[i] << "\n");
1028       RegionCriticalPSets.push_back(PressureChange(i));
1029     }
1030   }
1031   DEBUG(dbgs() << "Excess PSets: ";
1032         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
1033           dbgs() << TRI->getRegPressureSetName(
1034             RegionCriticalPSets[i].getPSet()) << " ";
1035         dbgs() << "\n");
1036 }
1037
1038 void ScheduleDAGMILive::
1039 updateScheduledPressure(const SUnit *SU,
1040                         const std::vector<unsigned> &NewMaxPressure) {
1041   const PressureDiff &PDiff = getPressureDiff(SU);
1042   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1043   for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
1044        I != E; ++I) {
1045     if (!I->isValid())
1046       break;
1047     unsigned ID = I->getPSet();
1048     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1049       ++CritIdx;
1050     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1051       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1052           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1053         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1054     }
1055     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1056     if (NewMaxPressure[ID] >= Limit - 2) {
1057       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1058             << NewMaxPressure[ID]
1059             << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1060             << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1061     }
1062   }
1063 }
1064
1065 /// Update the PressureDiff array for liveness after scheduling this
1066 /// instruction.
1067 void ScheduleDAGMILive::updatePressureDiffs(
1068     ArrayRef<RegisterMaskPair> LiveUses) {
1069   for (const RegisterMaskPair &P : LiveUses) {
1070     unsigned Reg = P.RegUnit;
1071     /// FIXME: Currently assuming single-use physregs.
1072     if (!TRI->isVirtualRegister(Reg))
1073       continue;
1074
1075     if (ShouldTrackLaneMasks) {
1076       // If the register has just become live then other uses won't change
1077       // this fact anymore => decrement pressure.
1078       // If the register has just become dead then other uses make it come
1079       // back to life => increment pressure.
1080       bool Decrement = P.LaneMask.any();
1081
1082       for (const VReg2SUnit &V2SU
1083            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1084         SUnit &SU = *V2SU.SU;
1085         if (SU.isScheduled || &SU == &ExitSU)
1086           continue;
1087
1088         PressureDiff &PDiff = getPressureDiff(&SU);
1089         PDiff.addPressureChange(Reg, Decrement, &MRI);
1090         DEBUG(
1091           dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1092                  << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1093                  << ' ' << *SU.getInstr();
1094           dbgs() << "              to ";
1095           PDiff.dump(*TRI);
1096         );
1097       }
1098     } else {
1099       assert(P.LaneMask.any());
1100       DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
1101       // This may be called before CurrentBottom has been initialized. However,
1102       // BotRPTracker must have a valid position. We want the value live into the
1103       // instruction or live out of the block, so ask for the previous
1104       // instruction's live-out.
1105       const LiveInterval &LI = LIS->getInterval(Reg);
1106       VNInfo *VNI;
1107       MachineBasicBlock::const_iterator I =
1108         nextIfDebug(BotRPTracker.getPos(), BB->end());
1109       if (I == BB->end())
1110         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1111       else {
1112         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1113         VNI = LRQ.valueIn();
1114       }
1115       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1116       assert(VNI && "No live value at use.");
1117       for (const VReg2SUnit &V2SU
1118            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1119         SUnit *SU = V2SU.SU;
1120         // If this use comes before the reaching def, it cannot be a last use,
1121         // so decrease its pressure change.
1122         if (!SU->isScheduled && SU != &ExitSU) {
1123           LiveQueryResult LRQ =
1124               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1125           if (LRQ.valueIn() == VNI) {
1126             PressureDiff &PDiff = getPressureDiff(SU);
1127             PDiff.addPressureChange(Reg, true, &MRI);
1128             DEBUG(
1129               dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1130                      << *SU->getInstr();
1131               dbgs() << "              to ";
1132               PDiff.dump(*TRI);
1133             );
1134           }
1135         }
1136       }
1137     }
1138   }
1139 }
1140
1141 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1142 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1143 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1144 ///
1145 /// This is a skeletal driver, with all the functionality pushed into helpers,
1146 /// so that it can be easily extended by experimental schedulers. Generally,
1147 /// implementing MachineSchedStrategy should be sufficient to implement a new
1148 /// scheduling algorithm. However, if a scheduler further subclasses
1149 /// ScheduleDAGMILive then it will want to override this virtual method in order
1150 /// to update any specialized state.
1151 void ScheduleDAGMILive::schedule() {
1152   DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1153   DEBUG(SchedImpl->dumpPolicy());
1154   buildDAGWithRegPressure();
1155
1156   Topo.InitDAGTopologicalSorting();
1157
1158   postprocessDAG();
1159
1160   SmallVector<SUnit*, 8> TopRoots, BotRoots;
1161   findRootsAndBiasEdges(TopRoots, BotRoots);
1162
1163   // Initialize the strategy before modifying the DAG.
1164   // This may initialize a DFSResult to be used for queue priority.
1165   SchedImpl->initialize(this);
1166
1167   DEBUG(
1168     if (EntrySU.getInstr() != nullptr)
1169       EntrySU.dumpAll(this);
1170     for (const SUnit &SU : SUnits) {
1171       SU.dumpAll(this);
1172       if (ShouldTrackPressure) {
1173         dbgs() << "  Pressure Diff      : ";
1174         getPressureDiff(&SU).dump(*TRI);
1175       }
1176       dbgs() << "  Single Issue       : ";
1177       if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1178          SchedModel.mustEndGroup(SU.getInstr()))
1179         dbgs() << "true;";
1180       else
1181         dbgs() << "false;";
1182       dbgs() << '\n';
1183     }
1184     if (ExitSU.getInstr() != nullptr)
1185       ExitSU.dumpAll(this);
1186   );
1187   if (ViewMISchedDAGs) viewGraph();
1188
1189   // Initialize ready queues now that the DAG and priority data are finalized.
1190   initQueues(TopRoots, BotRoots);
1191
1192   bool IsTopNode = false;
1193   while (true) {
1194     DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1195     SUnit *SU = SchedImpl->pickNode(IsTopNode);
1196     if (!SU) break;
1197
1198     assert(!SU->isScheduled && "Node already scheduled");
1199     if (!checkSchedLimit())
1200       break;
1201
1202     scheduleMI(SU, IsTopNode);
1203
1204     if (DFSResult) {
1205       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1206       if (!ScheduledTrees.test(SubtreeID)) {
1207         ScheduledTrees.set(SubtreeID);
1208         DFSResult->scheduleTree(SubtreeID);
1209         SchedImpl->scheduleTree(SubtreeID);
1210       }
1211     }
1212
1213     // Notify the scheduling strategy after updating the DAG.
1214     SchedImpl->schedNode(SU, IsTopNode);
1215
1216     updateQueues(SU, IsTopNode);
1217   }
1218   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1219
1220   placeDebugValues();
1221
1222   DEBUG({
1223       unsigned BBNum = begin()->getParent()->getNumber();
1224       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1225       dumpSchedule();
1226       dbgs() << '\n';
1227     });
1228 }
1229
1230 /// Build the DAG and setup three register pressure trackers.
1231 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1232   if (!ShouldTrackPressure) {
1233     RPTracker.reset();
1234     RegionCriticalPSets.clear();
1235     buildSchedGraph(AA);
1236     return;
1237   }
1238
1239   // Initialize the register pressure tracker used by buildSchedGraph.
1240   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1241                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1242
1243   // Account for liveness generate by the region boundary.
1244   if (LiveRegionEnd != RegionEnd)
1245     RPTracker.recede();
1246
1247   // Build the DAG, and compute current register pressure.
1248   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1249
1250   // Initialize top/bottom trackers after computing region pressure.
1251   initRegPressure();
1252 }
1253
1254 void ScheduleDAGMILive::computeDFSResult() {
1255   if (!DFSResult)
1256     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1257   DFSResult->clear();
1258   ScheduledTrees.clear();
1259   DFSResult->resize(SUnits.size());
1260   DFSResult->compute(SUnits);
1261   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1262 }
1263
1264 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1265 /// only provides the critical path for single block loops. To handle loops that
1266 /// span blocks, we could use the vreg path latencies provided by
1267 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1268 /// available for use in the scheduler.
1269 ///
1270 /// The cyclic path estimation identifies a def-use pair that crosses the back
1271 /// edge and considers the depth and height of the nodes. For example, consider
1272 /// the following instruction sequence where each instruction has unit latency
1273 /// and defines an epomymous virtual register:
1274 ///
1275 /// a->b(a,c)->c(b)->d(c)->exit
1276 ///
1277 /// The cyclic critical path is a two cycles: b->c->b
1278 /// The acyclic critical path is four cycles: a->b->c->d->exit
1279 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1280 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1281 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1282 /// LiveInDepth = depth(b) = len(a->b) = 1
1283 ///
1284 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1285 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1286 /// CyclicCriticalPath = min(2, 2) = 2
1287 ///
1288 /// This could be relevant to PostRA scheduling, but is currently implemented
1289 /// assuming LiveIntervals.
1290 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1291   // This only applies to single block loop.
1292   if (!BB->isSuccessor(BB))
1293     return 0;
1294
1295   unsigned MaxCyclicLatency = 0;
1296   // Visit each live out vreg def to find def/use pairs that cross iterations.
1297   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1298     unsigned Reg = P.RegUnit;
1299     if (!TRI->isVirtualRegister(Reg))
1300         continue;
1301     const LiveInterval &LI = LIS->getInterval(Reg);
1302     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1303     if (!DefVNI)
1304       continue;
1305
1306     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1307     const SUnit *DefSU = getSUnit(DefMI);
1308     if (!DefSU)
1309       continue;
1310
1311     unsigned LiveOutHeight = DefSU->getHeight();
1312     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1313     // Visit all local users of the vreg def.
1314     for (const VReg2SUnit &V2SU
1315          : make_range(VRegUses.find(Reg), VRegUses.end())) {
1316       SUnit *SU = V2SU.SU;
1317       if (SU == &ExitSU)
1318         continue;
1319
1320       // Only consider uses of the phi.
1321       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1322       if (!LRQ.valueIn()->isPHIDef())
1323         continue;
1324
1325       // Assume that a path spanning two iterations is a cycle, which could
1326       // overestimate in strange cases. This allows cyclic latency to be
1327       // estimated as the minimum slack of the vreg's depth or height.
1328       unsigned CyclicLatency = 0;
1329       if (LiveOutDepth > SU->getDepth())
1330         CyclicLatency = LiveOutDepth - SU->getDepth();
1331
1332       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1333       if (LiveInHeight > LiveOutHeight) {
1334         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1335           CyclicLatency = LiveInHeight - LiveOutHeight;
1336       } else
1337         CyclicLatency = 0;
1338
1339       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1340             << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1341       if (CyclicLatency > MaxCyclicLatency)
1342         MaxCyclicLatency = CyclicLatency;
1343     }
1344   }
1345   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1346   return MaxCyclicLatency;
1347 }
1348
1349 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1350 /// the Top RP tracker in case the region beginning has changed.
1351 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1352                                    ArrayRef<SUnit*> BotRoots) {
1353   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1354   if (ShouldTrackPressure) {
1355     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1356     TopRPTracker.setPos(CurrentTop);
1357   }
1358 }
1359
1360 /// Move an instruction and update register pressure.
1361 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1362   // Move the instruction to its new location in the instruction stream.
1363   MachineInstr *MI = SU->getInstr();
1364
1365   if (IsTopNode) {
1366     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1367     if (&*CurrentTop == MI)
1368       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1369     else {
1370       moveInstruction(MI, CurrentTop);
1371       TopRPTracker.setPos(MI);
1372     }
1373
1374     if (ShouldTrackPressure) {
1375       // Update top scheduled pressure.
1376       RegisterOperands RegOpers;
1377       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1378       if (ShouldTrackLaneMasks) {
1379         // Adjust liveness and add missing dead+read-undef flags.
1380         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1381         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1382       } else {
1383         // Adjust for missing dead-def flags.
1384         RegOpers.detectDeadDefs(*MI, *LIS);
1385       }
1386
1387       TopRPTracker.advance(RegOpers);
1388       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1389       DEBUG(
1390         dbgs() << "Top Pressure:\n";
1391         dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1392       );
1393
1394       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1395     }
1396   } else {
1397     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1398     MachineBasicBlock::iterator priorII =
1399       priorNonDebug(CurrentBottom, CurrentTop);
1400     if (&*priorII == MI)
1401       CurrentBottom = priorII;
1402     else {
1403       if (&*CurrentTop == MI) {
1404         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1405         TopRPTracker.setPos(CurrentTop);
1406       }
1407       moveInstruction(MI, CurrentBottom);
1408       CurrentBottom = MI;
1409     }
1410     if (ShouldTrackPressure) {
1411       RegisterOperands RegOpers;
1412       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1413       if (ShouldTrackLaneMasks) {
1414         // Adjust liveness and add missing dead+read-undef flags.
1415         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1416         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1417       } else {
1418         // Adjust for missing dead-def flags.
1419         RegOpers.detectDeadDefs(*MI, *LIS);
1420       }
1421
1422       BotRPTracker.recedeSkipDebugValues();
1423       SmallVector<RegisterMaskPair, 8> LiveUses;
1424       BotRPTracker.recede(RegOpers, &LiveUses);
1425       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1426       DEBUG(
1427         dbgs() << "Bottom Pressure:\n";
1428         dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1429       );
1430
1431       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1432       updatePressureDiffs(LiveUses);
1433     }
1434   }
1435 }
1436
1437 //===----------------------------------------------------------------------===//
1438 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1439 //===----------------------------------------------------------------------===//
1440
1441 namespace {
1442
1443 /// \brief Post-process the DAG to create cluster edges between neighboring
1444 /// loads or between neighboring stores.
1445 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1446   struct MemOpInfo {
1447     SUnit *SU;
1448     unsigned BaseReg;
1449     int64_t Offset;
1450
1451     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1452         : SU(su), BaseReg(reg), Offset(ofs) {}
1453
1454     bool operator<(const MemOpInfo&RHS) const {
1455       return std::tie(BaseReg, Offset, SU->NodeNum) <
1456              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1457     }
1458   };
1459
1460   const TargetInstrInfo *TII;
1461   const TargetRegisterInfo *TRI;
1462   bool IsLoad;
1463
1464 public:
1465   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1466                            const TargetRegisterInfo *tri, bool IsLoad)
1467       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1468
1469   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1470
1471 protected:
1472   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1473 };
1474
1475 class StoreClusterMutation : public BaseMemOpClusterMutation {
1476 public:
1477   StoreClusterMutation(const TargetInstrInfo *tii,
1478                        const TargetRegisterInfo *tri)
1479       : BaseMemOpClusterMutation(tii, tri, false) {}
1480 };
1481
1482 class LoadClusterMutation : public BaseMemOpClusterMutation {
1483 public:
1484   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1485       : BaseMemOpClusterMutation(tii, tri, true) {}
1486 };
1487
1488 } // end anonymous namespace
1489
1490 namespace llvm {
1491
1492 std::unique_ptr<ScheduleDAGMutation>
1493 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1494                              const TargetRegisterInfo *TRI) {
1495   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1496                             : nullptr;
1497 }
1498
1499 std::unique_ptr<ScheduleDAGMutation>
1500 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1501                               const TargetRegisterInfo *TRI) {
1502   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1503                             : nullptr;
1504 }
1505
1506 } // end namespace llvm
1507
1508 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1509     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1510   SmallVector<MemOpInfo, 32> MemOpRecords;
1511   for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
1512     SUnit *SU = MemOps[Idx];
1513     unsigned BaseReg;
1514     int64_t Offset;
1515     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1516       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1517   }
1518   if (MemOpRecords.size() < 2)
1519     return;
1520
1521   std::sort(MemOpRecords.begin(), MemOpRecords.end());
1522   unsigned ClusterLength = 1;
1523   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1524     if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
1525       ClusterLength = 1;
1526       continue;
1527     }
1528
1529     SUnit *SUa = MemOpRecords[Idx].SU;
1530     SUnit *SUb = MemOpRecords[Idx+1].SU;
1531     if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
1532                                  ClusterLength) &&
1533         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1534       DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1535             << SUb->NodeNum << ")\n");
1536       // Copy successor edges from SUa to SUb. Interleaving computation
1537       // dependent on SUa can prevent load combining due to register reuse.
1538       // Predecessor edges do not need to be copied from SUb to SUa since nearby
1539       // loads should have effectively the same inputs.
1540       for (SUnit::const_succ_iterator
1541              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1542         if (SI->getSUnit() == SUb)
1543           continue;
1544         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1545         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1546       }
1547       ++ClusterLength;
1548     } else
1549       ClusterLength = 1;
1550   }
1551 }
1552
1553 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1554 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1555
1556   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1557
1558   // Map DAG NodeNum to store chain ID.
1559   DenseMap<unsigned, unsigned> StoreChainIDs;
1560   // Map each store chain to a set of dependent MemOps.
1561   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1562   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1563     SUnit *SU = &DAG->SUnits[Idx];
1564     if ((IsLoad && !SU->getInstr()->mayLoad()) ||
1565         (!IsLoad && !SU->getInstr()->mayStore()))
1566       continue;
1567
1568     unsigned ChainPredID = DAG->SUnits.size();
1569     for (SUnit::const_pred_iterator
1570            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1571       if (PI->isCtrl()) {
1572         ChainPredID = PI->getSUnit()->NodeNum;
1573         break;
1574       }
1575     }
1576     // Check if this chain-like pred has been seen
1577     // before. ChainPredID==MaxNodeID at the top of the schedule.
1578     unsigned NumChains = StoreChainDependents.size();
1579     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1580       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1581     if (Result.second)
1582       StoreChainDependents.resize(NumChains + 1);
1583     StoreChainDependents[Result.first->second].push_back(SU);
1584   }
1585
1586   // Iterate over the store chains.
1587   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1588     clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
1589 }
1590
1591 //===----------------------------------------------------------------------===//
1592 // CopyConstrain - DAG post-processing to encourage copy elimination.
1593 //===----------------------------------------------------------------------===//
1594
1595 namespace {
1596
1597 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1598 /// the one use that defines the copy's source vreg, most likely an induction
1599 /// variable increment.
1600 class CopyConstrain : public ScheduleDAGMutation {
1601   // Transient state.
1602   SlotIndex RegionBeginIdx;
1603   // RegionEndIdx is the slot index of the last non-debug instruction in the
1604   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1605   SlotIndex RegionEndIdx;
1606
1607 public:
1608   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1609
1610   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1611
1612 protected:
1613   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1614 };
1615
1616 } // end anonymous namespace
1617
1618 namespace llvm {
1619
1620 std::unique_ptr<ScheduleDAGMutation>
1621 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1622                                const TargetRegisterInfo *TRI) {
1623   return llvm::make_unique<CopyConstrain>(TII, TRI);
1624 }
1625
1626 } // end namespace llvm
1627
1628 /// constrainLocalCopy handles two possibilities:
1629 /// 1) Local src:
1630 /// I0:     = dst
1631 /// I1: src = ...
1632 /// I2:     = dst
1633 /// I3: dst = src (copy)
1634 /// (create pred->succ edges I0->I1, I2->I1)
1635 ///
1636 /// 2) Local copy:
1637 /// I0: dst = src (copy)
1638 /// I1:     = dst
1639 /// I2: src = ...
1640 /// I3:     = dst
1641 /// (create pred->succ edges I1->I2, I3->I2)
1642 ///
1643 /// Although the MachineScheduler is currently constrained to single blocks,
1644 /// this algorithm should handle extended blocks. An EBB is a set of
1645 /// contiguously numbered blocks such that the previous block in the EBB is
1646 /// always the single predecessor.
1647 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1648   LiveIntervals *LIS = DAG->getLIS();
1649   MachineInstr *Copy = CopySU->getInstr();
1650
1651   // Check for pure vreg copies.
1652   const MachineOperand &SrcOp = Copy->getOperand(1);
1653   unsigned SrcReg = SrcOp.getReg();
1654   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1655     return;
1656
1657   const MachineOperand &DstOp = Copy->getOperand(0);
1658   unsigned DstReg = DstOp.getReg();
1659   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1660     return;
1661
1662   // Check if either the dest or source is local. If it's live across a back
1663   // edge, it's not local. Note that if both vregs are live across the back
1664   // edge, we cannot successfully contrain the copy without cyclic scheduling.
1665   // If both the copy's source and dest are local live intervals, then we
1666   // should treat the dest as the global for the purpose of adding
1667   // constraints. This adds edges from source's other uses to the copy.
1668   unsigned LocalReg = SrcReg;
1669   unsigned GlobalReg = DstReg;
1670   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1671   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1672     LocalReg = DstReg;
1673     GlobalReg = SrcReg;
1674     LocalLI = &LIS->getInterval(LocalReg);
1675     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1676       return;
1677   }
1678   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1679
1680   // Find the global segment after the start of the local LI.
1681   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1682   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1683   // local live range. We could create edges from other global uses to the local
1684   // start, but the coalescer should have already eliminated these cases, so
1685   // don't bother dealing with it.
1686   if (GlobalSegment == GlobalLI->end())
1687     return;
1688
1689   // If GlobalSegment is killed at the LocalLI->start, the call to find()
1690   // returned the next global segment. But if GlobalSegment overlaps with
1691   // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
1692   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1693   if (GlobalSegment->contains(LocalLI->beginIndex()))
1694     ++GlobalSegment;
1695
1696   if (GlobalSegment == GlobalLI->end())
1697     return;
1698
1699   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1700   if (GlobalSegment != GlobalLI->begin()) {
1701     // Two address defs have no hole.
1702     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1703                                GlobalSegment->start)) {
1704       return;
1705     }
1706     // If the prior global segment may be defined by the same two-address
1707     // instruction that also defines LocalLI, then can't make a hole here.
1708     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1709                                LocalLI->beginIndex())) {
1710       return;
1711     }
1712     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1713     // it would be a disconnected component in the live range.
1714     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1715            "Disconnected LRG within the scheduling region.");
1716   }
1717   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1718   if (!GlobalDef)
1719     return;
1720
1721   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1722   if (!GlobalSU)
1723     return;
1724
1725   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1726   // constraining the uses of the last local def to precede GlobalDef.
1727   SmallVector<SUnit*,8> LocalUses;
1728   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1729   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1730   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1731   for (SUnit::const_succ_iterator
1732          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1733        I != E; ++I) {
1734     if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1735       continue;
1736     if (I->getSUnit() == GlobalSU)
1737       continue;
1738     if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1739       return;
1740     LocalUses.push_back(I->getSUnit());
1741   }
1742   // Open the top of the GlobalLI hole by constraining any earlier global uses
1743   // to precede the start of LocalLI.
1744   SmallVector<SUnit*,8> GlobalUses;
1745   MachineInstr *FirstLocalDef =
1746     LIS->getInstructionFromIndex(LocalLI->beginIndex());
1747   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1748   for (SUnit::const_pred_iterator
1749          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1750     if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1751       continue;
1752     if (I->getSUnit() == FirstLocalSU)
1753       continue;
1754     if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1755       return;
1756     GlobalUses.push_back(I->getSUnit());
1757   }
1758   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1759   // Add the weak edges.
1760   for (SmallVectorImpl<SUnit*>::const_iterator
1761          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1762     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1763           << GlobalSU->NodeNum << ")\n");
1764     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1765   }
1766   for (SmallVectorImpl<SUnit*>::const_iterator
1767          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1768     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1769           << FirstLocalSU->NodeNum << ")\n");
1770     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1771   }
1772 }
1773
1774 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1775 /// copy elimination.
1776 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1777   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1778   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1779
1780   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1781   if (FirstPos == DAG->end())
1782     return;
1783   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1784   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1785       *priorNonDebug(DAG->end(), DAG->begin()));
1786
1787   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1788     SUnit *SU = &DAG->SUnits[Idx];
1789     if (!SU->getInstr()->isCopy())
1790       continue;
1791
1792     constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1793   }
1794 }
1795
1796 //===----------------------------------------------------------------------===//
1797 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1798 // and possibly other custom schedulers.
1799 //===----------------------------------------------------------------------===//
1800
1801 static const unsigned InvalidCycle = ~0U;
1802
1803 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1804
1805 void SchedBoundary::reset() {
1806   // A new HazardRec is created for each DAG and owned by SchedBoundary.
1807   // Destroying and reconstructing it is very expensive though. So keep
1808   // invalid, placeholder HazardRecs.
1809   if (HazardRec && HazardRec->isEnabled()) {
1810     delete HazardRec;
1811     HazardRec = nullptr;
1812   }
1813   Available.clear();
1814   Pending.clear();
1815   CheckPending = false;
1816   CurrCycle = 0;
1817   CurrMOps = 0;
1818   MinReadyCycle = std::numeric_limits<unsigned>::max();
1819   ExpectedLatency = 0;
1820   DependentLatency = 0;
1821   RetiredMOps = 0;
1822   MaxExecutedResCount = 0;
1823   ZoneCritResIdx = 0;
1824   IsResourceLimited = false;
1825   ReservedCycles.clear();
1826 #ifndef NDEBUG
1827   // Track the maximum number of stall cycles that could arise either from the
1828   // latency of a DAG edge or the number of cycles that a processor resource is
1829   // reserved (SchedBoundary::ReservedCycles).
1830   MaxObservedStall = 0;
1831 #endif
1832   // Reserve a zero-count for invalid CritResIdx.
1833   ExecutedResCounts.resize(1);
1834   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1835 }
1836
1837 void SchedRemainder::
1838 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1839   reset();
1840   if (!SchedModel->hasInstrSchedModel())
1841     return;
1842   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1843   for (std::vector<SUnit>::iterator
1844          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1845     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1846     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1847       * SchedModel->getMicroOpFactor();
1848     for (TargetSchedModel::ProcResIter
1849            PI = SchedModel->getWriteProcResBegin(SC),
1850            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1851       unsigned PIdx = PI->ProcResourceIdx;
1852       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1853       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1854     }
1855   }
1856 }
1857
1858 void SchedBoundary::
1859 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1860   reset();
1861   DAG = dag;
1862   SchedModel = smodel;
1863   Rem = rem;
1864   if (SchedModel->hasInstrSchedModel()) {
1865     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1866     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1867   }
1868 }
1869
1870 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1871 /// these "soft stalls" differently than the hard stall cycles based on CPU
1872 /// resources and computed by checkHazard(). A fully in-order model
1873 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1874 /// available for scheduling until they are ready. However, a weaker in-order
1875 /// model may use this for heuristics. For example, if a processor has in-order
1876 /// behavior when reading certain resources, this may come into play.
1877 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1878   if (!SU->isUnbuffered)
1879     return 0;
1880
1881   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1882   if (ReadyCycle > CurrCycle)
1883     return ReadyCycle - CurrCycle;
1884   return 0;
1885 }
1886
1887 /// Compute the next cycle at which the given processor resource can be
1888 /// scheduled.
1889 unsigned SchedBoundary::
1890 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1891   unsigned NextUnreserved = ReservedCycles[PIdx];
1892   // If this resource has never been used, always return cycle zero.
1893   if (NextUnreserved == InvalidCycle)
1894     return 0;
1895   // For bottom-up scheduling add the cycles needed for the current operation.
1896   if (!isTop())
1897     NextUnreserved += Cycles;
1898   return NextUnreserved;
1899 }
1900
1901 /// Does this SU have a hazard within the current instruction group.
1902 ///
1903 /// The scheduler supports two modes of hazard recognition. The first is the
1904 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1905 /// supports highly complicated in-order reservation tables
1906 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1907 ///
1908 /// The second is a streamlined mechanism that checks for hazards based on
1909 /// simple counters that the scheduler itself maintains. It explicitly checks
1910 /// for instruction dispatch limitations, including the number of micro-ops that
1911 /// can dispatch per cycle.
1912 ///
1913 /// TODO: Also check whether the SU must start a new group.
1914 bool SchedBoundary::checkHazard(SUnit *SU) {
1915   if (HazardRec->isEnabled()
1916       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1917     return true;
1918   }
1919
1920   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1921   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1922     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1923           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1924     return true;
1925   }
1926
1927   if (CurrMOps > 0 &&
1928       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1929        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1930     DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
1931                  << (isTop()? "begin" : "end") << " group\n");
1932     return true;
1933   }
1934
1935   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1936     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1937     for (TargetSchedModel::ProcResIter
1938            PI = SchedModel->getWriteProcResBegin(SC),
1939            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1940       unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
1941       if (NRCycle > CurrCycle) {
1942 #ifndef NDEBUG
1943         MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
1944 #endif
1945         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
1946               << SchedModel->getResourceName(PI->ProcResourceIdx)
1947               << "=" << NRCycle << "c\n");
1948         return true;
1949       }
1950     }
1951   }
1952   return false;
1953 }
1954
1955 // Find the unscheduled node in ReadySUs with the highest latency.
1956 unsigned SchedBoundary::
1957 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1958   SUnit *LateSU = nullptr;
1959   unsigned RemLatency = 0;
1960   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1961        I != E; ++I) {
1962     unsigned L = getUnscheduledLatency(*I);
1963     if (L > RemLatency) {
1964       RemLatency = L;
1965       LateSU = *I;
1966     }
1967   }
1968   if (LateSU) {
1969     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1970           << LateSU->NodeNum << ") " << RemLatency << "c\n");
1971   }
1972   return RemLatency;
1973 }
1974
1975 // Count resources in this zone and the remaining unscheduled
1976 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1977 // resource index, or zero if the zone is issue limited.
1978 unsigned SchedBoundary::
1979 getOtherResourceCount(unsigned &OtherCritIdx) {
1980   OtherCritIdx = 0;
1981   if (!SchedModel->hasInstrSchedModel())
1982     return 0;
1983
1984   unsigned OtherCritCount = Rem->RemIssueCount
1985     + (RetiredMOps * SchedModel->getMicroOpFactor());
1986   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
1987         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1988   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1989        PIdx != PEnd; ++PIdx) {
1990     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1991     if (OtherCount > OtherCritCount) {
1992       OtherCritCount = OtherCount;
1993       OtherCritIdx = PIdx;
1994     }
1995   }
1996   if (OtherCritIdx) {
1997     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
1998           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1999           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2000   }
2001   return OtherCritCount;
2002 }
2003
2004 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2005   assert(SU->getInstr() && "Scheduled SUnit must have instr");
2006
2007 #ifndef NDEBUG
2008   // ReadyCycle was been bumped up to the CurrCycle when this node was
2009   // scheduled, but CurrCycle may have been eagerly advanced immediately after
2010   // scheduling, so may now be greater than ReadyCycle.
2011   if (ReadyCycle > CurrCycle)
2012     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2013 #endif
2014
2015   if (ReadyCycle < MinReadyCycle)
2016     MinReadyCycle = ReadyCycle;
2017
2018   // Check for interlocks first. For the purpose of other heuristics, an
2019   // instruction that cannot issue appears as if it's not in the ReadyQueue.
2020   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2021   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2022       Available.size() >= ReadyListLimit)
2023     Pending.push(SU);
2024   else
2025     Available.push(SU);
2026 }
2027
2028 /// Move the boundary of scheduled code by one cycle.
2029 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2030   if (SchedModel->getMicroOpBufferSize() == 0) {
2031     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2032            "MinReadyCycle uninitialized");
2033     if (MinReadyCycle > NextCycle)
2034       NextCycle = MinReadyCycle;
2035   }
2036   // Update the current micro-ops, which will issue in the next cycle.
2037   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2038   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2039
2040   // Decrement DependentLatency based on the next cycle.
2041   if ((NextCycle - CurrCycle) > DependentLatency)
2042     DependentLatency = 0;
2043   else
2044     DependentLatency -= (NextCycle - CurrCycle);
2045
2046   if (!HazardRec->isEnabled()) {
2047     // Bypass HazardRec virtual calls.
2048     CurrCycle = NextCycle;
2049   } else {
2050     // Bypass getHazardType calls in case of long latency.
2051     for (; CurrCycle != NextCycle; ++CurrCycle) {
2052       if (isTop())
2053         HazardRec->AdvanceCycle();
2054       else
2055         HazardRec->RecedeCycle();
2056     }
2057   }
2058   CheckPending = true;
2059   unsigned LFactor = SchedModel->getLatencyFactor();
2060   IsResourceLimited =
2061     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2062     > (int)LFactor;
2063
2064   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2065 }
2066
2067 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2068   ExecutedResCounts[PIdx] += Count;
2069   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2070     MaxExecutedResCount = ExecutedResCounts[PIdx];
2071 }
2072
2073 /// Add the given processor resource to this scheduled zone.
2074 ///
2075 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2076 /// during which this resource is consumed.
2077 ///
2078 /// \return the next cycle at which the instruction may execute without
2079 /// oversubscribing resources.
2080 unsigned SchedBoundary::
2081 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2082   unsigned Factor = SchedModel->getResourceFactor(PIdx);
2083   unsigned Count = Factor * Cycles;
2084   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
2085         << " +" << Cycles << "x" << Factor << "u\n");
2086
2087   // Update Executed resources counts.
2088   incExecutedResources(PIdx, Count);
2089   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2090   Rem->RemainingCounts[PIdx] -= Count;
2091
2092   // Check if this resource exceeds the current critical resource. If so, it
2093   // becomes the critical resource.
2094   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2095     ZoneCritResIdx = PIdx;
2096     DEBUG(dbgs() << "  *** Critical resource "
2097           << SchedModel->getResourceName(PIdx) << ": "
2098           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2099   }
2100   // For reserved resources, record the highest cycle using the resource.
2101   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2102   if (NextAvailable > CurrCycle) {
2103     DEBUG(dbgs() << "  Resource conflict: "
2104           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2105           << NextAvailable << "\n");
2106   }
2107   return NextAvailable;
2108 }
2109
2110 /// Move the boundary of scheduled code by one SUnit.
2111 void SchedBoundary::bumpNode(SUnit *SU) {
2112   // Update the reservation table.
2113   if (HazardRec->isEnabled()) {
2114     if (!isTop() && SU->isCall) {
2115       // Calls are scheduled with their preceding instructions. For bottom-up
2116       // scheduling, clear the pipeline state before emitting.
2117       HazardRec->Reset();
2118     }
2119     HazardRec->EmitInstruction(SU);
2120   }
2121   // checkHazard should prevent scheduling multiple instructions per cycle that
2122   // exceed the issue width.
2123   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2124   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2125   assert(
2126       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2127       "Cannot schedule this instruction's MicroOps in the current cycle.");
2128
2129   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2130   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2131
2132   unsigned NextCycle = CurrCycle;
2133   switch (SchedModel->getMicroOpBufferSize()) {
2134   case 0:
2135     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2136     break;
2137   case 1:
2138     if (ReadyCycle > NextCycle) {
2139       NextCycle = ReadyCycle;
2140       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2141     }
2142     break;
2143   default:
2144     // We don't currently model the OOO reorder buffer, so consider all
2145     // scheduled MOps to be "retired". We do loosely model in-order resource
2146     // latency. If this instruction uses an in-order resource, account for any
2147     // likely stall cycles.
2148     if (SU->isUnbuffered && ReadyCycle > NextCycle)
2149       NextCycle = ReadyCycle;
2150     break;
2151   }
2152   RetiredMOps += IncMOps;
2153
2154   // Update resource counts and critical resource.
2155   if (SchedModel->hasInstrSchedModel()) {
2156     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2157     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2158     Rem->RemIssueCount -= DecRemIssue;
2159     if (ZoneCritResIdx) {
2160       // Scale scheduled micro-ops for comparing with the critical resource.
2161       unsigned ScaledMOps =
2162         RetiredMOps * SchedModel->getMicroOpFactor();
2163
2164       // If scaled micro-ops are now more than the previous critical resource by
2165       // a full cycle, then micro-ops issue becomes critical.
2166       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2167           >= (int)SchedModel->getLatencyFactor()) {
2168         ZoneCritResIdx = 0;
2169         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2170               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2171       }
2172     }
2173     for (TargetSchedModel::ProcResIter
2174            PI = SchedModel->getWriteProcResBegin(SC),
2175            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2176       unsigned RCycle =
2177         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2178       if (RCycle > NextCycle)
2179         NextCycle = RCycle;
2180     }
2181     if (SU->hasReservedResource) {
2182       // For reserved resources, record the highest cycle using the resource.
2183       // For top-down scheduling, this is the cycle in which we schedule this
2184       // instruction plus the number of cycles the operations reserves the
2185       // resource. For bottom-up is it simply the instruction's cycle.
2186       for (TargetSchedModel::ProcResIter
2187              PI = SchedModel->getWriteProcResBegin(SC),
2188              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2189         unsigned PIdx = PI->ProcResourceIdx;
2190         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2191           if (isTop()) {
2192             ReservedCycles[PIdx] =
2193               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2194           }
2195           else
2196             ReservedCycles[PIdx] = NextCycle;
2197         }
2198       }
2199     }
2200   }
2201   // Update ExpectedLatency and DependentLatency.
2202   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2203   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2204   if (SU->getDepth() > TopLatency) {
2205     TopLatency = SU->getDepth();
2206     DEBUG(dbgs() << "  " << Available.getName()
2207           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2208   }
2209   if (SU->getHeight() > BotLatency) {
2210     BotLatency = SU->getHeight();
2211     DEBUG(dbgs() << "  " << Available.getName()
2212           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2213   }
2214   // If we stall for any reason, bump the cycle.
2215   if (NextCycle > CurrCycle) {
2216     bumpCycle(NextCycle);
2217   } else {
2218     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2219     // resource limited. If a stall occurred, bumpCycle does this.
2220     unsigned LFactor = SchedModel->getLatencyFactor();
2221     IsResourceLimited =
2222       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2223       > (int)LFactor;
2224   }
2225   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2226   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2227   // one cycle.  Since we commonly reach the max MOps here, opportunistically
2228   // bump the cycle to avoid uselessly checking everything in the readyQ.
2229   CurrMOps += IncMOps;
2230
2231   // Bump the cycle count for issue group constraints.
2232   // This must be done after NextCycle has been adjust for all other stalls.
2233   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2234   // currCycle to X.
2235   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
2236       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2237     DEBUG(dbgs() << "  Bump cycle to "
2238                  << (isTop() ? "end" : "begin") << " group\n");
2239     bumpCycle(++NextCycle);
2240   }
2241
2242   while (CurrMOps >= SchedModel->getIssueWidth()) {
2243     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
2244           << " at cycle " << CurrCycle << '\n');
2245     bumpCycle(++NextCycle);
2246   }
2247   DEBUG(dumpScheduledState());
2248 }
2249
2250 /// Release pending ready nodes in to the available queue. This makes them
2251 /// visible to heuristics.
2252 void SchedBoundary::releasePending() {
2253   // If the available queue is empty, it is safe to reset MinReadyCycle.
2254   if (Available.empty())
2255     MinReadyCycle = std::numeric_limits<unsigned>::max();
2256
2257   // Check to see if any of the pending instructions are ready to issue.  If
2258   // so, add them to the available queue.
2259   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2260   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2261     SUnit *SU = *(Pending.begin()+i);
2262     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2263
2264     if (ReadyCycle < MinReadyCycle)
2265       MinReadyCycle = ReadyCycle;
2266
2267     if (!IsBuffered && ReadyCycle > CurrCycle)
2268       continue;
2269
2270     if (checkHazard(SU))
2271       continue;
2272
2273     if (Available.size() >= ReadyListLimit)
2274       break;
2275
2276     Available.push(SU);
2277     Pending.remove(Pending.begin()+i);
2278     --i; --e;
2279   }
2280   CheckPending = false;
2281 }
2282
2283 /// Remove SU from the ready set for this boundary.
2284 void SchedBoundary::removeReady(SUnit *SU) {
2285   if (Available.isInQueue(SU))
2286     Available.remove(Available.find(SU));
2287   else {
2288     assert(Pending.isInQueue(SU) && "bad ready count");
2289     Pending.remove(Pending.find(SU));
2290   }
2291 }
2292
2293 /// If this queue only has one ready candidate, return it. As a side effect,
2294 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2295 /// one node is ready. If multiple instructions are ready, return NULL.
2296 SUnit *SchedBoundary::pickOnlyChoice() {
2297   if (CheckPending)
2298     releasePending();
2299
2300   if (CurrMOps > 0) {
2301     // Defer any ready instrs that now have a hazard.
2302     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2303       if (checkHazard(*I)) {
2304         Pending.push(*I);
2305         I = Available.remove(I);
2306         continue;
2307       }
2308       ++I;
2309     }
2310   }
2311   for (unsigned i = 0; Available.empty(); ++i) {
2312 //  FIXME: Re-enable assert once PR20057 is resolved.
2313 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2314 //           "permanent hazard");
2315     (void)i;
2316     bumpCycle(CurrCycle + 1);
2317     releasePending();
2318   }
2319
2320   DEBUG(Pending.dump());
2321   DEBUG(Available.dump());
2322
2323   if (Available.size() == 1)
2324     return *Available.begin();
2325   return nullptr;
2326 }
2327
2328 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2329 // This is useful information to dump after bumpNode.
2330 // Note that the Queue contents are more useful before pickNodeFromQueue.
2331 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() {
2332   unsigned ResFactor;
2333   unsigned ResCount;
2334   if (ZoneCritResIdx) {
2335     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2336     ResCount = getResourceCount(ZoneCritResIdx);
2337   } else {
2338     ResFactor = SchedModel->getMicroOpFactor();
2339     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2340   }
2341   unsigned LFactor = SchedModel->getLatencyFactor();
2342   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2343          << "  Retired: " << RetiredMOps;
2344   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2345   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2346          << ResCount / ResFactor << " "
2347          << SchedModel->getResourceName(ZoneCritResIdx)
2348          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2349          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2350          << " limited.\n";
2351 }
2352 #endif
2353
2354 //===----------------------------------------------------------------------===//
2355 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2356 //===----------------------------------------------------------------------===//
2357
2358 void GenericSchedulerBase::SchedCandidate::
2359 initResourceDelta(const ScheduleDAGMI *DAG,
2360                   const TargetSchedModel *SchedModel) {
2361   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2362     return;
2363
2364   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2365   for (TargetSchedModel::ProcResIter
2366          PI = SchedModel->getWriteProcResBegin(SC),
2367          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2368     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2369       ResDelta.CritResources += PI->Cycles;
2370     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2371       ResDelta.DemandedResources += PI->Cycles;
2372   }
2373 }
2374
2375 /// Set the CandPolicy given a scheduling zone given the current resources and
2376 /// latencies inside and outside the zone.
2377 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2378                                      SchedBoundary &CurrZone,
2379                                      SchedBoundary *OtherZone) {
2380   // Apply preemptive heuristics based on the total latency and resources
2381   // inside and outside this zone. Potential stalls should be considered before
2382   // following this policy.
2383
2384   // Compute remaining latency. We need this both to determine whether the
2385   // overall schedule has become latency-limited and whether the instructions
2386   // outside this zone are resource or latency limited.
2387   //
2388   // The "dependent" latency is updated incrementally during scheduling as the
2389   // max height/depth of scheduled nodes minus the cycles since it was
2390   // scheduled:
2391   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2392   //
2393   // The "independent" latency is the max ready queue depth:
2394   //   ILat = max N.depth for N in Available|Pending
2395   //
2396   // RemainingLatency is the greater of independent and dependent latency.
2397   unsigned RemLatency = CurrZone.getDependentLatency();
2398   RemLatency = std::max(RemLatency,
2399                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2400   RemLatency = std::max(RemLatency,
2401                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2402
2403   // Compute the critical resource outside the zone.
2404   unsigned OtherCritIdx = 0;
2405   unsigned OtherCount =
2406     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2407
2408   bool OtherResLimited = false;
2409   if (SchedModel->hasInstrSchedModel()) {
2410     unsigned LFactor = SchedModel->getLatencyFactor();
2411     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2412   }
2413   // Schedule aggressively for latency in PostRA mode. We don't check for
2414   // acyclic latency during PostRA, and highly out-of-order processors will
2415   // skip PostRA scheduling.
2416   if (!OtherResLimited) {
2417     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2418       Policy.ReduceLatency |= true;
2419       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2420             << " RemainingLatency " << RemLatency << " + "
2421             << CurrZone.getCurrCycle() << "c > CritPath "
2422             << Rem.CriticalPath << "\n");
2423     }
2424   }
2425   // If the same resource is limiting inside and outside the zone, do nothing.
2426   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2427     return;
2428
2429   DEBUG(
2430     if (CurrZone.isResourceLimited()) {
2431       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2432              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2433              << "\n";
2434     }
2435     if (OtherResLimited)
2436       dbgs() << "  RemainingLimit: "
2437              << SchedModel->getResourceName(OtherCritIdx) << "\n";
2438     if (!CurrZone.isResourceLimited() && !OtherResLimited)
2439       dbgs() << "  Latency limited both directions.\n");
2440
2441   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2442     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2443
2444   if (OtherResLimited)
2445     Policy.DemandResIdx = OtherCritIdx;
2446 }
2447
2448 #ifndef NDEBUG
2449 const char *GenericSchedulerBase::getReasonStr(
2450   GenericSchedulerBase::CandReason Reason) {
2451   switch (Reason) {
2452   case NoCand:         return "NOCAND    ";
2453   case Only1:          return "ONLY1     ";
2454   case PhysRegCopy:    return "PREG-COPY ";
2455   case RegExcess:      return "REG-EXCESS";
2456   case RegCritical:    return "REG-CRIT  ";
2457   case Stall:          return "STALL     ";
2458   case Cluster:        return "CLUSTER   ";
2459   case Weak:           return "WEAK      ";
2460   case RegMax:         return "REG-MAX   ";
2461   case ResourceReduce: return "RES-REDUCE";
2462   case ResourceDemand: return "RES-DEMAND";
2463   case TopDepthReduce: return "TOP-DEPTH ";
2464   case TopPathReduce:  return "TOP-PATH  ";
2465   case BotHeightReduce:return "BOT-HEIGHT";
2466   case BotPathReduce:  return "BOT-PATH  ";
2467   case NextDefUse:     return "DEF-USE   ";
2468   case NodeOrder:      return "ORDER     ";
2469   };
2470   llvm_unreachable("Unknown reason!");
2471 }
2472
2473 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2474   PressureChange P;
2475   unsigned ResIdx = 0;
2476   unsigned Latency = 0;
2477   switch (Cand.Reason) {
2478   default:
2479     break;
2480   case RegExcess:
2481     P = Cand.RPDelta.Excess;
2482     break;
2483   case RegCritical:
2484     P = Cand.RPDelta.CriticalMax;
2485     break;
2486   case RegMax:
2487     P = Cand.RPDelta.CurrentMax;
2488     break;
2489   case ResourceReduce:
2490     ResIdx = Cand.Policy.ReduceResIdx;
2491     break;
2492   case ResourceDemand:
2493     ResIdx = Cand.Policy.DemandResIdx;
2494     break;
2495   case TopDepthReduce:
2496     Latency = Cand.SU->getDepth();
2497     break;
2498   case TopPathReduce:
2499     Latency = Cand.SU->getHeight();
2500     break;
2501   case BotHeightReduce:
2502     Latency = Cand.SU->getHeight();
2503     break;
2504   case BotPathReduce:
2505     Latency = Cand.SU->getDepth();
2506     break;
2507   }
2508   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2509   if (P.isValid())
2510     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2511            << ":" << P.getUnitInc() << " ";
2512   else
2513     dbgs() << "      ";
2514   if (ResIdx)
2515     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2516   else
2517     dbgs() << "         ";
2518   if (Latency)
2519     dbgs() << " " << Latency << " cycles ";
2520   else
2521     dbgs() << "          ";
2522   dbgs() << '\n';
2523 }
2524 #endif
2525
2526 /// Return true if this heuristic determines order.
2527 static bool tryLess(int TryVal, int CandVal,
2528                     GenericSchedulerBase::SchedCandidate &TryCand,
2529                     GenericSchedulerBase::SchedCandidate &Cand,
2530                     GenericSchedulerBase::CandReason Reason) {
2531   if (TryVal < CandVal) {
2532     TryCand.Reason = Reason;
2533     return true;
2534   }
2535   if (TryVal > CandVal) {
2536     if (Cand.Reason > Reason)
2537       Cand.Reason = Reason;
2538     return true;
2539   }
2540   return false;
2541 }
2542
2543 static bool tryGreater(int TryVal, int CandVal,
2544                        GenericSchedulerBase::SchedCandidate &TryCand,
2545                        GenericSchedulerBase::SchedCandidate &Cand,
2546                        GenericSchedulerBase::CandReason Reason) {
2547   if (TryVal > CandVal) {
2548     TryCand.Reason = Reason;
2549     return true;
2550   }
2551   if (TryVal < CandVal) {
2552     if (Cand.Reason > Reason)
2553       Cand.Reason = Reason;
2554     return true;
2555   }
2556   return false;
2557 }
2558
2559 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2560                        GenericSchedulerBase::SchedCandidate &Cand,
2561                        SchedBoundary &Zone) {
2562   if (Zone.isTop()) {
2563     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2564       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2565                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2566         return true;
2567     }
2568     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2569                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2570       return true;
2571   } else {
2572     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2573       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2574                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2575         return true;
2576     }
2577     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2578                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2579       return true;
2580   }
2581   return false;
2582 }
2583
2584 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2585   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2586         << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2587 }
2588
2589 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2590   tracePick(Cand.Reason, Cand.AtTop);
2591 }
2592
2593 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2594   assert(dag->hasVRegLiveness() &&
2595          "(PreRA)GenericScheduler needs vreg liveness");
2596   DAG = static_cast<ScheduleDAGMILive*>(dag);
2597   SchedModel = DAG->getSchedModel();
2598   TRI = DAG->TRI;
2599
2600   Rem.init(DAG, SchedModel);
2601   Top.init(DAG, SchedModel, &Rem);
2602   Bot.init(DAG, SchedModel, &Rem);
2603
2604   // Initialize resource counts.
2605
2606   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2607   // are disabled, then these HazardRecs will be disabled.
2608   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2609   if (!Top.HazardRec) {
2610     Top.HazardRec =
2611         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2612             Itin, DAG);
2613   }
2614   if (!Bot.HazardRec) {
2615     Bot.HazardRec =
2616         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2617             Itin, DAG);
2618   }
2619   TopCand.SU = nullptr;
2620   BotCand.SU = nullptr;
2621 }
2622
2623 /// Initialize the per-region scheduling policy.
2624 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2625                                   MachineBasicBlock::iterator End,
2626                                   unsigned NumRegionInstrs) {
2627   const MachineFunction &MF = *Begin->getParent()->getParent();
2628   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2629
2630   // Avoid setting up the register pressure tracker for small regions to save
2631   // compile time. As a rough heuristic, only track pressure when the number of
2632   // schedulable instructions exceeds half the integer register file.
2633   RegionPolicy.ShouldTrackPressure = true;
2634   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2635     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2636     if (TLI->isTypeLegal(LegalIntVT)) {
2637       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2638         TLI->getRegClassFor(LegalIntVT));
2639       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2640     }
2641   }
2642
2643   // For generic targets, we default to bottom-up, because it's simpler and more
2644   // compile-time optimizations have been implemented in that direction.
2645   RegionPolicy.OnlyBottomUp = true;
2646
2647   // Allow the subtarget to override default policy.
2648   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2649
2650   // After subtarget overrides, apply command line options.
2651   if (!EnableRegPressure)
2652     RegionPolicy.ShouldTrackPressure = false;
2653
2654   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2655   // e.g. -misched-bottomup=false allows scheduling in both directions.
2656   assert((!ForceTopDown || !ForceBottomUp) &&
2657          "-misched-topdown incompatible with -misched-bottomup");
2658   if (ForceBottomUp.getNumOccurrences() > 0) {
2659     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2660     if (RegionPolicy.OnlyBottomUp)
2661       RegionPolicy.OnlyTopDown = false;
2662   }
2663   if (ForceTopDown.getNumOccurrences() > 0) {
2664     RegionPolicy.OnlyTopDown = ForceTopDown;
2665     if (RegionPolicy.OnlyTopDown)
2666       RegionPolicy.OnlyBottomUp = false;
2667   }
2668 }
2669
2670 void GenericScheduler::dumpPolicy() {
2671   // Cannot completely remove virtual function even in release mode.
2672 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2673   dbgs() << "GenericScheduler RegionPolicy: "
2674          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2675          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2676          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2677          << "\n";
2678 #endif
2679 }
2680
2681 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2682 /// critical path by more cycles than it takes to drain the instruction buffer.
2683 /// We estimate an upper bounds on in-flight instructions as:
2684 ///
2685 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2686 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2687 /// InFlightResources = InFlightIterations * LoopResources
2688 ///
2689 /// TODO: Check execution resources in addition to IssueCount.
2690 void GenericScheduler::checkAcyclicLatency() {
2691   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2692     return;
2693
2694   // Scaled number of cycles per loop iteration.
2695   unsigned IterCount =
2696     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2697              Rem.RemIssueCount);
2698   // Scaled acyclic critical path.
2699   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2700   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2701   unsigned InFlightCount =
2702     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2703   unsigned BufferLimit =
2704     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2705
2706   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2707
2708   DEBUG(dbgs() << "IssueCycles="
2709         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2710         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2711         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2712         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2713         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2714         if (Rem.IsAcyclicLatencyLimited)
2715           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2716 }
2717
2718 void GenericScheduler::registerRoots() {
2719   Rem.CriticalPath = DAG->ExitSU.getDepth();
2720
2721   // Some roots may not feed into ExitSU. Check all of them in case.
2722   for (std::vector<SUnit*>::const_iterator
2723          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2724     if ((*I)->getDepth() > Rem.CriticalPath)
2725       Rem.CriticalPath = (*I)->getDepth();
2726   }
2727   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2728   if (DumpCriticalPathLength) {
2729     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2730   }
2731
2732   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2733     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2734     checkAcyclicLatency();
2735   }
2736 }
2737
2738 static bool tryPressure(const PressureChange &TryP,
2739                         const PressureChange &CandP,
2740                         GenericSchedulerBase::SchedCandidate &TryCand,
2741                         GenericSchedulerBase::SchedCandidate &Cand,
2742                         GenericSchedulerBase::CandReason Reason,
2743                         const TargetRegisterInfo *TRI,
2744                         const MachineFunction &MF) {
2745   // If one candidate decreases and the other increases, go with it.
2746   // Invalid candidates have UnitInc==0.
2747   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2748                  Reason)) {
2749     return true;
2750   }
2751   // Do not compare the magnitude of pressure changes between top and bottom
2752   // boundary.
2753   if (Cand.AtTop != TryCand.AtTop)
2754     return false;
2755
2756   // If both candidates affect the same set in the same boundary, go with the
2757   // smallest increase.
2758   unsigned TryPSet = TryP.getPSetOrMax();
2759   unsigned CandPSet = CandP.getPSetOrMax();
2760   if (TryPSet == CandPSet) {
2761     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2762                    Reason);
2763   }
2764
2765   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2766                                  std::numeric_limits<int>::max();
2767
2768   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2769                                    std::numeric_limits<int>::max();
2770
2771   // If the candidates are decreasing pressure, reverse priority.
2772   if (TryP.getUnitInc() < 0)
2773     std::swap(TryRank, CandRank);
2774   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2775 }
2776
2777 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2778   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2779 }
2780
2781 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2782 /// their physreg def/use.
2783 ///
2784 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2785 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2786 /// with the operation that produces or consumes the physreg. We'll do this when
2787 /// regalloc has support for parallel copies.
2788 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2789   const MachineInstr *MI = SU->getInstr();
2790   if (!MI->isCopy())
2791     return 0;
2792
2793   unsigned ScheduledOper = isTop ? 1 : 0;
2794   unsigned UnscheduledOper = isTop ? 0 : 1;
2795   // If we have already scheduled the physreg produce/consumer, immediately
2796   // schedule the copy.
2797   if (TargetRegisterInfo::isPhysicalRegister(
2798         MI->getOperand(ScheduledOper).getReg()))
2799     return 1;
2800   // If the physreg is at the boundary, defer it. Otherwise schedule it
2801   // immediately to free the dependent. We can hoist the copy later.
2802   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2803   if (TargetRegisterInfo::isPhysicalRegister(
2804         MI->getOperand(UnscheduledOper).getReg()))
2805     return AtBoundary ? -1 : 1;
2806   return 0;
2807 }
2808
2809 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2810                                      bool AtTop,
2811                                      const RegPressureTracker &RPTracker,
2812                                      RegPressureTracker &TempTracker) {
2813   Cand.SU = SU;
2814   Cand.AtTop = AtTop;
2815   if (DAG->isTrackingPressure()) {
2816     if (AtTop) {
2817       TempTracker.getMaxDownwardPressureDelta(
2818         Cand.SU->getInstr(),
2819         Cand.RPDelta,
2820         DAG->getRegionCriticalPSets(),
2821         DAG->getRegPressure().MaxSetPressure);
2822     } else {
2823       if (VerifyScheduling) {
2824         TempTracker.getMaxUpwardPressureDelta(
2825           Cand.SU->getInstr(),
2826           &DAG->getPressureDiff(Cand.SU),
2827           Cand.RPDelta,
2828           DAG->getRegionCriticalPSets(),
2829           DAG->getRegPressure().MaxSetPressure);
2830       } else {
2831         RPTracker.getUpwardPressureDelta(
2832           Cand.SU->getInstr(),
2833           DAG->getPressureDiff(Cand.SU),
2834           Cand.RPDelta,
2835           DAG->getRegionCriticalPSets(),
2836           DAG->getRegPressure().MaxSetPressure);
2837       }
2838     }
2839   }
2840   DEBUG(if (Cand.RPDelta.Excess.isValid())
2841           dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
2842                  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
2843                  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2844 }
2845
2846 /// Apply a set of heursitics to a new candidate. Heuristics are currently
2847 /// hierarchical. This may be more efficient than a graduated cost model because
2848 /// we don't need to evaluate all aspects of the model for each node in the
2849 /// queue. But it's really done to make the heuristics easier to debug and
2850 /// statistically analyze.
2851 ///
2852 /// \param Cand provides the policy and current best candidate.
2853 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2854 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2855 //              if Cand is from a different zone than TryCand.
2856 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2857                                     SchedCandidate &TryCand,
2858                                     SchedBoundary *Zone) {
2859   // Initialize the candidate if needed.
2860   if (!Cand.isValid()) {
2861     TryCand.Reason = NodeOrder;
2862     return;
2863   }
2864
2865   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2866                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2867                  TryCand, Cand, PhysRegCopy))
2868     return;
2869
2870   // Avoid exceeding the target's limit.
2871   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2872                                                Cand.RPDelta.Excess,
2873                                                TryCand, Cand, RegExcess, TRI,
2874                                                DAG->MF))
2875     return;
2876
2877   // Avoid increasing the max critical pressure in the scheduled region.
2878   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2879                                                Cand.RPDelta.CriticalMax,
2880                                                TryCand, Cand, RegCritical, TRI,
2881                                                DAG->MF))
2882     return;
2883
2884   // We only compare a subset of features when comparing nodes between
2885   // Top and Bottom boundary. Some properties are simply incomparable, in many
2886   // other instances we should only override the other boundary if something
2887   // is a clear good pick on one boundary. Skip heuristics that are more
2888   // "tie-breaking" in nature.
2889   bool SameBoundary = Zone != nullptr;
2890   if (SameBoundary) {
2891     // For loops that are acyclic path limited, aggressively schedule for
2892     // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
2893     // heuristics to take precedence.
2894     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2895         tryLatency(TryCand, Cand, *Zone))
2896       return;
2897
2898     // Prioritize instructions that read unbuffered resources by stall cycles.
2899     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2900                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2901       return;
2902   }
2903
2904   // Keep clustered nodes together to encourage downstream peephole
2905   // optimizations which may reduce resource requirements.
2906   //
2907   // This is a best effort to set things up for a post-RA pass. Optimizations
2908   // like generating loads of multiple registers should ideally be done within
2909   // the scheduler pass by combining the loads during DAG postprocessing.
2910   const SUnit *CandNextClusterSU =
2911     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2912   const SUnit *TryCandNextClusterSU =
2913     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2914   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2915                  Cand.SU == CandNextClusterSU,
2916                  TryCand, Cand, Cluster))
2917     return;
2918
2919   if (SameBoundary) {
2920     // Weak edges are for clustering and other constraints.
2921     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2922                 getWeakLeft(Cand.SU, Cand.AtTop),
2923                 TryCand, Cand, Weak))
2924       return;
2925   }
2926
2927   // Avoid increasing the max pressure of the entire region.
2928   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2929                                                Cand.RPDelta.CurrentMax,
2930                                                TryCand, Cand, RegMax, TRI,
2931                                                DAG->MF))
2932     return;
2933
2934   if (SameBoundary) {
2935     // Avoid critical resource consumption and balance the schedule.
2936     TryCand.initResourceDelta(DAG, SchedModel);
2937     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2938                 TryCand, Cand, ResourceReduce))
2939       return;
2940     if (tryGreater(TryCand.ResDelta.DemandedResources,
2941                    Cand.ResDelta.DemandedResources,
2942                    TryCand, Cand, ResourceDemand))
2943       return;
2944
2945     // Avoid serializing long latency dependence chains.
2946     // For acyclic path limited loops, latency was already checked above.
2947     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2948         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2949       return;
2950
2951     // Fall through to original instruction order.
2952     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2953         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2954       TryCand.Reason = NodeOrder;
2955     }
2956   }
2957 }
2958
2959 /// Pick the best candidate from the queue.
2960 ///
2961 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2962 /// DAG building. To adjust for the current scheduling location we need to
2963 /// maintain the number of vreg uses remaining to be top-scheduled.
2964 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2965                                          const CandPolicy &ZonePolicy,
2966                                          const RegPressureTracker &RPTracker,
2967                                          SchedCandidate &Cand) {
2968   // getMaxPressureDelta temporarily modifies the tracker.
2969   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2970
2971   ReadyQueue &Q = Zone.Available;
2972   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2973
2974     SchedCandidate TryCand(ZonePolicy);
2975     initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
2976     // Pass SchedBoundary only when comparing nodes from the same boundary.
2977     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
2978     tryCandidate(Cand, TryCand, ZoneArg);
2979     if (TryCand.Reason != NoCand) {
2980       // Initialize resource delta if needed in case future heuristics query it.
2981       if (TryCand.ResDelta == SchedResourceDelta())
2982         TryCand.initResourceDelta(DAG, SchedModel);
2983       Cand.setBest(TryCand);
2984       DEBUG(traceCandidate(Cand));
2985     }
2986   }
2987 }
2988
2989 /// Pick the best candidate node from either the top or bottom queue.
2990 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2991   // Schedule as far as possible in the direction of no choice. This is most
2992   // efficient, but also provides the best heuristics for CriticalPSets.
2993   if (SUnit *SU = Bot.pickOnlyChoice()) {
2994     IsTopNode = false;
2995     tracePick(Only1, false);
2996     return SU;
2997   }
2998   if (SUnit *SU = Top.pickOnlyChoice()) {
2999     IsTopNode = true;
3000     tracePick(Only1, true);
3001     return SU;
3002   }
3003   // Set the bottom-up policy based on the state of the current bottom zone and
3004   // the instructions outside the zone, including the top zone.
3005   CandPolicy BotPolicy;
3006   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3007   // Set the top-down policy based on the state of the current top zone and
3008   // the instructions outside the zone, including the bottom zone.
3009   CandPolicy TopPolicy;
3010   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3011
3012   // See if BotCand is still valid (because we previously scheduled from Top).
3013   DEBUG(dbgs() << "Picking from Bot:\n");
3014   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3015       BotCand.Policy != BotPolicy) {
3016     BotCand.reset(CandPolicy());
3017     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3018     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3019   } else {
3020     DEBUG(traceCandidate(BotCand));
3021 #ifndef NDEBUG
3022     if (VerifyScheduling) {
3023       SchedCandidate TCand;
3024       TCand.reset(CandPolicy());
3025       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3026       assert(TCand.SU == BotCand.SU &&
3027              "Last pick result should correspond to re-picking right now");
3028     }
3029 #endif
3030   }
3031
3032   // Check if the top Q has a better candidate.
3033   DEBUG(dbgs() << "Picking from Top:\n");
3034   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3035       TopCand.Policy != TopPolicy) {
3036     TopCand.reset(CandPolicy());
3037     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3038     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3039   } else {
3040     DEBUG(traceCandidate(TopCand));
3041 #ifndef NDEBUG
3042     if (VerifyScheduling) {
3043       SchedCandidate TCand;
3044       TCand.reset(CandPolicy());
3045       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3046       assert(TCand.SU == TopCand.SU &&
3047            "Last pick result should correspond to re-picking right now");
3048     }
3049 #endif
3050   }
3051
3052   // Pick best from BotCand and TopCand.
3053   assert(BotCand.isValid());
3054   assert(TopCand.isValid());
3055   SchedCandidate Cand = BotCand;
3056   TopCand.Reason = NoCand;
3057   tryCandidate(Cand, TopCand, nullptr);
3058   if (TopCand.Reason != NoCand) {
3059     Cand.setBest(TopCand);
3060     DEBUG(traceCandidate(Cand));
3061   }
3062
3063   IsTopNode = Cand.AtTop;
3064   tracePick(Cand);
3065   return Cand.SU;
3066 }
3067
3068 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3069 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3070   if (DAG->top() == DAG->bottom()) {
3071     assert(Top.Available.empty() && Top.Pending.empty() &&
3072            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3073     return nullptr;
3074   }
3075   SUnit *SU;
3076   do {
3077     if (RegionPolicy.OnlyTopDown) {
3078       SU = Top.pickOnlyChoice();
3079       if (!SU) {
3080         CandPolicy NoPolicy;
3081         TopCand.reset(NoPolicy);
3082         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3083         assert(TopCand.Reason != NoCand && "failed to find a candidate");
3084         tracePick(TopCand);
3085         SU = TopCand.SU;
3086       }
3087       IsTopNode = true;
3088     } else if (RegionPolicy.OnlyBottomUp) {
3089       SU = Bot.pickOnlyChoice();
3090       if (!SU) {
3091         CandPolicy NoPolicy;
3092         BotCand.reset(NoPolicy);
3093         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3094         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3095         tracePick(BotCand);
3096         SU = BotCand.SU;
3097       }
3098       IsTopNode = false;
3099     } else {
3100       SU = pickNodeBidirectional(IsTopNode);
3101     }
3102   } while (SU->isScheduled);
3103
3104   if (SU->isTopReady())
3105     Top.removeReady(SU);
3106   if (SU->isBottomReady())
3107     Bot.removeReady(SU);
3108
3109   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3110   return SU;
3111 }
3112
3113 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3114   MachineBasicBlock::iterator InsertPos = SU->getInstr();
3115   if (!isTop)
3116     ++InsertPos;
3117   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3118
3119   // Find already scheduled copies with a single physreg dependence and move
3120   // them just above the scheduled instruction.
3121   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
3122        I != E; ++I) {
3123     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
3124       continue;
3125     SUnit *DepSU = I->getSUnit();
3126     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3127       continue;
3128     MachineInstr *Copy = DepSU->getInstr();
3129     if (!Copy->isCopy())
3130       continue;
3131     DEBUG(dbgs() << "  Rescheduling physreg copy ";
3132           I->getSUnit()->dump(DAG));
3133     DAG->moveInstruction(Copy, InsertPos);
3134   }
3135 }
3136
3137 /// Update the scheduler's state after scheduling a node. This is the same node
3138 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3139 /// update it's state based on the current cycle before MachineSchedStrategy
3140 /// does.
3141 ///
3142 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3143 /// them here. See comments in biasPhysRegCopy.
3144 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3145   if (IsTopNode) {
3146     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3147     Top.bumpNode(SU);
3148     if (SU->hasPhysRegUses)
3149       reschedulePhysRegCopies(SU, true);
3150   } else {
3151     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3152     Bot.bumpNode(SU);
3153     if (SU->hasPhysRegDefs)
3154       reschedulePhysRegCopies(SU, false);
3155   }
3156 }
3157
3158 /// Create the standard converging machine scheduler. This will be used as the
3159 /// default scheduler if the target does not set a default.
3160 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3161   ScheduleDAGMILive *DAG =
3162       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3163   // Register DAG post-processors.
3164   //
3165   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3166   // data and pass it to later mutations. Have a single mutation that gathers
3167   // the interesting nodes in one pass.
3168   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3169   return DAG;
3170 }
3171
3172 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3173   return createGenericSchedLive(C);
3174 }
3175
3176 static MachineSchedRegistry
3177 GenericSchedRegistry("converge", "Standard converging scheduler.",
3178                      createConveringSched);
3179
3180 //===----------------------------------------------------------------------===//
3181 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3182 //===----------------------------------------------------------------------===//
3183
3184 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3185   DAG = Dag;
3186   SchedModel = DAG->getSchedModel();
3187   TRI = DAG->TRI;
3188
3189   Rem.init(DAG, SchedModel);
3190   Top.init(DAG, SchedModel, &Rem);
3191   BotRoots.clear();
3192
3193   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3194   // or are disabled, then these HazardRecs will be disabled.
3195   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3196   if (!Top.HazardRec) {
3197     Top.HazardRec =
3198         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3199             Itin, DAG);
3200   }
3201 }
3202
3203 void PostGenericScheduler::registerRoots() {
3204   Rem.CriticalPath = DAG->ExitSU.getDepth();
3205
3206   // Some roots may not feed into ExitSU. Check all of them in case.
3207   for (SmallVectorImpl<SUnit*>::const_iterator
3208          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3209     if ((*I)->getDepth() > Rem.CriticalPath)
3210       Rem.CriticalPath = (*I)->getDepth();
3211   }
3212   DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3213   if (DumpCriticalPathLength) {
3214     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3215   }
3216 }
3217
3218 /// Apply a set of heursitics to a new candidate for PostRA scheduling.
3219 ///
3220 /// \param Cand provides the policy and current best candidate.
3221 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3222 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3223                                         SchedCandidate &TryCand) {
3224
3225   // Initialize the candidate if needed.
3226   if (!Cand.isValid()) {
3227     TryCand.Reason = NodeOrder;
3228     return;
3229   }
3230
3231   // Prioritize instructions that read unbuffered resources by stall cycles.
3232   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3233               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3234     return;
3235
3236   // Avoid critical resource consumption and balance the schedule.
3237   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3238               TryCand, Cand, ResourceReduce))
3239     return;
3240   if (tryGreater(TryCand.ResDelta.DemandedResources,
3241                  Cand.ResDelta.DemandedResources,
3242                  TryCand, Cand, ResourceDemand))
3243     return;
3244
3245   // Avoid serializing long latency dependence chains.
3246   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3247     return;
3248   }
3249
3250   // Fall through to original instruction order.
3251   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3252     TryCand.Reason = NodeOrder;
3253 }
3254
3255 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3256   ReadyQueue &Q = Top.Available;
3257   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3258     SchedCandidate TryCand(Cand.Policy);
3259     TryCand.SU = *I;
3260     TryCand.AtTop = true;
3261     TryCand.initResourceDelta(DAG, SchedModel);
3262     tryCandidate(Cand, TryCand);
3263     if (TryCand.Reason != NoCand) {
3264       Cand.setBest(TryCand);
3265       DEBUG(traceCandidate(Cand));
3266     }
3267   }
3268 }
3269
3270 /// Pick the next node to schedule.
3271 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3272   if (DAG->top() == DAG->bottom()) {
3273     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3274     return nullptr;
3275   }
3276   SUnit *SU;
3277   do {
3278     SU = Top.pickOnlyChoice();
3279     if (SU) {
3280       tracePick(Only1, true);
3281     } else {
3282       CandPolicy NoPolicy;
3283       SchedCandidate TopCand(NoPolicy);
3284       // Set the top-down policy based on the state of the current top zone and
3285       // the instructions outside the zone, including the bottom zone.
3286       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3287       pickNodeFromQueue(TopCand);
3288       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3289       tracePick(TopCand);
3290       SU = TopCand.SU;
3291     }
3292   } while (SU->isScheduled);
3293
3294   IsTopNode = true;
3295   Top.removeReady(SU);
3296
3297   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3298   return SU;
3299 }
3300
3301 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3302 /// scheduled/remaining flags in the DAG nodes.
3303 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3304   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3305   Top.bumpNode(SU);
3306 }
3307
3308 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3309   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3310                            /*RemoveKillFlags=*/true);
3311 }
3312
3313 //===----------------------------------------------------------------------===//
3314 // ILP Scheduler. Currently for experimental analysis of heuristics.
3315 //===----------------------------------------------------------------------===//
3316
3317 namespace {
3318
3319 /// \brief Order nodes by the ILP metric.
3320 struct ILPOrder {
3321   const SchedDFSResult *DFSResult = nullptr;
3322   const BitVector *ScheduledTrees = nullptr;
3323   bool MaximizeILP;
3324
3325   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3326
3327   /// \brief Apply a less-than relation on node priority.
3328   ///
3329   /// (Return true if A comes after B in the Q.)
3330   bool operator()(const SUnit *A, const SUnit *B) const {
3331     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3332     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3333     if (SchedTreeA != SchedTreeB) {
3334       // Unscheduled trees have lower priority.
3335       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3336         return ScheduledTrees->test(SchedTreeB);
3337
3338       // Trees with shallower connections have have lower priority.
3339       if (DFSResult->getSubtreeLevel(SchedTreeA)
3340           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3341         return DFSResult->getSubtreeLevel(SchedTreeA)
3342           < DFSResult->getSubtreeLevel(SchedTreeB);
3343       }
3344     }
3345     if (MaximizeILP)
3346       return DFSResult->getILP(A) < DFSResult->getILP(B);
3347     else
3348       return DFSResult->getILP(A) > DFSResult->getILP(B);
3349   }
3350 };
3351
3352 /// \brief Schedule based on the ILP metric.
3353 class ILPScheduler : public MachineSchedStrategy {
3354   ScheduleDAGMILive *DAG = nullptr;
3355   ILPOrder Cmp;
3356
3357   std::vector<SUnit*> ReadyQ;
3358
3359 public:
3360   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3361
3362   void initialize(ScheduleDAGMI *dag) override {
3363     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3364     DAG = static_cast<ScheduleDAGMILive*>(dag);
3365     DAG->computeDFSResult();
3366     Cmp.DFSResult = DAG->getDFSResult();
3367     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3368     ReadyQ.clear();
3369   }
3370
3371   void registerRoots() override {
3372     // Restore the heap in ReadyQ with the updated DFS results.
3373     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3374   }
3375
3376   /// Implement MachineSchedStrategy interface.
3377   /// -----------------------------------------
3378
3379   /// Callback to select the highest priority node from the ready Q.
3380   SUnit *pickNode(bool &IsTopNode) override {
3381     if (ReadyQ.empty()) return nullptr;
3382     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3383     SUnit *SU = ReadyQ.back();
3384     ReadyQ.pop_back();
3385     IsTopNode = false;
3386     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3387           << " ILP: " << DAG->getDFSResult()->getILP(SU)
3388           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3389           << DAG->getDFSResult()->getSubtreeLevel(
3390             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3391           << "Scheduling " << *SU->getInstr());
3392     return SU;
3393   }
3394
3395   /// \brief Scheduler callback to notify that a new subtree is scheduled.
3396   void scheduleTree(unsigned SubtreeID) override {
3397     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3398   }
3399
3400   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3401   /// DFSResults, and resort the priority Q.
3402   void schedNode(SUnit *SU, bool IsTopNode) override {
3403     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3404   }
3405
3406   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3407
3408   void releaseBottomNode(SUnit *SU) override {
3409     ReadyQ.push_back(SU);
3410     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3411   }
3412 };
3413
3414 } // end anonymous namespace
3415
3416 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3417   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3418 }
3419 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3420   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3421 }
3422
3423 static MachineSchedRegistry ILPMaxRegistry(
3424   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3425 static MachineSchedRegistry ILPMinRegistry(
3426   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3427
3428 //===----------------------------------------------------------------------===//
3429 // Machine Instruction Shuffler for Correctness Testing
3430 //===----------------------------------------------------------------------===//
3431
3432 #ifndef NDEBUG
3433 namespace {
3434
3435 /// Apply a less-than relation on the node order, which corresponds to the
3436 /// instruction order prior to scheduling. IsReverse implements greater-than.
3437 template<bool IsReverse>
3438 struct SUnitOrder {
3439   bool operator()(SUnit *A, SUnit *B) const {
3440     if (IsReverse)
3441       return A->NodeNum > B->NodeNum;
3442     else
3443       return A->NodeNum < B->NodeNum;
3444   }
3445 };
3446
3447 /// Reorder instructions as much as possible.
3448 class InstructionShuffler : public MachineSchedStrategy {
3449   bool IsAlternating;
3450   bool IsTopDown;
3451
3452   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3453   // gives nodes with a higher number higher priority causing the latest
3454   // instructions to be scheduled first.
3455   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3456     TopQ;
3457   // When scheduling bottom-up, use greater-than as the queue priority.
3458   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3459     BottomQ;
3460
3461 public:
3462   InstructionShuffler(bool alternate, bool topdown)
3463     : IsAlternating(alternate), IsTopDown(topdown) {}
3464
3465   void initialize(ScheduleDAGMI*) override {
3466     TopQ.clear();
3467     BottomQ.clear();
3468   }
3469
3470   /// Implement MachineSchedStrategy interface.
3471   /// -----------------------------------------
3472
3473   SUnit *pickNode(bool &IsTopNode) override {
3474     SUnit *SU;
3475     if (IsTopDown) {
3476       do {
3477         if (TopQ.empty()) return nullptr;
3478         SU = TopQ.top();
3479         TopQ.pop();
3480       } while (SU->isScheduled);
3481       IsTopNode = true;
3482     } else {
3483       do {
3484         if (BottomQ.empty()) return nullptr;
3485         SU = BottomQ.top();
3486         BottomQ.pop();
3487       } while (SU->isScheduled);
3488       IsTopNode = false;
3489     }
3490     if (IsAlternating)
3491       IsTopDown = !IsTopDown;
3492     return SU;
3493   }
3494
3495   void schedNode(SUnit *SU, bool IsTopNode) override {}
3496
3497   void releaseTopNode(SUnit *SU) override {
3498     TopQ.push(SU);
3499   }
3500   void releaseBottomNode(SUnit *SU) override {
3501     BottomQ.push(SU);
3502   }
3503 };
3504
3505 } // end anonymous namespace
3506
3507 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3508   bool Alternate = !ForceTopDown && !ForceBottomUp;
3509   bool TopDown = !ForceBottomUp;
3510   assert((TopDown || !ForceTopDown) &&
3511          "-misched-topdown incompatible with -misched-bottomup");
3512   return new ScheduleDAGMILive(
3513       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3514 }
3515
3516 static MachineSchedRegistry ShufflerRegistry(
3517   "shuffle", "Shuffle machine instructions alternating directions",
3518   createInstructionShuffler);
3519 #endif // !NDEBUG
3520
3521 //===----------------------------------------------------------------------===//
3522 // GraphWriter support for ScheduleDAGMILive.
3523 //===----------------------------------------------------------------------===//
3524
3525 #ifndef NDEBUG
3526 namespace llvm {
3527
3528 template<> struct GraphTraits<
3529   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3530
3531 template<>
3532 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3533   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3534
3535   static std::string getGraphName(const ScheduleDAG *G) {
3536     return G->MF.getName();
3537   }
3538
3539   static bool renderGraphFromBottomUp() {
3540     return true;
3541   }
3542
3543   static bool isNodeHidden(const SUnit *Node) {
3544     if (ViewMISchedCutoff == 0)
3545       return false;
3546     return (Node->Preds.size() > ViewMISchedCutoff
3547          || Node->Succs.size() > ViewMISchedCutoff);
3548   }
3549
3550   /// If you want to override the dot attributes printed for a particular
3551   /// edge, override this method.
3552   static std::string getEdgeAttributes(const SUnit *Node,
3553                                        SUnitIterator EI,
3554                                        const ScheduleDAG *Graph) {
3555     if (EI.isArtificialDep())
3556       return "color=cyan,style=dashed";
3557     if (EI.isCtrlDep())
3558       return "color=blue,style=dashed";
3559     return "";
3560   }
3561
3562   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3563     std::string Str;
3564     raw_string_ostream SS(Str);
3565     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3566     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3567       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3568     SS << "SU:" << SU->NodeNum;
3569     if (DFS)
3570       SS << " I:" << DFS->getNumInstrs(SU);
3571     return SS.str();
3572   }
3573   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3574     return G->getGraphNodeLabel(SU);
3575   }
3576
3577   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3578     std::string Str("shape=Mrecord");
3579     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3580     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3581       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3582     if (DFS) {
3583       Str += ",style=filled,fillcolor=\"#";
3584       Str += DOT::getColorString(DFS->getSubtreeID(N));
3585       Str += '"';
3586     }
3587     return Str;
3588   }
3589 };
3590
3591 } // end namespace llvm
3592 #endif // NDEBUG
3593
3594 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3595 /// rendered using 'dot'.
3596 ///
3597 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3598 #ifndef NDEBUG
3599   ViewGraph(this, Name, false, Title);
3600 #else
3601   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3602          << "systems with Graphviz or gv!\n";
3603 #endif  // NDEBUG
3604 }
3605
3606 /// Out-of-line implementation with no arguments is handy for gdb.
3607 void ScheduleDAGMI::viewGraph() {
3608   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3609 }