contrib/llvm/lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/CodeGen/MachineScheduler.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/BitVector.h"
  18 #include "llvm/ADT/DenseMap.h"
  19 #include "llvm/ADT/PriorityQueue.h"
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/ADT/SmallVector.h"
  22 #include "llvm/ADT/iterator_range.h"
  23 #include "llvm/Analysis/AliasAnalysis.h"
  24 #include "llvm/CodeGen/LiveInterval.h"
  25 #include "llvm/CodeGen/LiveIntervals.h"
  26 #include "llvm/CodeGen/MachineBasicBlock.h"
  27 #include "llvm/CodeGen/MachineDominators.h"
  28 #include "llvm/CodeGen/MachineFunction.h"
  29 #include "llvm/CodeGen/MachineFunctionPass.h"
  30 #include "llvm/CodeGen/MachineInstr.h"
  31 #include "llvm/CodeGen/MachineLoopInfo.h"
  32 #include "llvm/CodeGen/MachineOperand.h"
  33 #include "llvm/CodeGen/MachinePassRegistry.h"
  34 #include "llvm/CodeGen/MachineRegisterInfo.h"
  35 #include "llvm/CodeGen/MachineValueType.h"
  36 #include "llvm/CodeGen/Passes.h"
  37 #include "llvm/CodeGen/RegisterClassInfo.h"
  38 #include "llvm/CodeGen/RegisterPressure.h"
  39 #include "llvm/CodeGen/ScheduleDAG.h"
  40 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  41 #include "llvm/CodeGen/ScheduleDAGMutation.h"
  42 #include "llvm/CodeGen/ScheduleDFS.h"
  43 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  44 #include "llvm/CodeGen/SlotIndexes.h"
  45 #include "llvm/CodeGen/TargetInstrInfo.h"
  46 #include "llvm/CodeGen/TargetLowering.h"
  47 #include "llvm/CodeGen/TargetPassConfig.h"
  48 #include "llvm/CodeGen/TargetRegisterInfo.h"
  49 #include "llvm/CodeGen/TargetSchedule.h"
  50 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  51 #include "llvm/MC/LaneBitmask.h"
  52 #include "llvm/Pass.h"
  53 #include "llvm/Support/CommandLine.h"
  54 #include "llvm/Support/Compiler.h"
  55 #include "llvm/Support/Debug.h"
  56 #include "llvm/Support/ErrorHandling.h"
  57 #include "llvm/Support/GraphWriter.h"
  58 #include "llvm/Support/raw_ostream.h"
  59 #include <algorithm>
  60 #include <cassert>
  61 #include <cstdint>
  62 #include <iterator>
  63 #include <limits>
  64 #include <memory>
  65 #include <string>
  66 #include <tuple>
  67 #include <utility>
  68 #include <vector>
  69
  70 using namespace llvm;
  71
  72 #define DEBUG_TYPE "machine-scheduler"
  73
  74 namespace llvm {
  75
  76 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  77                            cl::desc("Force top-down list scheduling"));
  78 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  79                             cl::desc("Force bottom-up list scheduling"));
  80 cl::opt<bool>
  81 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
  82                        cl::desc("Print critical path length to stdout"));
  83
  84 } // end namespace llvm
  85
  86 #ifndef NDEBUG
  87 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  88   cl::desc("Pop up a window to show MISched dags after they are processed"));
  89
  90 /// In some situations a few uninteresting nodes depend on nearly all other
  91 /// nodes in the graph, provide a cutoff to hide them.
  92 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  93   cl::desc("Hide nodes with more predecessor/successor than cutoff"));
  94
  95 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  96   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  97
  98 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  99   cl::desc("Only schedule this function"));
 100 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
 101                                         cl::desc("Only schedule this MBB#"));
 102 #else
 103 static bool ViewMISchedDAGs = false;
 104 #endif // NDEBUG
 105
 106 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
 107 /// size of the ready lists.
 108 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
 109   cl::desc("Limit ready list to N instructions"), cl::init(256));
 110
 111 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
 112   cl::desc("Enable register pressure scheduling."), cl::init(true));
 113
 114 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
 115   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
 116
 117 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
 118                                         cl::desc("Enable memop clustering."),
 119                                         cl::init(true));
 120
 121 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
 122   cl::desc("Verify machine instrs before and after machine scheduling"));
 123
 124 // DAG subtrees must have at least this many nodes.
 125 static const unsigned MinSubtreeSize = 8;
 126
 127 // Pin the vtables to this file.
 128 void MachineSchedStrategy::anchor() {}
 129
 130 void ScheduleDAGMutation::anchor() {}
 131
 132 //===----------------------------------------------------------------------===//
 133 // Machine Instruction Scheduling Pass and Registry
 134 //===----------------------------------------------------------------------===//
 135
 136 MachineSchedContext::MachineSchedContext() {
 137   RegClassInfo = new RegisterClassInfo();
 138 }
 139
 140 MachineSchedContext::~MachineSchedContext() {
 141   delete RegClassInfo;
 142 }
 143
 144 namespace {
 145
 146 /// Base class for a machine scheduler class that can run at any point.
 147 class MachineSchedulerBase : public MachineSchedContext,
 148                              public MachineFunctionPass {
 149 public:
 150   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
 151
 152   void print(raw_ostream &O, const Module* = nullptr) const override;
 153
 154 protected:
 155   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
 156 };
 157
 158 /// MachineScheduler runs after coalescing and before register allocation.
 159 class MachineScheduler : public MachineSchedulerBase {
 160 public:
 161   MachineScheduler();
 162
 163   void getAnalysisUsage(AnalysisUsage &AU) const override;
 164
 165   bool runOnMachineFunction(MachineFunction&) override;
 166
 167   static char ID; // Class identification, replacement for typeinfo
 168
 169 protected:
 170   ScheduleDAGInstrs *createMachineScheduler();
 171 };
 172
 173 /// PostMachineScheduler runs after shortly before code emission.
 174 class PostMachineScheduler : public MachineSchedulerBase {
 175 public:
 176   PostMachineScheduler();
 177
 178   void getAnalysisUsage(AnalysisUsage &AU) const override;
 179
 180   bool runOnMachineFunction(MachineFunction&) override;
 181
 182   static char ID; // Class identification, replacement for typeinfo
 183
 184 protected:
 185   ScheduleDAGInstrs *createPostMachineScheduler();
 186 };
 187
 188 } // end anonymous namespace
 189
 190 char MachineScheduler::ID = 0;
 191
 192 char &llvm::MachineSchedulerID = MachineScheduler::ID;
 193
 194 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
 195                       "Machine Instruction Scheduler", false, false)
 196 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 197 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 198 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 199 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 200 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
 201                     "Machine Instruction Scheduler", false, false)
 202
 203 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
 204   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 205 }
 206
 207 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 208   AU.setPreservesCFG();
 209   AU.addRequiredID(MachineDominatorsID);
 210   AU.addRequired<MachineLoopInfo>();
 211   AU.addRequired<AAResultsWrapperPass>();
 212   AU.addRequired<TargetPassConfig>();
 213   AU.addRequired<SlotIndexes>();
 214   AU.addPreserved<SlotIndexes>();
 215   AU.addRequired<LiveIntervals>();
 216   AU.addPreserved<LiveIntervals>();
 217   MachineFunctionPass::getAnalysisUsage(AU);
 218 }
 219
 220 char PostMachineScheduler::ID = 0;
 221
 222 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
 223
 224 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
 225                 "PostRA Machine Instruction Scheduler", false, false)
 226
 227 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
 228   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
 229 }
 230
 231 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 232   AU.setPreservesCFG();
 233   AU.addRequiredID(MachineDominatorsID);
 234   AU.addRequired<MachineLoopInfo>();
 235   AU.addRequired<TargetPassConfig>();
 236   MachineFunctionPass::getAnalysisUsage(AU);
 237 }
 238
 239 MachinePassRegistry MachineSchedRegistry::Registry;
 240
 241 /// A dummy default scheduler factory indicates whether the scheduler
 242 /// is overridden on the command line.
 243 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 244   return nullptr;
 245 }
 246
 247 /// MachineSchedOpt allows command line selection of the scheduler.
 248 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 249                RegisterPassParser<MachineSchedRegistry>>
 250 MachineSchedOpt("misched",
 251                 cl::init(&useDefaultMachineSched), cl::Hidden,
 252                 cl::desc("Machine instruction scheduler to use"));
 253
 254 static MachineSchedRegistry
 255 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 256                      useDefaultMachineSched);
 257
 258 static cl::opt<bool> EnableMachineSched(
 259     "enable-misched",
 260     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
 261     cl::Hidden);
 262
 263 static cl::opt<bool> EnablePostRAMachineSched(
 264     "enable-post-misched",
 265     cl::desc("Enable the post-ra machine instruction scheduling pass."),
 266     cl::init(true), cl::Hidden);
 267
 268 /// Decrement this iterator until reaching the top or a non-debug instr.
 269 static MachineBasicBlock::const_iterator
 270 priorNonDebug(MachineBasicBlock::const_iterator I,
 271               MachineBasicBlock::const_iterator Beg) {
 272   assert(I != Beg && "reached the top of the region, cannot decrement");
 273   while (--I != Beg) {
 274     if (!I->isDebugValue())
 275       break;
 276   }
 277   return I;
 278 }
 279
 280 /// Non-const version.
 281 static MachineBasicBlock::iterator
 282 priorNonDebug(MachineBasicBlock::iterator I,
 283               MachineBasicBlock::const_iterator Beg) {
 284   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
 285       .getNonConstIterator();
 286 }
 287
 288 /// If this iterator is a debug value, increment until reaching the End or a
 289 /// non-debug instruction.
 290 static MachineBasicBlock::const_iterator
 291 nextIfDebug(MachineBasicBlock::const_iterator I,
 292             MachineBasicBlock::const_iterator End) {
 293   for(; I != End; ++I) {
 294     if (!I->isDebugValue())
 295       break;
 296   }
 297   return I;
 298 }
 299
 300 /// Non-const version.
 301 static MachineBasicBlock::iterator
 302 nextIfDebug(MachineBasicBlock::iterator I,
 303             MachineBasicBlock::const_iterator End) {
 304   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
 305       .getNonConstIterator();
 306 }
 307
 308 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
 309 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
 310   // Select the scheduler, or set the default.
 311   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
 312   if (Ctor != useDefaultMachineSched)
 313     return Ctor(this);
 314
 315   // Get the default scheduler set by the target for this function.
 316   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
 317   if (Scheduler)
 318     return Scheduler;
 319
 320   // Default to GenericScheduler.
 321   return createGenericSchedLive(this);
 322 }
 323
 324 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
 325 /// the caller. We don't have a command line option to override the postRA
 326 /// scheduler. The Target must configure it.
 327 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 328   // Get the postRA scheduler set by the target for this function.
 329   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
 330   if (Scheduler)
 331     return Scheduler;
 332
 333   // Default to GenericScheduler.
 334   return createGenericSchedPostRA(this);
 335 }
 336
 337 /// Top-level MachineScheduler pass driver.
 338 ///
 339 /// Visit blocks in function order. Divide each block into scheduling regions
 340 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 341 /// consistent with the DAG builder, which traverses the interior of the
 342 /// scheduling regions bottom-up.
 343 ///
 344 /// This design avoids exposing scheduling boundaries to the DAG builder,
 345 /// simplifying the DAG builder's support for "special" target instructions.
 346 /// At the same time the design allows target schedulers to operate across
 347 /// scheduling boundaries, for example to bundle the boudary instructions
 348 /// without reordering them. This creates complexity, because the target
 349 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 350 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 351 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 352 /// general bias against block splitting purely for implementation simplicity.
 353 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 354   if (skipFunction(mf.getFunction()))
 355     return false;
 356
 357   if (EnableMachineSched.getNumOccurrences()) {
 358     if (!EnableMachineSched)
 359       return false;
 360   } else if (!mf.getSubtarget().enableMachineScheduler())
 361     return false;
 362
 363   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
 364
 365   // Initialize the context of the pass.
 366   MF = &mf;
 367   MLI = &getAnalysis<MachineLoopInfo>();
 368   MDT = &getAnalysis<MachineDominatorTree>();
 369   PassConfig = &getAnalysis<TargetPassConfig>();
 370   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 371
 372   LIS = &getAnalysis<LiveIntervals>();
 373
 374   if (VerifyScheduling) {
 375     DEBUG(LIS->dump());
 376     MF->verify(this, "Before machine scheduling.");
 377   }
 378   RegClassInfo->runOnMachineFunction(*MF);
 379
 380   // Instantiate the selected scheduler for this target, function, and
 381   // optimization level.
 382   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
 383   scheduleRegions(*Scheduler, false);
 384
 385   DEBUG(LIS->dump());
 386   if (VerifyScheduling)
 387     MF->verify(this, "After machine scheduling.");
 388   return true;
 389 }
 390
 391 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 392   if (skipFunction(mf.getFunction()))
 393     return false;
 394
 395   if (EnablePostRAMachineSched.getNumOccurrences()) {
 396     if (!EnablePostRAMachineSched)
 397       return false;
 398   } else if (!mf.getSubtarget().enablePostRAScheduler()) {
 399     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
 400     return false;
 401   }
 402   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
 403
 404   // Initialize the context of the pass.
 405   MF = &mf;
 406   MLI = &getAnalysis<MachineLoopInfo>();
 407   PassConfig = &getAnalysis<TargetPassConfig>();
 408
 409   if (VerifyScheduling)
 410     MF->verify(this, "Before post machine scheduling.");
 411
 412   // Instantiate the selected scheduler for this target, function, and
 413   // optimization level.
 414   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
 415   scheduleRegions(*Scheduler, true);
 416
 417   if (VerifyScheduling)
 418     MF->verify(this, "After post machine scheduling.");
 419   return true;
 420 }
 421
 422 /// Return true of the given instruction should not be included in a scheduling
 423 /// region.
 424 ///
 425 /// MachineScheduler does not currently support scheduling across calls. To
 426 /// handle calls, the DAG builder needs to be modified to create register
 427 /// anti/output dependencies on the registers clobbered by the call's regmask
 428 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
 429 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
 430 /// the boundary, but there would be no benefit to postRA scheduling across
 431 /// calls this late anyway.
 432 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
 433                             MachineBasicBlock *MBB,
 434                             MachineFunction *MF,
 435                             const TargetInstrInfo *TII) {
 436   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
 437 }
 438
 439 /// A region of an MBB for scheduling.
 440 namespace {
 441 struct SchedRegion {
 442   /// RegionBegin is the first instruction in the scheduling region, and
 443   /// RegionEnd is either MBB->end() or the scheduling boundary after the
 444   /// last instruction in the scheduling region. These iterators cannot refer
 445   /// to instructions outside of the identified scheduling region because
 446   /// those may be reordered before scheduling this region.
 447   MachineBasicBlock::iterator RegionBegin;
 448   MachineBasicBlock::iterator RegionEnd;
 449   unsigned NumRegionInstrs;
 450
 451   SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
 452               unsigned N) :
 453     RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
 454 };
 455 } // end anonymous namespace
 456
 457 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
 458
 459 static void
 460 getSchedRegions(MachineBasicBlock *MBB,
 461                 MBBRegionsVector &Regions,
 462                 bool RegionsTopDown) {
 463   MachineFunction *MF = MBB->getParent();
 464   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 465
 466   MachineBasicBlock::iterator I = nullptr;
 467   for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 468       RegionEnd != MBB->begin(); RegionEnd = I) {
 469
 470     // Avoid decrementing RegionEnd for blocks with no terminator.
 471     if (RegionEnd != MBB->end() ||
 472         isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
 473       --RegionEnd;
 474     }
 475
 476     // The next region starts above the previous region. Look backward in the
 477     // instruction stream until we find the nearest boundary.
 478     unsigned NumRegionInstrs = 0;
 479     I = RegionEnd;
 480     for (;I != MBB->begin(); --I) {
 481       MachineInstr &MI = *std::prev(I);
 482       if (isSchedBoundary(&MI, &*MBB, MF, TII))
 483         break;
 484       if (!MI.isDebugValue())
 485         // MBB::size() uses instr_iterator to count. Here we need a bundle to
 486         // count as a single instruction.
 487         ++NumRegionInstrs;
 488     }
 489
 490     Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
 491   }
 492
 493   if (RegionsTopDown)
 494     std::reverse(Regions.begin(), Regions.end());
 495 }
 496
 497 /// Main driver for both MachineScheduler and PostMachineScheduler.
 498 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
 499                                            bool FixKillFlags) {
 500   // Visit all machine basic blocks.
 501   //
 502   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 503   // loop tree. Then we can optionally compute global RegPressure.
 504   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 505        MBB != MBBEnd; ++MBB) {
 506
 507     Scheduler.startBlock(&*MBB);
 508
 509 #ifndef NDEBUG
 510     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
 511       continue;
 512     if (SchedOnlyBlock.getNumOccurrences()
 513         && (int)SchedOnlyBlock != MBB->getNumber())
 514       continue;
 515 #endif
 516
 517     // Break the block into scheduling regions [I, RegionEnd). RegionEnd
 518     // points to the scheduling boundary at the bottom of the region. The DAG
 519     // does not include RegionEnd, but the region does (i.e. the next
 520     // RegionEnd is above the previous RegionBegin). If the current block has
 521     // no terminator then RegionEnd == MBB->end() for the bottom region.
 522     //
 523     // All the regions of MBB are first found and stored in MBBRegions, which
 524     // will be processed (MBB) top-down if initialized with true.
 525     //
 526     // The Scheduler may insert instructions during either schedule() or
 527     // exitRegion(), even for empty regions. So the local iterators 'I' and
 528     // 'RegionEnd' are invalid across these calls. Instructions must not be
 529     // added to other regions than the current one without updating MBBRegions.
 530
 531     MBBRegionsVector MBBRegions;
 532     getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
 533     for (MBBRegionsVector::iterator R = MBBRegions.begin();
 534          R != MBBRegions.end(); ++R) {
 535       MachineBasicBlock::iterator I = R->RegionBegin;
 536       MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
 537       unsigned NumRegionInstrs = R->NumRegionInstrs;
 538
 539       // Notify the scheduler of the region, even if we may skip scheduling
 540       // it. Perhaps it still needs to be bundled.
 541       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
 542
 543       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 544       if (I == RegionEnd || I == std::prev(RegionEnd)) {
 545         // Close the current region. Bundle the terminator if needed.
 546         // This invalidates 'RegionEnd' and 'I'.
 547         Scheduler.exitRegion();
 548         continue;
 549       }
 550       DEBUG(dbgs() << "********** MI Scheduling **********\n");
 551       DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " "
 552                    << MBB->getName() << "\n  From: " << *I << "    To: ";
 553             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 554             else dbgs() << "End";
 555             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
 556       if (DumpCriticalPathLength) {
 557         errs() << MF->getName();
 558         errs() << ":%bb. " << MBB->getNumber();
 559         errs() << " " << MBB->getName() << " \n";
 560       }
 561
 562       // Schedule a region: possibly reorder instructions.
 563       // This invalidates the original region iterators.
 564       Scheduler.schedule();
 565
 566       // Close the current region.
 567       Scheduler.exitRegion();
 568     }
 569     Scheduler.finishBlock();
 570     // FIXME: Ideally, no further passes should rely on kill flags. However,
 571     // thumb2 size reduction is currently an exception, so the PostMIScheduler
 572     // needs to do this.
 573     if (FixKillFlags)
 574       Scheduler.fixupKills(*MBB);
 575   }
 576   Scheduler.finalizeSchedule();
 577 }
 578
 579 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
 580   // unimplemented
 581 }
 582
 583 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 584 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
 585   dbgs() << "Queue " << Name << ": ";
 586   for (const SUnit *SU : Queue)
 587     dbgs() << SU->NodeNum << " ";
 588   dbgs() << "\n";
 589 }
 590 #endif
 591
 592 //===----------------------------------------------------------------------===//
 593 // ScheduleDAGMI - Basic machine instruction scheduling. This is
 594 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
 595 // virtual registers.
//===----------------------------------------------------------------------===//
 597
 598 // Provide a vtable anchor.
 599 ScheduleDAGMI::~ScheduleDAGMI() = default;
 600
 601 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
 602   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
 603 }
 604
 605 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
 606   if (SuccSU != &ExitSU) {
 607     // Do not use WillCreateCycle, it assumes SD scheduling.
 608     // If Pred is reachable from Succ, then the edge creates a cycle.
 609     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 610       return false;
 611     Topo.AddPred(SuccSU, PredDep.getSUnit());
 612   }
 613   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 614   // Return true regardless of whether a new edge needed to be inserted.
 615   return true;
 616 }
 617
 618 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 619 /// NumPredsLeft reaches zero, release the successor node.
 620 ///
 621 /// FIXME: Adjust SuccSU height based on MinLatency.
 622 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 623   SUnit *SuccSU = SuccEdge->getSUnit();
 624
 625   if (SuccEdge->isWeak()) {
 626     --SuccSU->WeakPredsLeft;
 627     if (SuccEdge->isCluster())
 628       NextClusterSucc = SuccSU;
 629     return;
 630   }
 631 #ifndef NDEBUG
 632   if (SuccSU->NumPredsLeft == 0) {
 633     dbgs() << "*** Scheduling failed! ***\n";
 634     SuccSU->dump(this);
 635     dbgs() << " has been released too many times!\n";
 636     llvm_unreachable(nullptr);
 637   }
 638 #endif
 639   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
 640   // CurrCycle may have advanced since then.
 641   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
 642     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
 643
 644   --SuccSU->NumPredsLeft;
 645   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 646     SchedImpl->releaseTopNode(SuccSU);
 647 }
 648
 649 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 650 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 651   for (SDep &Succ : SU->Succs)
 652     releaseSucc(SU, &Succ);
 653 }
 654
 655 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 656 /// NumSuccsLeft reaches zero, release the predecessor node.
 657 ///
 658 /// FIXME: Adjust PredSU height based on MinLatency.
 659 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 660   SUnit *PredSU = PredEdge->getSUnit();
 661
 662   if (PredEdge->isWeak()) {
 663     --PredSU->WeakSuccsLeft;
 664     if (PredEdge->isCluster())
 665       NextClusterPred = PredSU;
 666     return;
 667   }
 668 #ifndef NDEBUG
 669   if (PredSU->NumSuccsLeft == 0) {
 670     dbgs() << "*** Scheduling failed! ***\n";
 671     PredSU->dump(this);
 672     dbgs() << " has been released too many times!\n";
 673     llvm_unreachable(nullptr);
 674   }
 675 #endif
 676   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
 677   // CurrCycle may have advanced since then.
 678   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
 679     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
 680
 681   --PredSU->NumSuccsLeft;
 682   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 683     SchedImpl->releaseBottomNode(PredSU);
 684 }
 685
 686 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 687 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 688   for (SDep &Pred : SU->Preds)
 689     releasePred(SU, &Pred);
 690 }
 691
 692 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
 693   ScheduleDAGInstrs::startBlock(bb);
 694   SchedImpl->enterMBB(bb);
 695 }
 696
 697 void ScheduleDAGMI::finishBlock() {
 698   SchedImpl->leaveMBB();
 699   ScheduleDAGInstrs::finishBlock();
 700 }
 701
 702 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 703 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 704 /// the region, including the boundary itself and single-instruction regions
 705 /// that don't get scheduled.
 706 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 707                                      MachineBasicBlock::iterator begin,
 708                                      MachineBasicBlock::iterator end,
 709                                      unsigned regioninstrs)
 710 {
 711   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
 712
 713   SchedImpl->initPolicy(begin, end, regioninstrs);
 714 }
 715
 716 /// This is normally called from the main scheduler loop but may also be invoked
 717 /// by the scheduling strategy to perform additional code motion.
 718 void ScheduleDAGMI::moveInstruction(
 719   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
 720   // Advance RegionBegin if the first instruction moves down.
 721   if (&*RegionBegin == MI)
 722     ++RegionBegin;
 723
 724   // Update the instruction stream.
 725   BB->splice(InsertPos, BB, MI);
 726
 727   // Update LiveIntervals
 728   if (LIS)
 729     LIS->handleMove(*MI, /*UpdateFlags=*/true);
 730
 731   // Recede RegionBegin if an instruction moves above the first.
 732   if (RegionBegin == InsertPos)
 733     RegionBegin = MI;
 734 }
 735
 736 bool ScheduleDAGMI::checkSchedLimit() {
 737 #ifndef NDEBUG
 738   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
 739     CurrentTop = CurrentBottom;
 740     return false;
 741   }
 742   ++NumInstrsScheduled;
 743 #endif
 744   return true;
 745 }
 746
 747 /// Per-region scheduling driver, called back from
 748 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
 749 /// does not consider liveness or register pressure. It is useful for PostRA
 750 /// scheduling and potentially other custom schedulers.
 751 void ScheduleDAGMI::schedule() {
 752   DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
 753   DEBUG(SchedImpl->dumpPolicy());
 754
 755   // Build the DAG.
 756   buildSchedGraph(AA);
 757
 758   Topo.InitDAGTopologicalSorting();
 759
 760   postprocessDAG();
 761
 762   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 763   findRootsAndBiasEdges(TopRoots, BotRoots);
 764
 765   // Initialize the strategy before modifying the DAG.
 766   // This may initialize a DFSResult to be used for queue priority.
 767   SchedImpl->initialize(this);
 768
 769   DEBUG(
 770     if (EntrySU.getInstr() != nullptr)
 771       EntrySU.dumpAll(this);
 772     for (const SUnit &SU : SUnits)
 773       SU.dumpAll(this);
 774     if (ExitSU.getInstr() != nullptr)
 775       ExitSU.dumpAll(this);
 776   );
 777   if (ViewMISchedDAGs) viewGraph();
 778
 779   // Initialize ready queues now that the DAG and priority data are finalized.
 780   initQueues(TopRoots, BotRoots);
 781
 782   bool IsTopNode = false;
 783   while (true) {
 784     DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
 785     SUnit *SU = SchedImpl->pickNode(IsTopNode);
 786     if (!SU) break;
 787
 788     assert(!SU->isScheduled && "Node already scheduled");
 789     if (!checkSchedLimit())
 790       break;
 791
 792     MachineInstr *MI = SU->getInstr();
 793     if (IsTopNode) {
 794       assert(SU->isTopReady() && "node still has unscheduled dependencies");
 795       if (&*CurrentTop == MI)
 796         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 797       else
 798         moveInstruction(MI, CurrentTop);
 799     } else {
 800       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 801       MachineBasicBlock::iterator priorII =
 802         priorNonDebug(CurrentBottom, CurrentTop);
 803       if (&*priorII == MI)
 804         CurrentBottom = priorII;
 805       else {
 806         if (&*CurrentTop == MI)
 807           CurrentTop = nextIfDebug(++CurrentTop, priorII);
 808         moveInstruction(MI, CurrentBottom);
 809         CurrentBottom = MI;
 810       }
 811     }
 812     // Notify the scheduling strategy before updating the DAG.
 813     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
 814     // runs, it can then use the accurate ReadyCycle time to determine whether
 815     // newly released nodes can move to the readyQ.
 816     SchedImpl->schedNode(SU, IsTopNode);
 817
 818     updateQueues(SU, IsTopNode);
 819   }
 820   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 821
 822   placeDebugValues();
 823
 824   DEBUG({
 825     dbgs() << "*** Final schedule for "
 826            << printMBBReference(*begin()->getParent()) << " ***\n";
 827     dumpSchedule();
 828     dbgs() << '\n';
 829   });
 830 }
 831
 832 /// Apply each ScheduleDAGMutation step in order.
 833 void ScheduleDAGMI::postprocessDAG() {
 834   for (auto &m : Mutations)
 835     m->apply(this);
 836 }
 837
 838 void ScheduleDAGMI::
 839 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 840                       SmallVectorImpl<SUnit*> &BotRoots) {
 841   for (SUnit &SU : SUnits) {
 842     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
 843
 844     // Order predecessors so DFSResult follows the critical path.
 845     SU.biasCriticalPath();
 846
 847     // A SUnit is ready to top schedule if it has no predecessors.
 848     if (!SU.NumPredsLeft)
 849       TopRoots.push_back(&SU);
 850     // A SUnit is ready to bottom schedule if it has no successors.
 851     if (!SU.NumSuccsLeft)
 852       BotRoots.push_back(&SU);
 853   }
 854   ExitSU.biasCriticalPath();
 855 }
 856
 857 /// Identify DAG roots and setup scheduler queues.
 858 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 859                                ArrayRef<SUnit*> BotRoots) {
 860   NextClusterSucc = nullptr;
 861   NextClusterPred = nullptr;
 862
 863   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 864   //
 865   // Nodes with unreleased weak edges can still be roots.
 866   // Release top roots in forward order.
 867   for (SUnit *SU : TopRoots)
 868     SchedImpl->releaseTopNode(SU);
 869
 870   // Release bottom roots in reverse order so the higher priority nodes appear
 871   // first. This is more natural and slightly more efficient.
 872   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 873          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 874     SchedImpl->releaseBottomNode(*I);
 875   }
 876
 877   releaseSuccessors(&EntrySU);
 878   releasePredecessors(&ExitSU);
 879
 880   SchedImpl->registerRoots();
 881
 882   // Advance past initial DebugValues.
 883   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 884   CurrentBottom = RegionEnd;
 885 }
 886
 887 /// Update scheduler queues after scheduling an instruction.
 888 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 889   // Release dependent instructions for scheduling.
 890   if (IsTopNode)
 891     releaseSuccessors(SU);
 892   else
 893     releasePredecessors(SU);
 894
 895   SU->isScheduled = true;
 896 }
 897
 898 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
 899 void ScheduleDAGMI::placeDebugValues() {
 900   // If first instruction was a DBG_VALUE then put it back.
 901   if (FirstDbgValue) {
 902     BB->splice(RegionBegin, BB, FirstDbgValue);
 903     RegionBegin = FirstDbgValue;
 904   }
 905
 906   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
 907          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 908     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
 909     MachineInstr *DbgValue = P.first;
 910     MachineBasicBlock::iterator OrigPrevMI = P.second;
 911     if (&*RegionBegin == DbgValue)
 912       ++RegionBegin;
 913     BB->splice(++OrigPrevMI, BB, DbgValue);
 914     if (OrigPrevMI == std::prev(RegionEnd))
 915       RegionEnd = DbgValue;
 916   }
 917   DbgValues.clear();
 918   FirstDbgValue = nullptr;
 919 }
 920
 921 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 922 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
 923   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 924     if (SUnit *SU = getSUnit(&(*MI)))
 925       SU->dump(this);
 926     else
 927       dbgs() << "Missing SUnit\n";
 928   }
 929 }
 930 #endif
 931
 932 //===----------------------------------------------------------------------===//
 933 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
 934 // preservation.
 935 //===----------------------------------------------------------------------===//
 936
/// Free the SchedDFSResult, if any. computeDFSResult() allocates it with
/// `new` on first use, so it is released here with a plain `delete`.
ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}
 940
 941 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
 942   const MachineInstr &MI = *SU.getInstr();
 943   for (const MachineOperand &MO : MI.operands()) {
 944     if (!MO.isReg())
 945       continue;
 946     if (!MO.readsReg())
 947       continue;
 948     if (TrackLaneMasks && !MO.isUse())
 949       continue;
 950
 951     unsigned Reg = MO.getReg();
 952     if (!TargetRegisterInfo::isVirtualRegister(Reg))
 953       continue;
 954
 955     // Ignore re-defs.
 956     if (TrackLaneMasks) {
 957       bool FoundDef = false;
 958       for (const MachineOperand &MO2 : MI.operands()) {
 959         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
 960           FoundDef = true;
 961           break;
 962         }
 963       }
 964       if (FoundDef)
 965         continue;
 966     }
 967
 968     // Record this local VReg use.
 969     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
 970     for (; UI != VRegUses.end(); ++UI) {
 971       if (UI->SU == &SU)
 972         break;
 973     }
 974     if (UI == VRegUses.end())
 975       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
 976   }
 977 }
 978
 979 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 980 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 981 /// the region, including the boundary itself and single-instruction regions
 982 /// that don't get scheduled.
 983 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
 984                                 MachineBasicBlock::iterator begin,
 985                                 MachineBasicBlock::iterator end,
 986                                 unsigned regioninstrs)
 987 {
 988   // ScheduleDAGMI initializes SchedImpl's per-region policy.
 989   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 990
 991   // For convenience remember the end of the liveness region.
 992   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
 993
 994   SUPressureDiffs.clear();
 995
 996   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 997   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
 998
 999   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1000          "ShouldTrackLaneMasks requires ShouldTrackPressure");
1001 }
1002
1003 // Setup the register pressure trackers for the top scheduled top and bottom
1004 // scheduled regions.
1005 void ScheduleDAGMILive::initRegPressure() {
1006   VRegUses.clear();
1007   VRegUses.setUniverse(MRI.getNumVirtRegs());
1008   for (SUnit &SU : SUnits)
1009     collectVRegUses(SU);
1010
1011   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1012                     ShouldTrackLaneMasks, false);
1013   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1014                     ShouldTrackLaneMasks, false);
1015
1016   // Close the RPTracker to finalize live ins.
1017   RPTracker.closeRegion();
1018
1019   DEBUG(RPTracker.dump());
1020
1021   // Initialize the live ins and live outs.
1022   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1023   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1024
1025   // Close one end of the tracker so we can call
1026   // getMaxUpward/DownwardPressureDelta before advancing across any
1027   // instructions. This converts currently live regs into live ins/outs.
1028   TopRPTracker.closeTop();
1029   BotRPTracker.closeBottom();
1030
1031   BotRPTracker.initLiveThru(RPTracker);
1032   if (!BotRPTracker.getLiveThru().empty()) {
1033     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1034     DEBUG(dbgs() << "Live Thru: ";
1035           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1036   };
1037
1038   // For each live out vreg reduce the pressure change associated with other
1039   // uses of the same vreg below the live-out reaching def.
1040   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1041
1042   // Account for liveness generated by the region boundary.
1043   if (LiveRegionEnd != RegionEnd) {
1044     SmallVector<RegisterMaskPair, 8> LiveUses;
1045     BotRPTracker.recede(&LiveUses);
1046     updatePressureDiffs(LiveUses);
1047   }
1048
1049   DEBUG(
1050     dbgs() << "Top Pressure:\n";
1051     dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1052     dbgs() << "Bottom Pressure:\n";
1053     dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1054   );
1055
1056   assert((BotRPTracker.getPos() == RegionEnd ||
1057           (RegionEnd->isDebugValue() &&
1058            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1059          "Can't find the region bottom");
1060
1061   // Cache the list of excess pressure sets in this region. This will also track
1062   // the max pressure in the scheduled code for these sets.
1063   RegionCriticalPSets.clear();
1064   const std::vector<unsigned> &RegionPressure =
1065     RPTracker.getPressure().MaxSetPressure;
1066   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1067     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1068     if (RegionPressure[i] > Limit) {
1069       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
1070             << " Limit " << Limit
1071             << " Actual " << RegionPressure[i] << "\n");
1072       RegionCriticalPSets.push_back(PressureChange(i));
1073     }
1074   }
1075   DEBUG(dbgs() << "Excess PSets: ";
1076         for (const PressureChange &RCPS : RegionCriticalPSets)
1077           dbgs() << TRI->getRegPressureSetName(
1078             RCPS.getPSet()) << " ";
1079         dbgs() << "\n");
1080 }
1081
1082 void ScheduleDAGMILive::
1083 updateScheduledPressure(const SUnit *SU,
1084                         const std::vector<unsigned> &NewMaxPressure) {
1085   const PressureDiff &PDiff = getPressureDiff(SU);
1086   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1087   for (const PressureChange &PC : PDiff) {
1088     if (!PC.isValid())
1089       break;
1090     unsigned ID = PC.getPSet();
1091     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1092       ++CritIdx;
1093     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1094       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1095           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1096         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1097     }
1098     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1099     if (NewMaxPressure[ID] >= Limit - 2) {
1100       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1101             << NewMaxPressure[ID]
1102             << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1103             << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1104     }
1105   }
1106 }
1107
1108 /// Update the PressureDiff array for liveness after scheduling this
1109 /// instruction.
1110 void ScheduleDAGMILive::updatePressureDiffs(
1111     ArrayRef<RegisterMaskPair> LiveUses) {
1112   for (const RegisterMaskPair &P : LiveUses) {
1113     unsigned Reg = P.RegUnit;
1114     /// FIXME: Currently assuming single-use physregs.
1115     if (!TRI->isVirtualRegister(Reg))
1116       continue;
1117
1118     if (ShouldTrackLaneMasks) {
1119       // If the register has just become live then other uses won't change
1120       // this fact anymore => decrement pressure.
1121       // If the register has just become dead then other uses make it come
1122       // back to life => increment pressure.
1123       bool Decrement = P.LaneMask.any();
1124
1125       for (const VReg2SUnit &V2SU
1126            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1127         SUnit &SU = *V2SU.SU;
1128         if (SU.isScheduled || &SU == &ExitSU)
1129           continue;
1130
1131         PressureDiff &PDiff = getPressureDiff(&SU);
1132         PDiff.addPressureChange(Reg, Decrement, &MRI);
1133         DEBUG(
1134           dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1135                  << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1136                  << ' ' << *SU.getInstr();
1137           dbgs() << "              to ";
1138           PDiff.dump(*TRI);
1139         );
1140       }
1141     } else {
1142       assert(P.LaneMask.any());
1143       DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1144       // This may be called before CurrentBottom has been initialized. However,
1145       // BotRPTracker must have a valid position. We want the value live into the
1146       // instruction or live out of the block, so ask for the previous
1147       // instruction's live-out.
1148       const LiveInterval &LI = LIS->getInterval(Reg);
1149       VNInfo *VNI;
1150       MachineBasicBlock::const_iterator I =
1151         nextIfDebug(BotRPTracker.getPos(), BB->end());
1152       if (I == BB->end())
1153         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1154       else {
1155         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1156         VNI = LRQ.valueIn();
1157       }
1158       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1159       assert(VNI && "No live value at use.");
1160       for (const VReg2SUnit &V2SU
1161            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1162         SUnit *SU = V2SU.SU;
1163         // If this use comes before the reaching def, it cannot be a last use,
1164         // so decrease its pressure change.
1165         if (!SU->isScheduled && SU != &ExitSU) {
1166           LiveQueryResult LRQ =
1167               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1168           if (LRQ.valueIn() == VNI) {
1169             PressureDiff &PDiff = getPressureDiff(SU);
1170             PDiff.addPressureChange(Reg, true, &MRI);
1171             DEBUG(
1172               dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1173                      << *SU->getInstr();
1174               dbgs() << "              to ";
1175               PDiff.dump(*TRI);
1176             );
1177           }
1178         }
1179       }
1180     }
1181   }
1182 }
1183
1184 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1185 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1186 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1187 ///
1188 /// This is a skeletal driver, with all the functionality pushed into helpers,
1189 /// so that it can be easily extended by experimental schedulers. Generally,
1190 /// implementing MachineSchedStrategy should be sufficient to implement a new
1191 /// scheduling algorithm. However, if a scheduler further subclasses
1192 /// ScheduleDAGMILive then it will want to override this virtual method in order
1193 /// to update any specialized state.
1194 void ScheduleDAGMILive::schedule() {
1195   DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1196   DEBUG(SchedImpl->dumpPolicy());
1197   buildDAGWithRegPressure();
1198
1199   Topo.InitDAGTopologicalSorting();
1200
1201   postprocessDAG();
1202
1203   SmallVector<SUnit*, 8> TopRoots, BotRoots;
1204   findRootsAndBiasEdges(TopRoots, BotRoots);
1205
1206   // Initialize the strategy before modifying the DAG.
1207   // This may initialize a DFSResult to be used for queue priority.
1208   SchedImpl->initialize(this);
1209
1210   DEBUG(
1211     if (EntrySU.getInstr() != nullptr)
1212       EntrySU.dumpAll(this);
1213     for (const SUnit &SU : SUnits) {
1214       SU.dumpAll(this);
1215       if (ShouldTrackPressure) {
1216         dbgs() << "  Pressure Diff      : ";
1217         getPressureDiff(&SU).dump(*TRI);
1218       }
1219       dbgs() << "  Single Issue       : ";
1220       if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1221          SchedModel.mustEndGroup(SU.getInstr()))
1222         dbgs() << "true;";
1223       else
1224         dbgs() << "false;";
1225       dbgs() << '\n';
1226     }
1227     if (ExitSU.getInstr() != nullptr)
1228       ExitSU.dumpAll(this);
1229   );
1230   if (ViewMISchedDAGs) viewGraph();
1231
1232   // Initialize ready queues now that the DAG and priority data are finalized.
1233   initQueues(TopRoots, BotRoots);
1234
1235   bool IsTopNode = false;
1236   while (true) {
1237     DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1238     SUnit *SU = SchedImpl->pickNode(IsTopNode);
1239     if (!SU) break;
1240
1241     assert(!SU->isScheduled && "Node already scheduled");
1242     if (!checkSchedLimit())
1243       break;
1244
1245     scheduleMI(SU, IsTopNode);
1246
1247     if (DFSResult) {
1248       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1249       if (!ScheduledTrees.test(SubtreeID)) {
1250         ScheduledTrees.set(SubtreeID);
1251         DFSResult->scheduleTree(SubtreeID);
1252         SchedImpl->scheduleTree(SubtreeID);
1253       }
1254     }
1255
1256     // Notify the scheduling strategy after updating the DAG.
1257     SchedImpl->schedNode(SU, IsTopNode);
1258
1259     updateQueues(SU, IsTopNode);
1260   }
1261   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1262
1263   placeDebugValues();
1264
1265   DEBUG({
1266     dbgs() << "*** Final schedule for "
1267            << printMBBReference(*begin()->getParent()) << " ***\n";
1268     dumpSchedule();
1269     dbgs() << '\n';
1270   });
1271 }
1272
1273 /// Build the DAG and setup three register pressure trackers.
1274 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1275   if (!ShouldTrackPressure) {
1276     RPTracker.reset();
1277     RegionCriticalPSets.clear();
1278     buildSchedGraph(AA);
1279     return;
1280   }
1281
1282   // Initialize the register pressure tracker used by buildSchedGraph.
1283   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1284                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1285
1286   // Account for liveness generate by the region boundary.
1287   if (LiveRegionEnd != RegionEnd)
1288     RPTracker.recede();
1289
1290   // Build the DAG, and compute current register pressure.
1291   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1292
1293   // Initialize top/bottom trackers after computing region pressure.
1294   initRegPressure();
1295 }
1296
1297 void ScheduleDAGMILive::computeDFSResult() {
1298   if (!DFSResult)
1299     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1300   DFSResult->clear();
1301   ScheduledTrees.clear();
1302   DFSResult->resize(SUnits.size());
1303   DFSResult->compute(SUnits);
1304   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1305 }
1306
1307 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1308 /// only provides the critical path for single block loops. To handle loops that
1309 /// span blocks, we could use the vreg path latencies provided by
1310 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1311 /// available for use in the scheduler.
1312 ///
1313 /// The cyclic path estimation identifies a def-use pair that crosses the back
1314 /// edge and considers the depth and height of the nodes. For example, consider
1315 /// the following instruction sequence where each instruction has unit latency
1316 /// and defines an epomymous virtual register:
1317 ///
1318 /// a->b(a,c)->c(b)->d(c)->exit
1319 ///
1320 /// The cyclic critical path is a two cycles: b->c->b
1321 /// The acyclic critical path is four cycles: a->b->c->d->exit
1322 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1323 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1324 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1325 /// LiveInDepth = depth(b) = len(a->b) = 1
1326 ///
1327 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1328 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1329 /// CyclicCriticalPath = min(2, 2) = 2
1330 ///
1331 /// This could be relevant to PostRA scheduling, but is currently implemented
1332 /// assuming LiveIntervals.
1333 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1334   // This only applies to single block loop.
1335   if (!BB->isSuccessor(BB))
1336     return 0;
1337
1338   unsigned MaxCyclicLatency = 0;
1339   // Visit each live out vreg def to find def/use pairs that cross iterations.
1340   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1341     unsigned Reg = P.RegUnit;
1342     if (!TRI->isVirtualRegister(Reg))
1343         continue;
1344     const LiveInterval &LI = LIS->getInterval(Reg);
1345     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1346     if (!DefVNI)
1347       continue;
1348
1349     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1350     const SUnit *DefSU = getSUnit(DefMI);
1351     if (!DefSU)
1352       continue;
1353
1354     unsigned LiveOutHeight = DefSU->getHeight();
1355     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1356     // Visit all local users of the vreg def.
1357     for (const VReg2SUnit &V2SU
1358          : make_range(VRegUses.find(Reg), VRegUses.end())) {
1359       SUnit *SU = V2SU.SU;
1360       if (SU == &ExitSU)
1361         continue;
1362
1363       // Only consider uses of the phi.
1364       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1365       if (!LRQ.valueIn()->isPHIDef())
1366         continue;
1367
1368       // Assume that a path spanning two iterations is a cycle, which could
1369       // overestimate in strange cases. This allows cyclic latency to be
1370       // estimated as the minimum slack of the vreg's depth or height.
1371       unsigned CyclicLatency = 0;
1372       if (LiveOutDepth > SU->getDepth())
1373         CyclicLatency = LiveOutDepth - SU->getDepth();
1374
1375       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1376       if (LiveInHeight > LiveOutHeight) {
1377         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1378           CyclicLatency = LiveInHeight - LiveOutHeight;
1379       } else
1380         CyclicLatency = 0;
1381
1382       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1383             << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1384       if (CyclicLatency > MaxCyclicLatency)
1385         MaxCyclicLatency = CyclicLatency;
1386     }
1387   }
1388   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1389   return MaxCyclicLatency;
1390 }
1391
1392 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1393 /// the Top RP tracker in case the region beginning has changed.
1394 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1395                                    ArrayRef<SUnit*> BotRoots) {
1396   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1397   if (ShouldTrackPressure) {
1398     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1399     TopRPTracker.setPos(CurrentTop);
1400   }
1401 }
1402
1403 /// Move an instruction and update register pressure.
1404 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1405   // Move the instruction to its new location in the instruction stream.
1406   MachineInstr *MI = SU->getInstr();
1407
1408   if (IsTopNode) {
1409     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1410     if (&*CurrentTop == MI)
1411       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1412     else {
1413       moveInstruction(MI, CurrentTop);
1414       TopRPTracker.setPos(MI);
1415     }
1416
1417     if (ShouldTrackPressure) {
1418       // Update top scheduled pressure.
1419       RegisterOperands RegOpers;
1420       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1421       if (ShouldTrackLaneMasks) {
1422         // Adjust liveness and add missing dead+read-undef flags.
1423         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1424         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1425       } else {
1426         // Adjust for missing dead-def flags.
1427         RegOpers.detectDeadDefs(*MI, *LIS);
1428       }
1429
1430       TopRPTracker.advance(RegOpers);
1431       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1432       DEBUG(
1433         dbgs() << "Top Pressure:\n";
1434         dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1435       );
1436
1437       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1438     }
1439   } else {
1440     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1441     MachineBasicBlock::iterator priorII =
1442       priorNonDebug(CurrentBottom, CurrentTop);
1443     if (&*priorII == MI)
1444       CurrentBottom = priorII;
1445     else {
1446       if (&*CurrentTop == MI) {
1447         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1448         TopRPTracker.setPos(CurrentTop);
1449       }
1450       moveInstruction(MI, CurrentBottom);
1451       CurrentBottom = MI;
1452     }
1453     if (ShouldTrackPressure) {
1454       RegisterOperands RegOpers;
1455       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1456       if (ShouldTrackLaneMasks) {
1457         // Adjust liveness and add missing dead+read-undef flags.
1458         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1459         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1460       } else {
1461         // Adjust for missing dead-def flags.
1462         RegOpers.detectDeadDefs(*MI, *LIS);
1463       }
1464
1465       if (BotRPTracker.getPos() != CurrentBottom)
1466         BotRPTracker.recedeSkipDebugValues();
1467       SmallVector<RegisterMaskPair, 8> LiveUses;
1468       BotRPTracker.recede(RegOpers, &LiveUses);
1469       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1470       DEBUG(
1471         dbgs() << "Bottom Pressure:\n";
1472         dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1473       );
1474
1475       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1476       updatePressureDiffs(LiveUses);
1477     }
1478   }
1479 }
1480
1481 //===----------------------------------------------------------------------===//
1482 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1483 //===----------------------------------------------------------------------===//
1484
1485 namespace {
1486
1487 /// \brief Post-process the DAG to create cluster edges between neighboring
1488 /// loads or between neighboring stores.
1489 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1490   struct MemOpInfo {
1491     SUnit *SU;
1492     unsigned BaseReg;
1493     int64_t Offset;
1494
1495     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1496         : SU(su), BaseReg(reg), Offset(ofs) {}
1497
1498     bool operator<(const MemOpInfo&RHS) const {
1499       return std::tie(BaseReg, Offset, SU->NodeNum) <
1500              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1501     }
1502   };
1503
1504   const TargetInstrInfo *TII;
1505   const TargetRegisterInfo *TRI;
1506   bool IsLoad;
1507
1508 public:
1509   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1510                            const TargetRegisterInfo *tri, bool IsLoad)
1511       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1512
1513   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1514
1515 protected:
1516   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1517 };
1518
/// BaseMemOpClusterMutation configured to cluster stores: it passes
/// IsLoad = false to the base class.
class StoreClusterMutation : public BaseMemOpClusterMutation {
public:
  StoreClusterMutation(const TargetInstrInfo *tii,
                       const TargetRegisterInfo *tri)
      : BaseMemOpClusterMutation(tii, tri, false) {}
};
1525
/// BaseMemOpClusterMutation configured to cluster loads: it passes
/// IsLoad = true to the base class.
class LoadClusterMutation : public BaseMemOpClusterMutation {
public:
  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
      : BaseMemOpClusterMutation(tii, tri, true) {}
};
1531
1532 } // end anonymous namespace
1533
1534 namespace llvm {
1535
1536 std::unique_ptr<ScheduleDAGMutation>
1537 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1538                              const TargetRegisterInfo *TRI) {
1539   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1540                             : nullptr;
1541 }
1542
1543 std::unique_ptr<ScheduleDAGMutation>
1544 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1545                               const TargetRegisterInfo *TRI) {
1546   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1547                             : nullptr;
1548 }
1549
1550 } // end namespace llvm
1551
1552 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1553     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1554   SmallVector<MemOpInfo, 32> MemOpRecords;
1555   for (SUnit *SU : MemOps) {
1556     unsigned BaseReg;
1557     int64_t Offset;
1558     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1559       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1560   }
1561   if (MemOpRecords.size() < 2)
1562     return;
1563
1564   std::sort(MemOpRecords.begin(), MemOpRecords.end());
1565   unsigned ClusterLength = 1;
1566   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1567     SUnit *SUa = MemOpRecords[Idx].SU;
1568     SUnit *SUb = MemOpRecords[Idx+1].SU;
1569     if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1570                                  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1571                                  ClusterLength) &&
1572         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1573       DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1574             << SUb->NodeNum << ")\n");
1575       // Copy successor edges from SUa to SUb. Interleaving computation
1576       // dependent on SUa can prevent load combining due to register reuse.
1577       // Predecessor edges do not need to be copied from SUb to SUa since nearby
1578       // loads should have effectively the same inputs.
1579       for (const SDep &Succ : SUa->Succs) {
1580         if (Succ.getSUnit() == SUb)
1581           continue;
1582         DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
1583         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1584       }
1585       ++ClusterLength;
1586     } else
1587       ClusterLength = 1;
1588   }
1589 }
1590
1591 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1592 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1593   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1594
1595   // Map DAG NodeNum to store chain ID.
1596   DenseMap<unsigned, unsigned> StoreChainIDs;
1597   // Map each store chain to a set of dependent MemOps.
1598   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1599   for (SUnit &SU : DAG->SUnits) {
1600     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1601         (!IsLoad && !SU.getInstr()->mayStore()))
1602       continue;
1603
1604     unsigned ChainPredID = DAG->SUnits.size();
1605     for (const SDep &Pred : SU.Preds) {
1606       if (Pred.isCtrl()) {
1607         ChainPredID = Pred.getSUnit()->NodeNum;
1608         break;
1609       }
1610     }
1611     // Check if this chain-like pred has been seen
1612     // before. ChainPredID==MaxNodeID at the top of the schedule.
1613     unsigned NumChains = StoreChainDependents.size();
1614     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1615       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1616     if (Result.second)
1617       StoreChainDependents.resize(NumChains + 1);
1618     StoreChainDependents[Result.first->second].push_back(&SU);
1619   }
1620
1621   // Iterate over the store chains.
1622   for (auto &SCD : StoreChainDependents)
1623     clusterNeighboringMemOps(SCD, DAG);
1624 }
1625
1626 //===----------------------------------------------------------------------===//
1627 // CopyConstrain - DAG post-processing to encourage copy elimination.
1628 //===----------------------------------------------------------------------===//
1629
namespace {

/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
  // Transient state, assigned by apply() for the region being processed.

  // Slot index of the first non-debug instruction in the scheduling region.
  SlotIndex RegionBeginIdx;

  // RegionEndIdx is the slot index of the last non-debug instruction in the
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  SlotIndex RegionEndIdx;

public:
  // Both arguments are accepted but ignored by this constructor.
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}

  // Entry point: visits every COPY in the DAG and tries to constrain it.
  void apply(ScheduleDAGInstrs *DAGInstrs) override;

protected:
  // Adds the weak edges for a single copy; see the definition for details.
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};

} // end anonymous namespace
1653
1654 namespace llvm {
1655
1656 std::unique_ptr<ScheduleDAGMutation>
1657 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1658                                const TargetRegisterInfo *TRI) {
1659   return llvm::make_unique<CopyConstrain>(TII, TRI);
1660 }
1661
1662 } // end namespace llvm
1663
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0:     = dst
/// I1: src = ...
/// I2:     = dst
/// I3: dst = src (copy)
/// (create pred->succ edges I0->I1, I2->I1)
///
/// 2) Local copy:
/// I0: dst = src (copy)
/// I1:     = dst
/// I2: src = ...
/// I3:     = dst
/// (create pred->succ edges I1->I2, I3->I2)
///
/// Although the MachineScheduler is currently constrained to single blocks,
/// this algorithm should handle extended blocks. An EBB is a set of
/// contiguously numbered blocks such that the previous block in the EBB is
/// always the single predecessor.
///
/// \param CopySU scheduling unit of the COPY instruction to constrain.
/// \param DAG DAG that provides the LiveIntervals and receives the weak edges.
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  LiveIntervals *LIS = DAG->getLIS();
  MachineInstr *Copy = CopySU->getInstr();

  // Check for pure vreg copies.
  const MachineOperand &SrcOp = Copy->getOperand(1);
  unsigned SrcReg = SrcOp.getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
    return;

  const MachineOperand &DstOp = Copy->getOperand(0);
  unsigned DstReg = DstOp.getReg();
  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    return;

  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
  // If both the copy's source and dest are local live intervals, then we
  // should treat the dest as the global for the purpose of adding
  // constraints. This adds edges from source's other uses to the copy.
  unsigned LocalReg = SrcReg;
  unsigned GlobalReg = DstReg;
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    // The source is not local; try the roles the other way around.
    LocalReg = DstReg;
    GlobalReg = SrcReg;
    LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
      return;
  }
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

  // Find the global segment after the start of the local LI.
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  // local live range. We could create edges from other global uses to the local
  // start, but the coalescer should have already eliminated these cases, so
  // don't bother dealing with it.
  if (GlobalSegment == GlobalLI->end())
    return;

  // If GlobalSegment is killed at the LocalLI->start, the call to find()
  // returned the next global segment. But if GlobalSegment overlaps with
  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  if (GlobalSegment->contains(LocalLI->beginIndex()))
    ++GlobalSegment;

  if (GlobalSegment == GlobalLI->end())
    return;

  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  if (GlobalSegment != GlobalLI->begin()) {
    // Two address defs have no hole.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                               GlobalSegment->start)) {
      return;
    }
    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then can't make a hole here.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                               LocalLI->beginIndex())) {
      return;
    }
    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    // it would be a disconnected component in the live range.
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");
  }
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  if (!GlobalDef)
    return;

  // Give up when the defining instruction has no scheduling unit in this DAG.
  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  if (!GlobalSU)
    return;

  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  // constraining the uses of the last local def to precede GlobalDef.
  // Candidate edges are only collected here; if any of them cannot be added,
  // the function returns without modifying the DAG.
  SmallVector<SUnit*,8> LocalUses;
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  for (const SDep &Succ : LastLocalSU->Succs) {
    if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
      continue;
    if (Succ.getSUnit() == GlobalSU)
      continue;
    if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
      return;
    LocalUses.push_back(Succ.getSUnit());
  }
  // Open the top of the GlobalLI hole by constraining any earlier global uses
  // to precede the start of LocalLI.
  SmallVector<SUnit*,8> GlobalUses;
  MachineInstr *FirstLocalDef =
    LIS->getInstructionFromIndex(LocalLI->beginIndex());
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  for (const SDep &Pred : GlobalSU->Preds) {
    if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
      continue;
    if (Pred.getSUnit() == FirstLocalSU)
      continue;
    if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
      return;
    GlobalUses.push_back(Pred.getSUnit());
  }
  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  // Add the weak edges.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
          << GlobalSU->NodeNum << ")\n");
    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  }
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
          << FirstLocalSU->NodeNum << ")\n");
    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  }
}
1806
1807 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1808 /// copy elimination.
1809 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1810   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1811   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1812
1813   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1814   if (FirstPos == DAG->end())
1815     return;
1816   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1817   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1818       *priorNonDebug(DAG->end(), DAG->begin()));
1819
1820   for (SUnit &SU : DAG->SUnits) {
1821     if (!SU.getInstr()->isCopy())
1822       continue;
1823
1824     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1825   }
1826 }
1827
1828 //===----------------------------------------------------------------------===//
1829 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1830 // and possibly other custom schedulers.
1831 //===----------------------------------------------------------------------===//
1832
// Sentinel cycle value (all bits set) used to mark a resource entry in
// SchedBoundary::ReservedCycles that has never been reserved.
static const unsigned InvalidCycle = std::numeric_limits<unsigned>::max();
1834
1835 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1836
/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
///
/// \param LFactor scale applied to \p Latency before comparing it to \p Count.
/// \param Count resource usage, already scaled.
/// \param Latency latency in cycles.
/// \return true when Count exceeds the scaled latency by more than LFactor.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                               unsigned Latency) {
  // The difference is formed in unsigned arithmetic and then reinterpreted as
  // signed, so a Count below the scaled latency compares as negative.
  const unsigned ScaledLatency = Latency * LFactor;
  const int Excess = static_cast<int>(Count - ScaledLatency);
  return Excess > static_cast<int>(LFactor);
}
1843
1844 void SchedBoundary::reset() {
1845   // A new HazardRec is created for each DAG and owned by SchedBoundary.
1846   // Destroying and reconstructing it is very expensive though. So keep
1847   // invalid, placeholder HazardRecs.
1848   if (HazardRec && HazardRec->isEnabled()) {
1849     delete HazardRec;
1850     HazardRec = nullptr;
1851   }
1852   Available.clear();
1853   Pending.clear();
1854   CheckPending = false;
1855   CurrCycle = 0;
1856   CurrMOps = 0;
1857   MinReadyCycle = std::numeric_limits<unsigned>::max();
1858   ExpectedLatency = 0;
1859   DependentLatency = 0;
1860   RetiredMOps = 0;
1861   MaxExecutedResCount = 0;
1862   ZoneCritResIdx = 0;
1863   IsResourceLimited = false;
1864   ReservedCycles.clear();
1865 #ifndef NDEBUG
1866   // Track the maximum number of stall cycles that could arise either from the
1867   // latency of a DAG edge or the number of cycles that a processor resource is
1868   // reserved (SchedBoundary::ReservedCycles).
1869   MaxObservedStall = 0;
1870 #endif
1871   // Reserve a zero-count for invalid CritResIdx.
1872   ExecutedResCounts.resize(1);
1873   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1874 }
1875
1876 void SchedRemainder::
1877 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1878   reset();
1879   if (!SchedModel->hasInstrSchedModel())
1880     return;
1881   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1882   for (SUnit &SU : DAG->SUnits) {
1883     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1884     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1885       * SchedModel->getMicroOpFactor();
1886     for (TargetSchedModel::ProcResIter
1887            PI = SchedModel->getWriteProcResBegin(SC),
1888            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1889       unsigned PIdx = PI->ProcResourceIdx;
1890       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1891       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1892     }
1893   }
1894 }
1895
1896 void SchedBoundary::
1897 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1898   reset();
1899   DAG = dag;
1900   SchedModel = smodel;
1901   Rem = rem;
1902   if (SchedModel->hasInstrSchedModel()) {
1903     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1904     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1905   }
1906 }
1907
1908 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1909 /// these "soft stalls" differently than the hard stall cycles based on CPU
1910 /// resources and computed by checkHazard(). A fully in-order model
1911 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1912 /// available for scheduling until they are ready. However, a weaker in-order
1913 /// model may use this for heuristics. For example, if a processor has in-order
1914 /// behavior when reading certain resources, this may come into play.
1915 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1916   if (!SU->isUnbuffered)
1917     return 0;
1918
1919   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1920   if (ReadyCycle > CurrCycle)
1921     return ReadyCycle - CurrCycle;
1922   return 0;
1923 }
1924
1925 /// Compute the next cycle at which the given processor resource can be
1926 /// scheduled.
1927 unsigned SchedBoundary::
1928 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1929   unsigned NextUnreserved = ReservedCycles[PIdx];
1930   // If this resource has never been used, always return cycle zero.
1931   if (NextUnreserved == InvalidCycle)
1932     return 0;
1933   // For bottom-up scheduling add the cycles needed for the current operation.
1934   if (!isTop())
1935     NextUnreserved += Cycles;
1936   return NextUnreserved;
1937 }
1938
1939 /// Does this SU have a hazard within the current instruction group.
1940 ///
1941 /// The scheduler supports two modes of hazard recognition. The first is the
1942 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1943 /// supports highly complicated in-order reservation tables
1944 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1945 ///
1946 /// The second is a streamlined mechanism that checks for hazards based on
1947 /// simple counters that the scheduler itself maintains. It explicitly checks
1948 /// for instruction dispatch limitations, including the number of micro-ops that
1949 /// can dispatch per cycle.
1950 ///
1951 /// TODO: Also check whether the SU must start a new group.
1952 bool SchedBoundary::checkHazard(SUnit *SU) {
1953   if (HazardRec->isEnabled()
1954       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1955     return true;
1956   }
1957
1958   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1959   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1960     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1961           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1962     return true;
1963   }
1964
1965   if (CurrMOps > 0 &&
1966       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1967        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1968     DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
1969                  << (isTop()? "begin" : "end") << " group\n");
1970     return true;
1971   }
1972
1973   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1974     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1975     for (const MCWriteProcResEntry &PE :
1976           make_range(SchedModel->getWriteProcResBegin(SC),
1977                      SchedModel->getWriteProcResEnd(SC))) {
1978       unsigned ResIdx = PE.ProcResourceIdx;
1979       unsigned Cycles = PE.Cycles;
1980       unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1981       if (NRCycle > CurrCycle) {
1982 #ifndef NDEBUG
1983         MaxObservedStall = std::max(Cycles, MaxObservedStall);
1984 #endif
1985         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
1986               << SchedModel->getResourceName(ResIdx)
1987               << "=" << NRCycle << "c\n");
1988         return true;
1989       }
1990     }
1991   }
1992   return false;
1993 }
1994
1995 // Find the unscheduled node in ReadySUs with the highest latency.
1996 unsigned SchedBoundary::
1997 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1998   SUnit *LateSU = nullptr;
1999   unsigned RemLatency = 0;
2000   for (SUnit *SU : ReadySUs) {
2001     unsigned L = getUnscheduledLatency(SU);
2002     if (L > RemLatency) {
2003       RemLatency = L;
2004       LateSU = SU;
2005     }
2006   }
2007   if (LateSU) {
2008     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2009           << LateSU->NodeNum << ") " << RemLatency << "c\n");
2010   }
2011   return RemLatency;
2012 }
2013
2014 // Count resources in this zone and the remaining unscheduled
2015 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2016 // resource index, or zero if the zone is issue limited.
2017 unsigned SchedBoundary::
2018 getOtherResourceCount(unsigned &OtherCritIdx) {
2019   OtherCritIdx = 0;
2020   if (!SchedModel->hasInstrSchedModel())
2021     return 0;
2022
2023   unsigned OtherCritCount = Rem->RemIssueCount
2024     + (RetiredMOps * SchedModel->getMicroOpFactor());
2025   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2026         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2027   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2028        PIdx != PEnd; ++PIdx) {
2029     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2030     if (OtherCount > OtherCritCount) {
2031       OtherCritCount = OtherCount;
2032       OtherCritIdx = PIdx;
2033     }
2034   }
2035   if (OtherCritIdx) {
2036     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2037           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2038           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2039   }
2040   return OtherCritCount;
2041 }
2042
2043 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2044   assert(SU->getInstr() && "Scheduled SUnit must have instr");
2045
2046 #ifndef NDEBUG
2047   // ReadyCycle was been bumped up to the CurrCycle when this node was
2048   // scheduled, but CurrCycle may have been eagerly advanced immediately after
2049   // scheduling, so may now be greater than ReadyCycle.
2050   if (ReadyCycle > CurrCycle)
2051     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2052 #endif
2053
2054   if (ReadyCycle < MinReadyCycle)
2055     MinReadyCycle = ReadyCycle;
2056
2057   // Check for interlocks first. For the purpose of other heuristics, an
2058   // instruction that cannot issue appears as if it's not in the ReadyQueue.
2059   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2060   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2061       Available.size() >= ReadyListLimit)
2062     Pending.push(SU);
2063   else
2064     Available.push(SU);
2065 }
2066
2067 /// Move the boundary of scheduled code by one cycle.
2068 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2069   if (SchedModel->getMicroOpBufferSize() == 0) {
2070     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2071            "MinReadyCycle uninitialized");
2072     if (MinReadyCycle > NextCycle)
2073       NextCycle = MinReadyCycle;
2074   }
2075   // Update the current micro-ops, which will issue in the next cycle.
2076   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2077   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2078
2079   // Decrement DependentLatency based on the next cycle.
2080   if ((NextCycle - CurrCycle) > DependentLatency)
2081     DependentLatency = 0;
2082   else
2083     DependentLatency -= (NextCycle - CurrCycle);
2084
2085   if (!HazardRec->isEnabled()) {
2086     // Bypass HazardRec virtual calls.
2087     CurrCycle = NextCycle;
2088   } else {
2089     // Bypass getHazardType calls in case of long latency.
2090     for (; CurrCycle != NextCycle; ++CurrCycle) {
2091       if (isTop())
2092         HazardRec->AdvanceCycle();
2093       else
2094         HazardRec->RecedeCycle();
2095     }
2096   }
2097   CheckPending = true;
2098   IsResourceLimited =
2099       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2100                          getScheduledLatency());
2101
2102   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2103 }
2104
2105 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2106   ExecutedResCounts[PIdx] += Count;
2107   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2108     MaxExecutedResCount = ExecutedResCounts[PIdx];
2109 }
2110
2111 /// Add the given processor resource to this scheduled zone.
2112 ///
2113 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2114 /// during which this resource is consumed.
2115 ///
2116 /// \return the next cycle at which the instruction may execute without
2117 /// oversubscribing resources.
2118 unsigned SchedBoundary::
2119 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2120   unsigned Factor = SchedModel->getResourceFactor(PIdx);
2121   unsigned Count = Factor * Cycles;
2122   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
2123         << " +" << Cycles << "x" << Factor << "u\n");
2124
2125   // Update Executed resources counts.
2126   incExecutedResources(PIdx, Count);
2127   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2128   Rem->RemainingCounts[PIdx] -= Count;
2129
2130   // Check if this resource exceeds the current critical resource. If so, it
2131   // becomes the critical resource.
2132   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2133     ZoneCritResIdx = PIdx;
2134     DEBUG(dbgs() << "  *** Critical resource "
2135           << SchedModel->getResourceName(PIdx) << ": "
2136           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2137   }
2138   // For reserved resources, record the highest cycle using the resource.
2139   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2140   if (NextAvailable > CurrCycle) {
2141     DEBUG(dbgs() << "  Resource conflict: "
2142           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2143           << NextAvailable << "\n");
2144   }
2145   return NextAvailable;
2146 }
2147
/// Move the boundary of scheduled code by one SUnit.
///
/// Updates, in order: the hazard recognizer, the retired micro-op and
/// per-resource counts, the reserved-resource table, the expected/dependent
/// latencies, and finally the current cycle and CurrMOps.
///
/// \param SU the scheduling unit that has just been scheduled in this zone.
void SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // checkHazard should prevent scheduling multiple instructions per cycle that
  // exceed the issue width.
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  assert(
      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
      "Cannot schedule this instruction's MicroOps in the current cycle.");

  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");

  // NextCycle accumulates the latest cycle demanded by any stall below.
  unsigned NextCycle = CurrCycle;
  switch (SchedModel->getMicroOpBufferSize()) {
  case 0:
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    break;
  case 1:
    if (ReadyCycle > NextCycle) {
      NextCycle = ReadyCycle;
      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    }
    break;
  default:
    // We don't currently model the OOO reorder buffer, so consider all
    // scheduled MOps to be "retired". We do loosely model in-order resource
    // latency. If this instruction uses an in-order resource, account for any
    // likely stall cycles.
    if (SU->isUnbuffered && ReadyCycle > NextCycle)
      NextCycle = ReadyCycle;
    break;
  }
  RetiredMOps += IncMOps;

  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    Rem->RemIssueCount -= DecRemIssue;
    if (ZoneCritResIdx) {
      // Scale scheduled micro-ops for comparing with the critical resource.
      unsigned ScaledMOps =
        RetiredMOps * SchedModel->getMicroOpFactor();

      // If scaled micro-ops are now more than the previous critical resource by
      // a full cycle, then micro-ops issue becomes critical.
      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
          >= (int)SchedModel->getLatencyFactor()) {
        ZoneCritResIdx = 0;
        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
      }
    }
    // Count every resource the instruction writes and stall until the latest
    // cycle any of them reports as available.
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned RCycle =
        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
      if (RCycle > NextCycle)
        NextCycle = RCycle;
    }
    if (SU->hasReservedResource) {
      // For reserved resources, record the highest cycle using the resource.
      // For top-down scheduling, this is the cycle in which we schedule this
      // instruction plus the number of cycles the operations reserves the
      // resource. For bottom-up is it simply the instruction's cycle.
      for (TargetSchedModel::ProcResIter
             PI = SchedModel->getWriteProcResBegin(SC),
             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
        unsigned PIdx = PI->ProcResourceIdx;
        // Only resources with a zero BufferSize are tracked in ReservedCycles.
        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
          if (isTop()) {
            ReservedCycles[PIdx] =
              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
          }
          else
            ReservedCycles[PIdx] = NextCycle;
        }
      }
    }
  }
  // Update ExpectedLatency and DependentLatency.
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  if (SU->getDepth() > TopLatency) {
    TopLatency = SU->getDepth();
    DEBUG(dbgs() << "  " << Available.getName()
          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
  }
  if (SU->getHeight() > BotLatency) {
    BotLatency = SU->getHeight();
    DEBUG(dbgs() << "  " << Available.getName()
          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
  }
  // If we stall for any reason, bump the cycle.
  if (NextCycle > CurrCycle)
    bumpCycle(NextCycle);
  else
    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    // resource limited. If a stall occurred, bumpCycle does this.
    IsResourceLimited =
        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
                           getScheduledLatency());

  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
  // one cycle.  Since we commonly reach the max MOps here, opportunistically
  // bump the cycle to avoid uselessly checking everything in the readyQ.
  CurrMOps += IncMOps;

  // Bump the cycle count for issue group constraints.
  // This must be done after NextCycle has been adjusted for all other stalls.
  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
  // currCycle to X.
  if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
      (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
    DEBUG(dbgs() << "  Bump cycle to "
                 << (isTop() ? "end" : "begin") << " group\n");
    bumpCycle(++NextCycle);
  }

  // Keep advancing while the micro-ops charged to the current cycle fill or
  // exceed the issue width.
  while (CurrMOps >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
          << " at cycle " << CurrCycle << '\n');
    bumpCycle(++NextCycle);
  }
  DEBUG(dumpScheduledState());
}
2286
2287 /// Release pending ready nodes in to the available queue. This makes them
2288 /// visible to heuristics.
2289 void SchedBoundary::releasePending() {
2290   // If the available queue is empty, it is safe to reset MinReadyCycle.
2291   if (Available.empty())
2292     MinReadyCycle = std::numeric_limits<unsigned>::max();
2293
2294   // Check to see if any of the pending instructions are ready to issue.  If
2295   // so, add them to the available queue.
2296   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2297   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2298     SUnit *SU = *(Pending.begin()+i);
2299     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2300
2301     if (ReadyCycle < MinReadyCycle)
2302       MinReadyCycle = ReadyCycle;
2303
2304     if (!IsBuffered && ReadyCycle > CurrCycle)
2305       continue;
2306
2307     if (checkHazard(SU))
2308       continue;
2309
2310     if (Available.size() >= ReadyListLimit)
2311       break;
2312
2313     Available.push(SU);
2314     Pending.remove(Pending.begin()+i);
2315     --i; --e;
2316   }
2317   CheckPending = false;
2318 }
2319
2320 /// Remove SU from the ready set for this boundary.
2321 void SchedBoundary::removeReady(SUnit *SU) {
2322   if (Available.isInQueue(SU))
2323     Available.remove(Available.find(SU));
2324   else {
2325     assert(Pending.isInQueue(SU) && "bad ready count");
2326     Pending.remove(Pending.find(SU));
2327   }
2328 }
2329
2330 /// If this queue only has one ready candidate, return it. As a side effect,
2331 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2332 /// one node is ready. If multiple instructions are ready, return NULL.
2333 SUnit *SchedBoundary::pickOnlyChoice() {
2334   if (CheckPending)
2335     releasePending();
2336
2337   if (CurrMOps > 0) {
2338     // Defer any ready instrs that now have a hazard.
2339     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2340       if (checkHazard(*I)) {
2341         Pending.push(*I);
2342         I = Available.remove(I);
2343         continue;
2344       }
2345       ++I;
2346     }
2347   }
2348   for (unsigned i = 0; Available.empty(); ++i) {
2349 //  FIXME: Re-enable assert once PR20057 is resolved.
2350 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2351 //           "permanent hazard");
2352     (void)i;
2353     bumpCycle(CurrCycle + 1);
2354     releasePending();
2355   }
2356
2357   DEBUG(Pending.dump());
2358   DEBUG(Available.dump());
2359
2360   if (Available.size() == 1)
2361     return *Available.begin();
2362   return nullptr;
2363 }
2364
2365 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2366 // This is useful information to dump after bumpNode.
2367 // Note that the Queue contents are more useful before pickNodeFromQueue.
2368 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2369   unsigned ResFactor;
2370   unsigned ResCount;
2371   if (ZoneCritResIdx) {
2372     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2373     ResCount = getResourceCount(ZoneCritResIdx);
2374   } else {
2375     ResFactor = SchedModel->getMicroOpFactor();
2376     ResCount = RetiredMOps * ResFactor;
2377   }
2378   unsigned LFactor = SchedModel->getLatencyFactor();
2379   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2380          << "  Retired: " << RetiredMOps;
2381   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2382   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2383          << ResCount / ResFactor << " "
2384          << SchedModel->getResourceName(ZoneCritResIdx)
2385          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2386          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2387          << " limited.\n";
2388 }
2389 #endif
2390
2391 //===----------------------------------------------------------------------===//
2392 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2393 //===----------------------------------------------------------------------===//
2394
2395 void GenericSchedulerBase::SchedCandidate::
2396 initResourceDelta(const ScheduleDAGMI *DAG,
2397                   const TargetSchedModel *SchedModel) {
2398   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2399     return;
2400
2401   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2402   for (TargetSchedModel::ProcResIter
2403          PI = SchedModel->getWriteProcResBegin(SC),
2404          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2405     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2406       ResDelta.CritResources += PI->Cycles;
2407     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2408       ResDelta.DemandedResources += PI->Cycles;
2409   }
2410 }
2411
2412 /// Set the CandPolicy given a scheduling zone given the current resources and
2413 /// latencies inside and outside the zone.
2414 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2415                                      SchedBoundary &CurrZone,
2416                                      SchedBoundary *OtherZone) {
2417   // Apply preemptive heuristics based on the total latency and resources
2418   // inside and outside this zone. Potential stalls should be considered before
2419   // following this policy.
2420
2421   // Compute remaining latency. We need this both to determine whether the
2422   // overall schedule has become latency-limited and whether the instructions
2423   // outside this zone are resource or latency limited.
2424   //
2425   // The "dependent" latency is updated incrementally during scheduling as the
2426   // max height/depth of scheduled nodes minus the cycles since it was
2427   // scheduled:
2428   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2429   //
2430   // The "independent" latency is the max ready queue depth:
2431   //   ILat = max N.depth for N in Available|Pending
2432   //
2433   // RemainingLatency is the greater of independent and dependent latency.
2434   unsigned RemLatency = CurrZone.getDependentLatency();
2435   RemLatency = std::max(RemLatency,
2436                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2437   RemLatency = std::max(RemLatency,
2438                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2439
2440   // Compute the critical resource outside the zone.
2441   unsigned OtherCritIdx = 0;
2442   unsigned OtherCount =
2443     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2444
2445   bool OtherResLimited = false;
2446   if (SchedModel->hasInstrSchedModel())
2447     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2448                                          OtherCount, RemLatency);
2449
2450   // Schedule aggressively for latency in PostRA mode. We don't check for
2451   // acyclic latency during PostRA, and highly out-of-order processors will
2452   // skip PostRA scheduling.
2453   if (!OtherResLimited) {
2454     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2455       Policy.ReduceLatency |= true;
2456       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2457             << " RemainingLatency " << RemLatency << " + "
2458             << CurrZone.getCurrCycle() << "c > CritPath "
2459             << Rem.CriticalPath << "\n");
2460     }
2461   }
2462   // If the same resource is limiting inside and outside the zone, do nothing.
2463   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2464     return;
2465
2466   DEBUG(
2467     if (CurrZone.isResourceLimited()) {
2468       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2469              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2470              << "\n";
2471     }
2472     if (OtherResLimited)
2473       dbgs() << "  RemainingLimit: "
2474              << SchedModel->getResourceName(OtherCritIdx) << "\n";
2475     if (!CurrZone.isResourceLimited() && !OtherResLimited)
2476       dbgs() << "  Latency limited both directions.\n");
2477
2478   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2479     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2480
2481   if (OtherResLimited)
2482     Policy.DemandResIdx = OtherCritIdx;
2483 }
2484
2485 #ifndef NDEBUG
2486 const char *GenericSchedulerBase::getReasonStr(
2487   GenericSchedulerBase::CandReason Reason) {
2488   switch (Reason) {
2489   case NoCand:         return "NOCAND    ";
2490   case Only1:          return "ONLY1     ";
2491   case PhysRegCopy:    return "PREG-COPY ";
2492   case RegExcess:      return "REG-EXCESS";
2493   case RegCritical:    return "REG-CRIT  ";
2494   case Stall:          return "STALL     ";
2495   case Cluster:        return "CLUSTER   ";
2496   case Weak:           return "WEAK      ";
2497   case RegMax:         return "REG-MAX   ";
2498   case ResourceReduce: return "RES-REDUCE";
2499   case ResourceDemand: return "RES-DEMAND";
2500   case TopDepthReduce: return "TOP-DEPTH ";
2501   case TopPathReduce:  return "TOP-PATH  ";
2502   case BotHeightReduce:return "BOT-HEIGHT";
2503   case BotPathReduce:  return "BOT-PATH  ";
2504   case NextDefUse:     return "DEF-USE   ";
2505   case NodeOrder:      return "ORDER     ";
2506   };
2507   llvm_unreachable("Unknown reason!");
2508 }
2509
2510 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2511   PressureChange P;
2512   unsigned ResIdx = 0;
2513   unsigned Latency = 0;
2514   switch (Cand.Reason) {
2515   default:
2516     break;
2517   case RegExcess:
2518     P = Cand.RPDelta.Excess;
2519     break;
2520   case RegCritical:
2521     P = Cand.RPDelta.CriticalMax;
2522     break;
2523   case RegMax:
2524     P = Cand.RPDelta.CurrentMax;
2525     break;
2526   case ResourceReduce:
2527     ResIdx = Cand.Policy.ReduceResIdx;
2528     break;
2529   case ResourceDemand:
2530     ResIdx = Cand.Policy.DemandResIdx;
2531     break;
2532   case TopDepthReduce:
2533     Latency = Cand.SU->getDepth();
2534     break;
2535   case TopPathReduce:
2536     Latency = Cand.SU->getHeight();
2537     break;
2538   case BotHeightReduce:
2539     Latency = Cand.SU->getHeight();
2540     break;
2541   case BotPathReduce:
2542     Latency = Cand.SU->getDepth();
2543     break;
2544   }
2545   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2546   if (P.isValid())
2547     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2548            << ":" << P.getUnitInc() << " ";
2549   else
2550     dbgs() << "      ";
2551   if (ResIdx)
2552     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2553   else
2554     dbgs() << "         ";
2555   if (Latency)
2556     dbgs() << " " << Latency << " cycles ";
2557   else
2558     dbgs() << "          ";
2559   dbgs() << '\n';
2560 }
2561 #endif
2562
2563 /// Return true if this heuristic determines order.
2564 static bool tryLess(int TryVal, int CandVal,
2565                     GenericSchedulerBase::SchedCandidate &TryCand,
2566                     GenericSchedulerBase::SchedCandidate &Cand,
2567                     GenericSchedulerBase::CandReason Reason) {
2568   if (TryVal < CandVal) {
2569     TryCand.Reason = Reason;
2570     return true;
2571   }
2572   if (TryVal > CandVal) {
2573     if (Cand.Reason > Reason)
2574       Cand.Reason = Reason;
2575     return true;
2576   }
2577   return false;
2578 }
2579
2580 static bool tryGreater(int TryVal, int CandVal,
2581                        GenericSchedulerBase::SchedCandidate &TryCand,
2582                        GenericSchedulerBase::SchedCandidate &Cand,
2583                        GenericSchedulerBase::CandReason Reason) {
2584   if (TryVal > CandVal) {
2585     TryCand.Reason = Reason;
2586     return true;
2587   }
2588   if (TryVal < CandVal) {
2589     if (Cand.Reason > Reason)
2590       Cand.Reason = Reason;
2591     return true;
2592   }
2593   return false;
2594 }
2595
2596 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2597                        GenericSchedulerBase::SchedCandidate &Cand,
2598                        SchedBoundary &Zone) {
2599   if (Zone.isTop()) {
2600     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2601       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2602                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2603         return true;
2604     }
2605     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2606                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2607       return true;
2608   } else {
2609     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2610       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2611                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2612         return true;
2613     }
2614     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2615                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2616       return true;
2617   }
2618   return false;
2619 }
2620
2621 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2622   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2623         << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2624 }
2625
2626 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2627   tracePick(Cand.Reason, Cand.AtTop);
2628 }
2629
2630 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2631   assert(dag->hasVRegLiveness() &&
2632          "(PreRA)GenericScheduler needs vreg liveness");
2633   DAG = static_cast<ScheduleDAGMILive*>(dag);
2634   SchedModel = DAG->getSchedModel();
2635   TRI = DAG->TRI;
2636
2637   Rem.init(DAG, SchedModel);
2638   Top.init(DAG, SchedModel, &Rem);
2639   Bot.init(DAG, SchedModel, &Rem);
2640
2641   // Initialize resource counts.
2642
2643   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2644   // are disabled, then these HazardRecs will be disabled.
2645   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2646   if (!Top.HazardRec) {
2647     Top.HazardRec =
2648         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2649             Itin, DAG);
2650   }
2651   if (!Bot.HazardRec) {
2652     Bot.HazardRec =
2653         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2654             Itin, DAG);
2655   }
2656   TopCand.SU = nullptr;
2657   BotCand.SU = nullptr;
2658 }
2659
2660 /// Initialize the per-region scheduling policy.
2661 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2662                                   MachineBasicBlock::iterator End,
2663                                   unsigned NumRegionInstrs) {
2664   const MachineFunction &MF = *Begin->getMF();
2665   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2666
2667   // Avoid setting up the register pressure tracker for small regions to save
2668   // compile time. As a rough heuristic, only track pressure when the number of
2669   // schedulable instructions exceeds half the integer register file.
2670   RegionPolicy.ShouldTrackPressure = true;
2671   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2672     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2673     if (TLI->isTypeLegal(LegalIntVT)) {
2674       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2675         TLI->getRegClassFor(LegalIntVT));
2676       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2677     }
2678   }
2679
2680   // For generic targets, we default to bottom-up, because it's simpler and more
2681   // compile-time optimizations have been implemented in that direction.
2682   RegionPolicy.OnlyBottomUp = true;
2683
2684   // Allow the subtarget to override default policy.
2685   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2686
2687   // After subtarget overrides, apply command line options.
2688   if (!EnableRegPressure)
2689     RegionPolicy.ShouldTrackPressure = false;
2690
2691   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2692   // e.g. -misched-bottomup=false allows scheduling in both directions.
2693   assert((!ForceTopDown || !ForceBottomUp) &&
2694          "-misched-topdown incompatible with -misched-bottomup");
2695   if (ForceBottomUp.getNumOccurrences() > 0) {
2696     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2697     if (RegionPolicy.OnlyBottomUp)
2698       RegionPolicy.OnlyTopDown = false;
2699   }
2700   if (ForceTopDown.getNumOccurrences() > 0) {
2701     RegionPolicy.OnlyTopDown = ForceTopDown;
2702     if (RegionPolicy.OnlyTopDown)
2703       RegionPolicy.OnlyBottomUp = false;
2704   }
2705 }
2706
2707 void GenericScheduler::dumpPolicy() const {
2708   // Cannot completely remove virtual function even in release mode.
2709 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2710   dbgs() << "GenericScheduler RegionPolicy: "
2711          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2712          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2713          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2714          << "\n";
2715 #endif
2716 }
2717
2718 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2719 /// critical path by more cycles than it takes to drain the instruction buffer.
2720 /// We estimate an upper bounds on in-flight instructions as:
2721 ///
2722 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2723 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2724 /// InFlightResources = InFlightIterations * LoopResources
2725 ///
2726 /// TODO: Check execution resources in addition to IssueCount.
2727 void GenericScheduler::checkAcyclicLatency() {
2728   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2729     return;
2730
2731   // Scaled number of cycles per loop iteration.
2732   unsigned IterCount =
2733     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2734              Rem.RemIssueCount);
2735   // Scaled acyclic critical path.
2736   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2737   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2738   unsigned InFlightCount =
2739     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2740   unsigned BufferLimit =
2741     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2742
2743   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2744
2745   DEBUG(dbgs() << "IssueCycles="
2746         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2747         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2748         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2749         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2750         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2751         if (Rem.IsAcyclicLatencyLimited)
2752           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2753 }
2754
2755 void GenericScheduler::registerRoots() {
2756   Rem.CriticalPath = DAG->ExitSU.getDepth();
2757
2758   // Some roots may not feed into ExitSU. Check all of them in case.
2759   for (const SUnit *SU : Bot.Available) {
2760     if (SU->getDepth() > Rem.CriticalPath)
2761       Rem.CriticalPath = SU->getDepth();
2762   }
2763   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2764   if (DumpCriticalPathLength) {
2765     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2766   }
2767
2768   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2769     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2770     checkAcyclicLatency();
2771   }
2772 }
2773
2774 static bool tryPressure(const PressureChange &TryP,
2775                         const PressureChange &CandP,
2776                         GenericSchedulerBase::SchedCandidate &TryCand,
2777                         GenericSchedulerBase::SchedCandidate &Cand,
2778                         GenericSchedulerBase::CandReason Reason,
2779                         const TargetRegisterInfo *TRI,
2780                         const MachineFunction &MF) {
2781   // If one candidate decreases and the other increases, go with it.
2782   // Invalid candidates have UnitInc==0.
2783   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2784                  Reason)) {
2785     return true;
2786   }
2787   // Do not compare the magnitude of pressure changes between top and bottom
2788   // boundary.
2789   if (Cand.AtTop != TryCand.AtTop)
2790     return false;
2791
2792   // If both candidates affect the same set in the same boundary, go with the
2793   // smallest increase.
2794   unsigned TryPSet = TryP.getPSetOrMax();
2795   unsigned CandPSet = CandP.getPSetOrMax();
2796   if (TryPSet == CandPSet) {
2797     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2798                    Reason);
2799   }
2800
2801   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2802                                  std::numeric_limits<int>::max();
2803
2804   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2805                                    std::numeric_limits<int>::max();
2806
2807   // If the candidates are decreasing pressure, reverse priority.
2808   if (TryP.getUnitInc() < 0)
2809     std::swap(TryRank, CandRank);
2810   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2811 }
2812
2813 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2814   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2815 }
2816
2817 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2818 /// their physreg def/use.
2819 ///
2820 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2821 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2822 /// with the operation that produces or consumes the physreg. We'll do this when
2823 /// regalloc has support for parallel copies.
2824 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2825   const MachineInstr *MI = SU->getInstr();
2826   if (!MI->isCopy())
2827     return 0;
2828
2829   unsigned ScheduledOper = isTop ? 1 : 0;
2830   unsigned UnscheduledOper = isTop ? 0 : 1;
2831   // If we have already scheduled the physreg produce/consumer, immediately
2832   // schedule the copy.
2833   if (TargetRegisterInfo::isPhysicalRegister(
2834         MI->getOperand(ScheduledOper).getReg()))
2835     return 1;
2836   // If the physreg is at the boundary, defer it. Otherwise schedule it
2837   // immediately to free the dependent. We can hoist the copy later.
2838   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2839   if (TargetRegisterInfo::isPhysicalRegister(
2840         MI->getOperand(UnscheduledOper).getReg()))
2841     return AtBoundary ? -1 : 1;
2842   return 0;
2843 }
2844
2845 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2846                                      bool AtTop,
2847                                      const RegPressureTracker &RPTracker,
2848                                      RegPressureTracker &TempTracker) {
2849   Cand.SU = SU;
2850   Cand.AtTop = AtTop;
2851   if (DAG->isTrackingPressure()) {
2852     if (AtTop) {
2853       TempTracker.getMaxDownwardPressureDelta(
2854         Cand.SU->getInstr(),
2855         Cand.RPDelta,
2856         DAG->getRegionCriticalPSets(),
2857         DAG->getRegPressure().MaxSetPressure);
2858     } else {
2859       if (VerifyScheduling) {
2860         TempTracker.getMaxUpwardPressureDelta(
2861           Cand.SU->getInstr(),
2862           &DAG->getPressureDiff(Cand.SU),
2863           Cand.RPDelta,
2864           DAG->getRegionCriticalPSets(),
2865           DAG->getRegPressure().MaxSetPressure);
2866       } else {
2867         RPTracker.getUpwardPressureDelta(
2868           Cand.SU->getInstr(),
2869           DAG->getPressureDiff(Cand.SU),
2870           Cand.RPDelta,
2871           DAG->getRegionCriticalPSets(),
2872           DAG->getRegPressure().MaxSetPressure);
2873       }
2874     }
2875   }
2876   DEBUG(if (Cand.RPDelta.Excess.isValid())
2877           dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
2878                  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
2879                  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2880 }
2881
/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// The heuristics are tried strictly in the order written below; the first one
/// that distinguishes the two candidates ends the comparison. The outcome is
/// reported through the Reason fields: when TryCand wins, TryCand.Reason is set
/// to the deciding heuristic's reason; when Cand wins, Cand.Reason may be
/// lowered to that reason and TryCand.Reason is left as the caller set it.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
///             if Cand is from a different zone than TryCand.
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                                    SchedCandidate &TryCand,
                                    SchedBoundary *Zone) {
  // Initialize the candidate if needed.
  // With no valid current best, TryCand is accepted by node order.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  // Prefer the candidate with the higher physreg copy bias.
  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
                 TryCand, Cand, PhysRegCopy))
    return;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
                                               Cand.RPDelta.Excess,
                                               TryCand, Cand, RegExcess, TRI,
                                               DAG->MF))
    return;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
                                               Cand.RPDelta.CriticalMax,
                                               TryCand, Cand, RegCritical, TRI,
                                               DAG->MF))
    return;

  // We only compare a subset of features when comparing nodes between
  // Top and Bottom boundary. Some properties are simply incomparable, in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU,
                 TryCand, Cand, Cluster))
    return;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    // Prefer the candidate with fewer weak edges left.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop),
                TryCand, Cand, Weak))
      return;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
                                               Cand.RPDelta.CurrentMax,
                                               TryCand, Cand, RegMax, TRI,
                                               DAG->MF))
    return;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    // TryCand's resource delta is computed here, right before its first use.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources,
                   TryCand, Cand, ResourceDemand))
      return;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return;

    // Fall through to original instruction order.
    // At the top the lower node number wins, at the bottom the higher one.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }
}
2994
2995 /// Pick the best candidate from the queue.
2996 ///
2997 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2998 /// DAG building. To adjust for the current scheduling location we need to
2999 /// maintain the number of vreg uses remaining to be top-scheduled.
3000 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3001                                          const CandPolicy &ZonePolicy,
3002                                          const RegPressureTracker &RPTracker,
3003                                          SchedCandidate &Cand) {
3004   // getMaxPressureDelta temporarily modifies the tracker.
3005   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3006
3007   ReadyQueue &Q = Zone.Available;
3008   for (SUnit *SU : Q) {
3009
3010     SchedCandidate TryCand(ZonePolicy);
3011     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3012     // Pass SchedBoundary only when comparing nodes from the same boundary.
3013     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3014     tryCandidate(Cand, TryCand, ZoneArg);
3015     if (TryCand.Reason != NoCand) {
3016       // Initialize resource delta if needed in case future heuristics query it.
3017       if (TryCand.ResDelta == SchedResourceDelta())
3018         TryCand.initResourceDelta(DAG, SchedModel);
3019       Cand.setBest(TryCand);
3020       DEBUG(traceCandidate(Cand));
3021     }
3022   }
3023 }
3024
/// Pick the best candidate node from either the top or bottom queue.
///
/// If either zone has exactly one choice, that node is returned right away
/// (bottom is tried first). Otherwise a policy is computed for each zone, the
/// cached BotCand/TopCand are re-picked when they are no longer usable, and
/// the two are compared against each other with BotCand as the starting best.
///
/// \param IsTopNode set to true when the returned node comes from the top
///                  zone and to false when it comes from the bottom zone.
/// \returns the SUnit to schedule next.
SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    tracePick(Only1, false);
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    tracePick(Only1, true);
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  // It is re-picked when it is invalid, its node has already been scheduled,
  // or the bottom policy has changed since it was picked.
  DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    // Under VerifyScheduling, check that a fresh pick agrees with the cached
    // candidate.
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  // TopCand is refreshed under the same conditions as BotCand above.
  DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    // Under VerifyScheduling, check that a fresh pick agrees with the cached
    // candidate.
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
           "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  assert(BotCand.isValid());
  assert(TopCand.isValid());
  SchedCandidate Cand = BotCand;
  // Clear TopCand's reason so that a value other than NoCand after the
  // comparison means TopCand beat BotCand. A null zone marks this as a
  // comparison across boundaries.
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
    DEBUG(traceCandidate(Cand));
  }

  IsTopNode = Cand.AtTop;
  tracePick(Cand);
  return Cand.SU;
}
3103
3104 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3105 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3106   if (DAG->top() == DAG->bottom()) {
3107     assert(Top.Available.empty() && Top.Pending.empty() &&
3108            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3109     return nullptr;
3110   }
3111   SUnit *SU;
3112   do {
3113     if (RegionPolicy.OnlyTopDown) {
3114       SU = Top.pickOnlyChoice();
3115       if (!SU) {
3116         CandPolicy NoPolicy;
3117         TopCand.reset(NoPolicy);
3118         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3119         assert(TopCand.Reason != NoCand && "failed to find a candidate");
3120         tracePick(TopCand);
3121         SU = TopCand.SU;
3122       }
3123       IsTopNode = true;
3124     } else if (RegionPolicy.OnlyBottomUp) {
3125       SU = Bot.pickOnlyChoice();
3126       if (!SU) {
3127         CandPolicy NoPolicy;
3128         BotCand.reset(NoPolicy);
3129         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3130         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3131         tracePick(BotCand);
3132         SU = BotCand.SU;
3133       }
3134       IsTopNode = false;
3135     } else {
3136       SU = pickNodeBidirectional(IsTopNode);
3137     }
3138   } while (SU->isScheduled);
3139
3140   if (SU->isTopReady())
3141     Top.removeReady(SU);
3142   if (SU->isBottomReady())
3143     Bot.removeReady(SU);
3144
3145   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3146   return SU;
3147 }
3148
3149 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3150   MachineBasicBlock::iterator InsertPos = SU->getInstr();
3151   if (!isTop)
3152     ++InsertPos;
3153   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3154
3155   // Find already scheduled copies with a single physreg dependence and move
3156   // them just above the scheduled instruction.
3157   for (SDep &Dep : Deps) {
3158     if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3159       continue;
3160     SUnit *DepSU = Dep.getSUnit();
3161     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3162       continue;
3163     MachineInstr *Copy = DepSU->getInstr();
3164     if (!Copy->isCopy())
3165       continue;
3166     DEBUG(dbgs() << "  Rescheduling physreg copy ";
3167           Dep.getSUnit()->dump(DAG));
3168     DAG->moveInstruction(Copy, InsertPos);
3169   }
3170 }
3171
3172 /// Update the scheduler's state after scheduling a node. This is the same node
3173 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3174 /// update it's state based on the current cycle before MachineSchedStrategy
3175 /// does.
3176 ///
3177 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3178 /// them here. See comments in biasPhysRegCopy.
3179 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3180   if (IsTopNode) {
3181     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3182     Top.bumpNode(SU);
3183     if (SU->hasPhysRegUses)
3184       reschedulePhysRegCopies(SU, true);
3185   } else {
3186     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3187     Bot.bumpNode(SU);
3188     if (SU->hasPhysRegDefs)
3189       reschedulePhysRegCopies(SU, false);
3190   }
3191 }
3192
3193 /// Create the standard converging machine scheduler. This will be used as the
3194 /// default scheduler if the target does not set a default.
3195 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3196   ScheduleDAGMILive *DAG =
3197       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3198   // Register DAG post-processors.
3199   //
3200   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3201   // data and pass it to later mutations. Have a single mutation that gathers
3202   // the interesting nodes in one pass.
3203   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3204   return DAG;
3205 }
3206
3207 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3208   return createGenericSchedLive(C);
3209 }
3210
3211 static MachineSchedRegistry
3212 GenericSchedRegistry("converge", "Standard converging scheduler.",
3213                      createConveringSched);
3214
3215 //===----------------------------------------------------------------------===//
3216 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3217 //===----------------------------------------------------------------------===//
3218
3219 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3220   DAG = Dag;
3221   SchedModel = DAG->getSchedModel();
3222   TRI = DAG->TRI;
3223
3224   Rem.init(DAG, SchedModel);
3225   Top.init(DAG, SchedModel, &Rem);
3226   BotRoots.clear();
3227
3228   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3229   // or are disabled, then these HazardRecs will be disabled.
3230   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3231   if (!Top.HazardRec) {
3232     Top.HazardRec =
3233         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3234             Itin, DAG);
3235   }
3236 }
3237
3238 void PostGenericScheduler::registerRoots() {
3239   Rem.CriticalPath = DAG->ExitSU.getDepth();
3240
3241   // Some roots may not feed into ExitSU. Check all of them in case.
3242   for (const SUnit *SU : BotRoots) {
3243     if (SU->getDepth() > Rem.CriticalPath)
3244       Rem.CriticalPath = SU->getDepth();
3245   }
3246   DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3247   if (DumpCriticalPathLength) {
3248     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3249   }
3250 }
3251
3252 /// Apply a set of heursitics to a new candidate for PostRA scheduling.
3253 ///
3254 /// \param Cand provides the policy and current best candidate.
3255 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3256 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3257                                         SchedCandidate &TryCand) {
3258   // Initialize the candidate if needed.
3259   if (!Cand.isValid()) {
3260     TryCand.Reason = NodeOrder;
3261     return;
3262   }
3263
3264   // Prioritize instructions that read unbuffered resources by stall cycles.
3265   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3266               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3267     return;
3268
3269   // Keep clustered nodes together.
3270   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3271                  Cand.SU == DAG->getNextClusterSucc(),
3272                  TryCand, Cand, Cluster))
3273     return;
3274
3275   // Avoid critical resource consumption and balance the schedule.
3276   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3277               TryCand, Cand, ResourceReduce))
3278     return;
3279   if (tryGreater(TryCand.ResDelta.DemandedResources,
3280                  Cand.ResDelta.DemandedResources,
3281                  TryCand, Cand, ResourceDemand))
3282     return;
3283
3284   // Avoid serializing long latency dependence chains.
3285   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3286     return;
3287   }
3288
3289   // Fall through to original instruction order.
3290   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3291     TryCand.Reason = NodeOrder;
3292 }
3293
3294 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3295   ReadyQueue &Q = Top.Available;
3296   for (SUnit *SU : Q) {
3297     SchedCandidate TryCand(Cand.Policy);
3298     TryCand.SU = SU;
3299     TryCand.AtTop = true;
3300     TryCand.initResourceDelta(DAG, SchedModel);
3301     tryCandidate(Cand, TryCand);
3302     if (TryCand.Reason != NoCand) {
3303       Cand.setBest(TryCand);
3304       DEBUG(traceCandidate(Cand));
3305     }
3306   }
3307 }
3308
3309 /// Pick the next node to schedule.
3310 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3311   if (DAG->top() == DAG->bottom()) {
3312     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3313     return nullptr;
3314   }
3315   SUnit *SU;
3316   do {
3317     SU = Top.pickOnlyChoice();
3318     if (SU) {
3319       tracePick(Only1, true);
3320     } else {
3321       CandPolicy NoPolicy;
3322       SchedCandidate TopCand(NoPolicy);
3323       // Set the top-down policy based on the state of the current top zone and
3324       // the instructions outside the zone, including the bottom zone.
3325       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3326       pickNodeFromQueue(TopCand);
3327       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3328       tracePick(TopCand);
3329       SU = TopCand.SU;
3330     }
3331   } while (SU->isScheduled);
3332
3333   IsTopNode = true;
3334   Top.removeReady(SU);
3335
3336   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3337   return SU;
3338 }
3339
3340 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3341 /// scheduled/remaining flags in the DAG nodes.
3342 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3343   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3344   Top.bumpNode(SU);
3345 }
3346
3347 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3348   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3349                            /*RemoveKillFlags=*/true);
3350 }
3351
3352 //===----------------------------------------------------------------------===//
3353 // ILP Scheduler. Currently for experimental analysis of heuristics.
3354 //===----------------------------------------------------------------------===//
3355
3356 namespace {
3357
3358 /// \brief Order nodes by the ILP metric.
3359 struct ILPOrder {
3360   const SchedDFSResult *DFSResult = nullptr;
3361   const BitVector *ScheduledTrees = nullptr;
3362   bool MaximizeILP;
3363
3364   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3365
3366   /// \brief Apply a less-than relation on node priority.
3367   ///
3368   /// (Return true if A comes after B in the Q.)
3369   bool operator()(const SUnit *A, const SUnit *B) const {
3370     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3371     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3372     if (SchedTreeA != SchedTreeB) {
3373       // Unscheduled trees have lower priority.
3374       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3375         return ScheduledTrees->test(SchedTreeB);
3376
3377       // Trees with shallower connections have have lower priority.
3378       if (DFSResult->getSubtreeLevel(SchedTreeA)
3379           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3380         return DFSResult->getSubtreeLevel(SchedTreeA)
3381           < DFSResult->getSubtreeLevel(SchedTreeB);
3382       }
3383     }
3384     if (MaximizeILP)
3385       return DFSResult->getILP(A) < DFSResult->getILP(B);
3386     else
3387       return DFSResult->getILP(A) > DFSResult->getILP(B);
3388   }
3389 };
3390
3391 /// \brief Schedule based on the ILP metric.
3392 class ILPScheduler : public MachineSchedStrategy {
3393   ScheduleDAGMILive *DAG = nullptr;
3394   ILPOrder Cmp;
3395
3396   std::vector<SUnit*> ReadyQ;
3397
3398 public:
3399   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3400
3401   void initialize(ScheduleDAGMI *dag) override {
3402     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3403     DAG = static_cast<ScheduleDAGMILive*>(dag);
3404     DAG->computeDFSResult();
3405     Cmp.DFSResult = DAG->getDFSResult();
3406     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3407     ReadyQ.clear();
3408   }
3409
3410   void registerRoots() override {
3411     // Restore the heap in ReadyQ with the updated DFS results.
3412     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3413   }
3414
3415   /// Implement MachineSchedStrategy interface.
3416   /// -----------------------------------------
3417
3418   /// Callback to select the highest priority node from the ready Q.
3419   SUnit *pickNode(bool &IsTopNode) override {
3420     if (ReadyQ.empty()) return nullptr;
3421     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3422     SUnit *SU = ReadyQ.back();
3423     ReadyQ.pop_back();
3424     IsTopNode = false;
3425     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3426           << " ILP: " << DAG->getDFSResult()->getILP(SU)
3427           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3428           << DAG->getDFSResult()->getSubtreeLevel(
3429             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3430           << "Scheduling " << *SU->getInstr());
3431     return SU;
3432   }
3433
3434   /// \brief Scheduler callback to notify that a new subtree is scheduled.
3435   void scheduleTree(unsigned SubtreeID) override {
3436     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3437   }
3438
3439   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3440   /// DFSResults, and resort the priority Q.
3441   void schedNode(SUnit *SU, bool IsTopNode) override {
3442     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3443   }
3444
3445   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3446
3447   void releaseBottomNode(SUnit *SU) override {
3448     ReadyQ.push_back(SU);
3449     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3450   }
3451 };
3452
3453 } // end anonymous namespace
3454
3455 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3456   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3457 }
3458 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3459   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3460 }
3461
3462 static MachineSchedRegistry ILPMaxRegistry(
3463   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3464 static MachineSchedRegistry ILPMinRegistry(
3465   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3466
3467 //===----------------------------------------------------------------------===//
3468 // Machine Instruction Shuffler for Correctness Testing
3469 //===----------------------------------------------------------------------===//
3470
3471 #ifndef NDEBUG
3472 namespace {
3473
3474 /// Apply a less-than relation on the node order, which corresponds to the
3475 /// instruction order prior to scheduling. IsReverse implements greater-than.
3476 template<bool IsReverse>
3477 struct SUnitOrder {
3478   bool operator()(SUnit *A, SUnit *B) const {
3479     if (IsReverse)
3480       return A->NodeNum > B->NodeNum;
3481     else
3482       return A->NodeNum < B->NodeNum;
3483   }
3484 };
3485
3486 /// Reorder instructions as much as possible.
3487 class InstructionShuffler : public MachineSchedStrategy {
3488   bool IsAlternating;
3489   bool IsTopDown;
3490
3491   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3492   // gives nodes with a higher number higher priority causing the latest
3493   // instructions to be scheduled first.
3494   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3495     TopQ;
3496
3497   // When scheduling bottom-up, use greater-than as the queue priority.
3498   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3499     BottomQ;
3500
3501 public:
3502   InstructionShuffler(bool alternate, bool topdown)
3503     : IsAlternating(alternate), IsTopDown(topdown) {}
3504
3505   void initialize(ScheduleDAGMI*) override {
3506     TopQ.clear();
3507     BottomQ.clear();
3508   }
3509
3510   /// Implement MachineSchedStrategy interface.
3511   /// -----------------------------------------
3512
3513   SUnit *pickNode(bool &IsTopNode) override {
3514     SUnit *SU;
3515     if (IsTopDown) {
3516       do {
3517         if (TopQ.empty()) return nullptr;
3518         SU = TopQ.top();
3519         TopQ.pop();
3520       } while (SU->isScheduled);
3521       IsTopNode = true;
3522     } else {
3523       do {
3524         if (BottomQ.empty()) return nullptr;
3525         SU = BottomQ.top();
3526         BottomQ.pop();
3527       } while (SU->isScheduled);
3528       IsTopNode = false;
3529     }
3530     if (IsAlternating)
3531       IsTopDown = !IsTopDown;
3532     return SU;
3533   }
3534
3535   void schedNode(SUnit *SU, bool IsTopNode) override {}
3536
3537   void releaseTopNode(SUnit *SU) override {
3538     TopQ.push(SU);
3539   }
3540   void releaseBottomNode(SUnit *SU) override {
3541     BottomQ.push(SU);
3542   }
3543 };
3544
3545 } // end anonymous namespace
3546
3547 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3548   bool Alternate = !ForceTopDown && !ForceBottomUp;
3549   bool TopDown = !ForceBottomUp;
3550   assert((TopDown || !ForceTopDown) &&
3551          "-misched-topdown incompatible with -misched-bottomup");
3552   return new ScheduleDAGMILive(
3553       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3554 }
3555
3556 static MachineSchedRegistry ShufflerRegistry(
3557   "shuffle", "Shuffle machine instructions alternating directions",
3558   createInstructionShuffler);
3559 #endif // !NDEBUG
3560
3561 //===----------------------------------------------------------------------===//
3562 // GraphWriter support for ScheduleDAGMILive.
3563 //===----------------------------------------------------------------------===//
3564
3565 #ifndef NDEBUG
3566 namespace llvm {
3567
3568 template<> struct GraphTraits<
3569   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3570
3571 template<>
3572 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3573   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3574
3575   static std::string getGraphName(const ScheduleDAG *G) {
3576     return G->MF.getName();
3577   }
3578
3579   static bool renderGraphFromBottomUp() {
3580     return true;
3581   }
3582
3583   static bool isNodeHidden(const SUnit *Node) {
3584     if (ViewMISchedCutoff == 0)
3585       return false;
3586     return (Node->Preds.size() > ViewMISchedCutoff
3587          || Node->Succs.size() > ViewMISchedCutoff);
3588   }
3589
3590   /// If you want to override the dot attributes printed for a particular
3591   /// edge, override this method.
3592   static std::string getEdgeAttributes(const SUnit *Node,
3593                                        SUnitIterator EI,
3594                                        const ScheduleDAG *Graph) {
3595     if (EI.isArtificialDep())
3596       return "color=cyan,style=dashed";
3597     if (EI.isCtrlDep())
3598       return "color=blue,style=dashed";
3599     return "";
3600   }
3601
3602   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3603     std::string Str;
3604     raw_string_ostream SS(Str);
3605     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3606     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3607       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3608     SS << "SU:" << SU->NodeNum;
3609     if (DFS)
3610       SS << " I:" << DFS->getNumInstrs(SU);
3611     return SS.str();
3612   }
3613
3614   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3615     return G->getGraphNodeLabel(SU);
3616   }
3617
3618   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3619     std::string Str("shape=Mrecord");
3620     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3621     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3622       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3623     if (DFS) {
3624       Str += ",style=filled,fillcolor=\"#";
3625       Str += DOT::getColorString(DFS->getSubtreeID(N));
3626       Str += '"';
3627     }
3628     return Str;
3629   }
3630 };
3631
3632 } // end namespace llvm
3633 #endif // NDEBUG
3634
3635 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3636 /// rendered using 'dot'.
3637 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3638 #ifndef NDEBUG
3639   ViewGraph(this, Name, false, Title);
3640 #else
3641   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3642          << "systems with Graphviz or gv!\n";
3643 #endif  // NDEBUG
3644 }
3645
3646 /// Out-of-line implementation with no arguments is handy for gdb.
3647 void ScheduleDAGMI::viewGraph() {
3648   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3649 }