1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// SI Machine Scheduler interface
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17 #include "SIInstrInfo.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineScheduler.h"
20 #include "llvm/CodeGen/RegisterPressure.h"
21 #include "llvm/CodeGen/ScheduleDAG.h"
31 enum SIScheduleCandReason {
40 struct SISchedulerCandidate {
41 // The reason for this candidate.
42 SIScheduleCandReason Reason = NoCand;
44 // Set of reasons that apply to multiple candidates.
45 uint32_t RepeatReasonSet = 0;
47 SISchedulerCandidate() = default;
49 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
50 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
53 class SIScheduleDAGMI;
54 class SIScheduleBlockCreator;
56 enum SIScheduleBlockLinkKind {
61 class SIScheduleBlock {
63 SIScheduleBlockCreator *BC;
65 std::vector<SUnit*> SUnits;
66 std::map<unsigned, unsigned> NodeNum2Index;
67 std::vector<SUnit*> TopReadySUs;
68 std::vector<SUnit*> ScheduledSUnits;
70 /// The top of the unscheduled zone.
71 IntervalPressure TopPressure;
72 RegPressureTracker TopRPTracker;
74 // Pressure: number of said class of registers needed to
75 // store the live virtual and real registers.
76 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
77 // Pressure of additional registers required inside the block.
78 std::vector<unsigned> InternalAdditionnalPressure;
79 // Pressure of input and output registers
80 std::vector<unsigned> LiveInPressure;
81 std::vector<unsigned> LiveOutPressure;
82 // Registers required by the block, and outputs.
83 // We do track only virtual registers.
84 // Note that some registers are not 32 bits,
85 // and thus the pressure is not equal
86 // to the number of live registers.
87 std::set<unsigned> LiveInRegs;
88 std::set<unsigned> LiveOutRegs;
90 bool Scheduled = false;
91 bool HighLatencyBlock = false;
93 std::vector<unsigned> HasLowLatencyNonWaitedParent;
95 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
98 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
99 // All blocks successors, and the kind of link
100 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
101 unsigned NumHighLatencySuccessors = 0;
104 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
106 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
108 ~SIScheduleBlock() = default;
110 unsigned getID() const { return ID; }
112 /// Functions for Block construction.
113 void addUnit(SUnit *SU);
115 // When all SUs have been added.
116 void finalizeUnits();
118 // Add block pred, which has instruction predecessor of SU.
119 void addPred(SIScheduleBlock *Pred);
120 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
122 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
123 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
124 getSuccs() const { return Succs; }
126 unsigned Height; // Maximum topdown path length to block without outputs
127 unsigned Depth; // Maximum bottomup path length to block without inputs
129 unsigned getNumHighLatencySuccessors() const {
130 return NumHighLatencySuccessors;
133 bool isHighLatencyBlock() { return HighLatencyBlock; }
135 // This is approximative.
136 // Ideally should take into accounts some instructions (rcp, etc)
137 // are 4 times slower.
138 int getCost() { return SUnits.size(); }
140 // The block Predecessors and Successors must be all registered
141 // before fastSchedule().
142 // Fast schedule with no particular requirement.
145 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147 // Complete schedule that will try to minimize reg pressure and
148 // low latencies, and will fill liveins and liveouts.
149 // Needs all MIs to be grouped between BeginBlock and EndBlock.
150 // The MIs can be moved after the scheduling,
151 // it is just used to allow correct track of live registers.
152 void schedule(MachineBasicBlock::iterator BeginBlock,
153 MachineBasicBlock::iterator EndBlock);
155 bool isScheduled() { return Scheduled; }
157 // Needs the block to be scheduled inside
158 // TODO: find a way to compute it.
159 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
160 return InternalAdditionnalPressure;
163 std::set<unsigned> &getInRegs() { return LiveInRegs; }
164 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166 void printDebug(bool Full);
169 struct SISchedCandidate : SISchedulerCandidate {
170 // The best SUnit candidate.
176 unsigned LowLatencyOffset;
177 bool HasLowLatencyNonWaitedParent;
179 SISchedCandidate() = default;
181 bool isValid() const { return SU; }
183 // Copy the status of another candidate without changing policy.
184 void setBest(SISchedCandidate &Best) {
185 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187 Reason = Best.Reason;
188 SGPRUsage = Best.SGPRUsage;
189 VGPRUsage = Best.VGPRUsage;
190 IsLowLatency = Best.IsLowLatency;
191 LowLatencyOffset = Best.LowLatencyOffset;
192 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
198 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
199 void releaseSucc(SUnit *SU, SDep *SuccEdge);
200 // InOrOutBlock: restrict to links pointing inside the block (true),
201 // or restrict to links pointing outside the block (false).
202 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204 void nodeScheduled(SUnit *SU);
205 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
206 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208 void traceCandidate(const SISchedCandidate &Cand);
209 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
210 MachineBasicBlock::iterator EndBlock);
213 struct SIScheduleBlocks {
214 std::vector<SIScheduleBlock*> Blocks;
215 std::vector<int> TopDownIndex2Block;
216 std::vector<int> TopDownBlock2Index;
219 enum SISchedulerBlockCreatorVariant {
222 LatenciesAlonePlusConsecutive
225 class SIScheduleBlockCreator {
226 SIScheduleDAGMI *DAG;
227 // unique_ptr handles freeing memory for us.
228 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
229 std::map<SISchedulerBlockCreatorVariant,
230 SIScheduleBlocks> Blocks;
231 std::vector<SIScheduleBlock*> CurrentBlocks;
232 std::vector<int> Node2CurrentBlock;
235 // Maps topological index to the node number.
236 std::vector<int> TopDownIndex2Block;
237 std::vector<int> TopDownBlock2Index;
238 std::vector<int> BottomUpIndex2Block;
240 // 0 -> Color not given.
241 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
242 // Above -> Other groups.
244 int NextNonReservedID;
245 std::vector<int> CurrentColoring;
246 std::vector<int> CurrentTopDownReservedDependencyColoring;
247 std::vector<int> CurrentBottomUpReservedDependencyColoring;
250 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
253 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
255 bool isSUInBlock(SUnit *SU, unsigned ID);
258 // Give a Reserved color to every high latency.
259 void colorHighLatenciesAlone();
261 // Create groups of high latencies with a Reserved color.
262 void colorHighLatenciesGroups();
264 // Compute coloring for topdown and bottom traversals with
265 // different colors depending on dependencies on Reserved colors.
266 void colorComputeReservedDependencies();
268 // Give color to all non-colored SUs according to Reserved groups dependencies.
269 void colorAccordingToReservedDependencies();
271 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
272 // The new colors are computed according to the dependencies on the other blocks
273 // formed with colorAccordingToReservedDependencies.
274 void colorEndsAccordingToDependencies();
276 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
277 void colorForceConsecutiveOrderInGroup();
279 // Merge Constant loads that have all their users into another group to the group.
280 // (TODO: else if all their users depend on the same group, put them there)
281 void colorMergeConstantLoadsNextGroup();
283 // Merge SUs that have all their users into another group to the group
284 void colorMergeIfPossibleNextGroup();
286 // Merge SUs that have all their users into another group to the group,
287 // but only for Reserved groups.
288 void colorMergeIfPossibleNextGroupOnlyForReserved();
290 // Merge SUs that have all their users into another group to the group,
291 // but only if the group is no more than a few SUs.
292 void colorMergeIfPossibleSmallGroupsToNextGroup();
294 // Divides Blocks with important size.
295 // Idea of implementation: attribute new colors depending on topdown and
296 // bottom up links to other blocks.
297 void cutHugeBlocks();
299 // Put in one group all instructions with no users in this scheduling region
300 // (we'd want these groups be at the end).
301 void regroupNoUserInstructions();
303 // Give Reserved color to export instructions
306 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
308 void topologicalSort();
310 void scheduleInsideBlocks();
315 enum SISchedulerBlockSchedulerVariant {
316 BlockLatencyRegUsage,
317 BlockRegUsageLatency,
321 class SIScheduleBlockScheduler {
322 SIScheduleDAGMI *DAG;
323 SISchedulerBlockSchedulerVariant Variant;
324 std::vector<SIScheduleBlock*> Blocks;
326 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
327 std::set<unsigned> LiveRegs;
328 // Num of schedulable unscheduled blocks reading the register.
329 std::map<unsigned, unsigned> LiveRegsConsumers;
331 std::vector<unsigned> LastPosHighLatencyParentScheduled;
332 int LastPosWaitedHighLatency;
334 std::vector<SIScheduleBlock*> BlocksScheduled;
335 unsigned NumBlockScheduled;
336 std::vector<SIScheduleBlock*> ReadyBlocks;
338 unsigned VregCurrentUsage;
339 unsigned SregCurrentUsage;
341 // Currently is only approximation.
342 unsigned maxVregUsage;
343 unsigned maxSregUsage;
345 std::vector<unsigned> BlockNumPredsLeft;
346 std::vector<unsigned> BlockNumSuccsLeft;
349 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
350 SISchedulerBlockSchedulerVariant Variant,
351 SIScheduleBlocks BlocksStruct);
352 ~SIScheduleBlockScheduler() = default;
354 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
356 unsigned getVGPRUsage() { return maxVregUsage; }
357 unsigned getSGPRUsage() { return maxSregUsage; }
360 struct SIBlockSchedCandidate : SISchedulerCandidate {
361 // The best Block candidate.
362 SIScheduleBlock *Block = nullptr;
366 unsigned NumSuccessors;
367 unsigned NumHighLatencySuccessors;
368 unsigned LastPosHighLatParentScheduled;
371 SIBlockSchedCandidate() = default;
373 bool isValid() const { return Block; }
375 // Copy the status of another candidate without changing policy.
376 void setBest(SIBlockSchedCandidate &Best) {
377 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
379 Reason = Best.Reason;
380 IsHighLatency = Best.IsHighLatency;
381 VGPRUsageDiff = Best.VGPRUsageDiff;
382 NumSuccessors = Best.NumSuccessors;
383 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
384 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
385 Height = Best.Height;
389 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
390 SIBlockSchedCandidate &TryCand);
391 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
392 SIBlockSchedCandidate &TryCand);
393 SIScheduleBlock *pickBlock();
395 void addLiveRegs(std::set<unsigned> &Regs);
396 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
397 void releaseBlockSuccs(SIScheduleBlock *Parent);
398 void blockScheduled(SIScheduleBlock *Block);
400 // Check register pressure change
401 // by scheduling a block with these LiveIn and LiveOut.
402 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
403 std::set<unsigned> &OutRegs);
408 struct SIScheduleBlockResult {
409 std::vector<unsigned> SUs;
410 unsigned MaxSGPRUsage;
411 unsigned MaxVGPRUsage;
415 SIScheduleDAGMI *DAG;
416 SIScheduleBlockCreator BlockCreator;
419 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
421 ~SIScheduler() = default;
423 struct SIScheduleBlockResult
424 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
425 SISchedulerBlockSchedulerVariant ScheduleVariant);
428 class SIScheduleDAGMI final : public ScheduleDAGMILive {
429 const SIInstrInfo *SITII;
430 const SIRegisterInfo *SITRI;
432 std::vector<SUnit> SUnitsLinksBackup;
434 // For moveLowLatencies. After all Scheduling variants are tested.
435 std::vector<unsigned> ScheduledSUnits;
436 std::vector<unsigned> ScheduledSUnitsInv;
442 SIScheduleDAGMI(MachineSchedContext *C);
444 ~SIScheduleDAGMI() override;
446 // Entry point for the schedule.
447 void schedule() override;
449 // To init Block's RPTracker.
450 void initRPTracker(RegPressureTracker &RPTracker) {
451 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
454 MachineBasicBlock *getBB() { return BB; }
455 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
456 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
457 LiveIntervals *getLIS() { return LIS; }
458 MachineRegisterInfo *getMRI() { return &MRI; }
459 const TargetRegisterInfo *getTRI() { return TRI; }
460 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
461 SUnit& getEntrySU() { return EntrySU; }
462 SUnit& getExitSU() { return ExitSU; }
464 void restoreSULinksLeft();
466 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
469 unsigned &SgprUsage);
471 std::set<unsigned> getInRegs() {
472 std::set<unsigned> InRegs;
473 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
474 InRegs.insert(RegMaskPair.RegUnit);
479 std::set<unsigned> getOutRegs() {
480 std::set<unsigned> OutRegs;
481 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
482 OutRegs.insert(RegMaskPair.RegUnit);
487 unsigned getVGPRSetID() const { return VGPRSetID; }
488 unsigned getSGPRSetID() const { return SGPRSetID; }
491 void topologicalSort();
492 // After scheduling is done, improve low latency placements.
493 void moveLowLatencies();
496 // Some stats for scheduling inside blocks.
497 std::vector<unsigned> IsLowLatencySU;
498 std::vector<unsigned> LowLatencyOffset;
499 std::vector<unsigned> IsHighLatencySU;
501 // Maps topological index to the node number.
502 std::vector<int> TopDownIndex2SU;
503 std::vector<int> BottomUpIndex2SU;
506 } // end namespace llvm
508 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H