1 //===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief SI Machine Scheduler interface
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
18 #include "SIInstrInfo.h"
19 #include "llvm/CodeGen/MachineScheduler.h"
20 #include "llvm/CodeGen/RegisterPressure.h"
26 enum SIScheduleCandReason {
35 struct SISchedulerCandidate {
36 // The reason for this candidate.
37 SIScheduleCandReason Reason;
39 // Set of reasons that apply to multiple candidates.
40 uint32_t RepeatReasonSet;
42 SISchedulerCandidate()
43 : Reason(NoCand), RepeatReasonSet(0) {}
45 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
46 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
49 class SIScheduleDAGMI;
50 class SIScheduleBlockCreator;
52 class SIScheduleBlock {
54 SIScheduleBlockCreator *BC;
56 std::vector<SUnit*> SUnits;
57 std::map<unsigned, unsigned> NodeNum2Index;
58 std::vector<SUnit*> TopReadySUs;
59 std::vector<SUnit*> ScheduledSUnits;
61 /// The top of the unscheduled zone.
62 IntervalPressure TopPressure;
63 RegPressureTracker TopRPTracker;
65 // Pressure: number of said class of registers needed to
66 // store the live virtual and real registers.
67 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
68 // Pressure of additional registers required inside the block.
69 std::vector<unsigned> InternalAdditionnalPressure;
70 // Pressure of input and output registers
71 std::vector<unsigned> LiveInPressure;
72 std::vector<unsigned> LiveOutPressure;
73 // Registers required by the block, and outputs.
74 // We do track only virtual registers.
75 // Note that some registers are not 32 bits,
76 // and thus the pressure is not equal
77 // to the number of live registers.
78 std::set<unsigned> LiveInRegs;
79 std::set<unsigned> LiveOutRegs;
82 bool HighLatencyBlock;
84 std::vector<unsigned> HasLowLatencyNonWaitedParent;
86 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
89 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
90 std::vector<SIScheduleBlock*> Succs; // All blocks successors.
91 unsigned NumHighLatencySuccessors;
94 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
96 DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
97 TopRPTracker(TopPressure), Scheduled(false),
98 HighLatencyBlock(false), ID(ID),
99 Preds(), Succs(), NumHighLatencySuccessors(0) {};
101 ~SIScheduleBlock() {};
103 unsigned getID() const { return ID; }
105 /// Functions for Block construction.
106 void addUnit(SUnit *SU);
108 // When all SUs have been added.
109 void finalizeUnits();
111 // Add block pred, which has instruction predecessor of SU.
112 void addPred(SIScheduleBlock *Pred);
113 void addSucc(SIScheduleBlock *Succ);
115 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
116 const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
118 unsigned Height; // Maximum topdown path length to block without outputs
119 unsigned Depth; // Maximum bottomup path length to block without inputs
121 unsigned getNumHighLatencySuccessors() const {
122 return NumHighLatencySuccessors;
125 bool isHighLatencyBlock() { return HighLatencyBlock; }
127 // This is approximative.
128 // Ideally should take into accounts some instructions (rcp, etc)
129 // are 4 times slower.
130 int getCost() { return SUnits.size(); }
132 // The block Predecessors and Successors must be all registered
133 // before fastSchedule().
134 // Fast schedule with no particular requirement.
137 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
139 // Complete schedule that will try to minimize reg pressure and
140 // low latencies, and will fill liveins and liveouts.
141 // Needs all MIs to be grouped between BeginBlock and EndBlock.
142 // The MIs can be moved after the scheduling,
143 // it is just used to allow correct track of live registers.
144 void schedule(MachineBasicBlock::iterator BeginBlock,
145 MachineBasicBlock::iterator EndBlock);
147 bool isScheduled() { return Scheduled; }
150 // Needs the block to be scheduled inside
151 // TODO: find a way to compute it.
152 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
153 return InternalAdditionnalPressure;
156 std::set<unsigned> &getInRegs() { return LiveInRegs; }
157 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
159 void printDebug(bool Full);
162 struct SISchedCandidate : SISchedulerCandidate {
163 // The best SUnit candidate.
169 unsigned LowLatencyOffset;
170 bool HasLowLatencyNonWaitedParent;
175 bool isValid() const { return SU; }
177 // Copy the status of another candidate without changing policy.
178 void setBest(SISchedCandidate &Best) {
179 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
181 Reason = Best.Reason;
182 SGPRUsage = Best.SGPRUsage;
183 VGPRUsage = Best.VGPRUsage;
184 IsLowLatency = Best.IsLowLatency;
185 LowLatencyOffset = Best.LowLatencyOffset;
186 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
192 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
193 void releaseSucc(SUnit *SU, SDep *SuccEdge);
194 // InOrOutBlock: restrict to links pointing inside the block (true),
195 // or restrict to links pointing outside the block (false).
196 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
198 void nodeScheduled(SUnit *SU);
199 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
200 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
202 void traceCandidate(const SISchedCandidate &Cand);
203 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
204 MachineBasicBlock::iterator EndBlock);
207 struct SIScheduleBlocks {
208 std::vector<SIScheduleBlock*> Blocks;
209 std::vector<int> TopDownIndex2Block;
210 std::vector<int> TopDownBlock2Index;
213 enum SISchedulerBlockCreatorVariant {
216 LatenciesAlonePlusConsecutive
219 class SIScheduleBlockCreator {
220 SIScheduleDAGMI *DAG;
221 // unique_ptr handles freeing memory for us.
222 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
223 std::map<SISchedulerBlockCreatorVariant,
224 SIScheduleBlocks> Blocks;
225 std::vector<SIScheduleBlock*> CurrentBlocks;
226 std::vector<int> Node2CurrentBlock;
229 // Maps topological index to the node number.
230 std::vector<int> TopDownIndex2Block;
231 std::vector<int> TopDownBlock2Index;
232 std::vector<int> BottomUpIndex2Block;
234 // 0 -> Color not given.
235 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
236 // Above -> Other groups.
238 int NextNonReservedID;
239 std::vector<int> CurrentColoring;
240 std::vector<int> CurrentTopDownReservedDependencyColoring;
241 std::vector<int> CurrentBottomUpReservedDependencyColoring;
244 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
245 ~SIScheduleBlockCreator();
248 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
250 bool isSUInBlock(SUnit *SU, unsigned ID);
253 // Give a Reserved color to every high latency.
254 void colorHighLatenciesAlone();
256 // Create groups of high latencies with a Reserved color.
257 void colorHighLatenciesGroups();
259 // Compute coloring for topdown and bottom traversals with
260 // different colors depending on dependencies on Reserved colors.
261 void colorComputeReservedDependencies();
263 // Give color to all non-colored SUs according to Reserved groups dependencies.
264 void colorAccordingToReservedDependencies();
266 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
267 // The new colors are computed according to the dependencies on the other blocks
268 // formed with colorAccordingToReservedDependencies.
269 void colorEndsAccordingToDependencies();
271 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
272 void colorForceConsecutiveOrderInGroup();
274 // Merge Constant loads that have all their users into another group to the group.
275 // (TODO: else if all their users depend on the same group, put them there)
276 void colorMergeConstantLoadsNextGroup();
278 // Merge SUs that have all their users into another group to the group
279 void colorMergeIfPossibleNextGroup();
281 // Merge SUs that have all their users into another group to the group,
282 // but only for Reserved groups.
283 void colorMergeIfPossibleNextGroupOnlyForReserved();
285 // Merge SUs that have all their users into another group to the group,
286 // but only if the group is no more than a few SUs.
287 void colorMergeIfPossibleSmallGroupsToNextGroup();
289 // Divides Blocks with important size.
290 // Idea of implementation: attribute new colors depending on topdown and
291 // bottom up links to other blocks.
292 void cutHugeBlocks();
294 // Put in one group all instructions with no users in this scheduling region
295 // (we'd want these groups be at the end).
296 void regroupNoUserInstructions();
298 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
300 void topologicalSort();
302 void scheduleInsideBlocks();
307 enum SISchedulerBlockSchedulerVariant {
308 BlockLatencyRegUsage,
309 BlockRegUsageLatency,
313 class SIScheduleBlockScheduler {
314 SIScheduleDAGMI *DAG;
315 SISchedulerBlockSchedulerVariant Variant;
316 std::vector<SIScheduleBlock*> Blocks;
318 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
319 std::set<unsigned> LiveRegs;
320 // Num of schedulable unscheduled blocks reading the register.
321 std::map<unsigned, unsigned> LiveRegsConsumers;
323 std::vector<unsigned> LastPosHighLatencyParentScheduled;
324 int LastPosWaitedHighLatency;
326 std::vector<SIScheduleBlock*> BlocksScheduled;
327 unsigned NumBlockScheduled;
328 std::vector<SIScheduleBlock*> ReadyBlocks;
330 unsigned VregCurrentUsage;
331 unsigned SregCurrentUsage;
333 // Currently is only approximation.
334 unsigned maxVregUsage;
335 unsigned maxSregUsage;
337 std::vector<unsigned> BlockNumPredsLeft;
338 std::vector<unsigned> BlockNumSuccsLeft;
341 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
342 SISchedulerBlockSchedulerVariant Variant,
343 SIScheduleBlocks BlocksStruct);
344 ~SIScheduleBlockScheduler() {};
346 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
348 unsigned getVGPRUsage() { return maxVregUsage; };
349 unsigned getSGPRUsage() { return maxSregUsage; };
352 struct SIBlockSchedCandidate : SISchedulerCandidate {
353 // The best Block candidate.
354 SIScheduleBlock *Block;
358 unsigned NumSuccessors;
359 unsigned NumHighLatencySuccessors;
360 unsigned LastPosHighLatParentScheduled;
363 SIBlockSchedCandidate()
366 bool isValid() const { return Block; }
368 // Copy the status of another candidate without changing policy.
369 void setBest(SIBlockSchedCandidate &Best) {
370 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
372 Reason = Best.Reason;
373 IsHighLatency = Best.IsHighLatency;
374 VGPRUsageDiff = Best.VGPRUsageDiff;
375 NumSuccessors = Best.NumSuccessors;
376 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
377 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
378 Height = Best.Height;
382 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
383 SIBlockSchedCandidate &TryCand);
384 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
385 SIBlockSchedCandidate &TryCand);
386 SIScheduleBlock *pickBlock();
388 void addLiveRegs(std::set<unsigned> &Regs);
389 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
390 void releaseBlockSuccs(SIScheduleBlock *Parent);
391 void blockScheduled(SIScheduleBlock *Block);
393 // Check register pressure change
394 // by scheduling a block with these LiveIn and LiveOut.
395 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
396 std::set<unsigned> &OutRegs);
401 struct SIScheduleBlockResult {
402 std::vector<unsigned> SUs;
403 unsigned MaxSGPRUsage;
404 unsigned MaxVGPRUsage;
408 SIScheduleDAGMI *DAG;
409 SIScheduleBlockCreator BlockCreator;
412 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {};
416 struct SIScheduleBlockResult
417 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
418 SISchedulerBlockSchedulerVariant ScheduleVariant);
421 class SIScheduleDAGMI final : public ScheduleDAGMILive {
422 const SIInstrInfo *SITII;
423 const SIRegisterInfo *SITRI;
425 std::vector<SUnit> SUnitsLinksBackup;
427 // For moveLowLatencies. After all Scheduling variants are tested.
428 std::vector<unsigned> ScheduledSUnits;
429 std::vector<unsigned> ScheduledSUnitsInv;
435 SIScheduleDAGMI(MachineSchedContext *C);
437 ~SIScheduleDAGMI() override;
439 // Entry point for the schedule.
440 void schedule() override;
442 // To init Block's RPTracker.
443 void initRPTracker(RegPressureTracker &RPTracker) {
444 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
447 MachineBasicBlock *getBB() { return BB; }
448 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; };
449 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; };
450 LiveIntervals *getLIS() { return LIS; }
451 MachineRegisterInfo *getMRI() { return &MRI; }
452 const TargetRegisterInfo *getTRI() { return TRI; }
453 SUnit& getEntrySU() { return EntrySU; };
454 SUnit& getExitSU() { return ExitSU; };
456 void restoreSULinksLeft();
458 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
461 unsigned &SgprUsage);
462 std::set<unsigned> getInRegs() {
463 std::set<unsigned> InRegs;
464 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
465 InRegs.insert(RegMaskPair.RegUnit);
470 unsigned getVGPRSetID() const { return VGPRSetID; }
471 unsigned getSGPRSetID() const { return SGPRSetID; }
474 void topologicalSort();
475 // After scheduling is done, improve low latency placements.
476 void moveLowLatencies();
479 // Some stats for scheduling inside blocks.
480 std::vector<unsigned> IsLowLatencySU;
481 std::vector<unsigned> LowLatencyOffset;
482 std::vector<unsigned> IsHighLatencySU;
484 // Maps topological index to the node number.
485 std::vector<int> TopDownIndex2SU;
486 std::vector<int> BottomUpIndex2SU;
491 #endif /* SIMACHINESCHEDULER_H_ */