]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
Update tcsh to 6.21.00.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / AMDGPU / SIMachineScheduler.h
1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI Machine Scheduler interface
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18 #include "SIInstrInfo.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineScheduler.h"
21 #include "llvm/CodeGen/RegisterPressure.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 #include <cassert>
24 #include <cstdint>
25 #include <map>
26 #include <memory>
27 #include <set>
28 #include <vector>
29
30 namespace llvm {
31
32 enum SIScheduleCandReason {
33   NoCand,
34   RegUsage,
35   Latency,
36   Successor,
37   Depth,
38   NodeOrder
39 };
40
41 struct SISchedulerCandidate {
42   // The reason for this candidate.
43   SIScheduleCandReason Reason = NoCand;
44
45   // Set of reasons that apply to multiple candidates.
46   uint32_t RepeatReasonSet = 0;
47
48   SISchedulerCandidate() = default;
49
50   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52 };
53
54 class SIScheduleDAGMI;
55 class SIScheduleBlockCreator;
56
57 enum SIScheduleBlockLinkKind {
58   NoData,
59   Data
60 };
61
62 class SIScheduleBlock {
63   SIScheduleDAGMI *DAG;
64   SIScheduleBlockCreator *BC;
65
66   std::vector<SUnit*> SUnits;
67   std::map<unsigned, unsigned> NodeNum2Index;
68   std::vector<SUnit*> TopReadySUs;
69   std::vector<SUnit*> ScheduledSUnits;
70
71   /// The top of the unscheduled zone.
72   IntervalPressure TopPressure;
73   RegPressureTracker TopRPTracker;
74
75   // Pressure: number of said class of registers needed to
76   // store the live virtual and real registers.
77   // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78   // Pressure of additional registers required inside the block.
79   std::vector<unsigned> InternalAdditionnalPressure;
80   // Pressure of input and output registers
81   std::vector<unsigned> LiveInPressure;
82   std::vector<unsigned> LiveOutPressure;
83   // Registers required by the block, and outputs.
84   // We do track only virtual registers.
85   // Note that some registers are not 32 bits,
86   // and thus the pressure is not equal
87   // to the number of live registers.
88   std::set<unsigned> LiveInRegs;
89   std::set<unsigned> LiveOutRegs;
90
91   bool Scheduled = false;
92   bool HighLatencyBlock = false;
93
94   std::vector<unsigned> HasLowLatencyNonWaitedParent;
95
96   // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
97   unsigned ID;
98
99   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
100   // All blocks successors, and the kind of link
101   std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
102   unsigned NumHighLatencySuccessors = 0;
103
104 public:
105   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
106                   unsigned ID):
107     DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
108
109   ~SIScheduleBlock() = default;
110
111   unsigned getID() const { return ID; }
112
113   /// Functions for Block construction.
114   void addUnit(SUnit *SU);
115
116   // When all SUs have been added.
117   void finalizeUnits();
118
119   // Add block pred, which has instruction predecessor of SU.
120   void addPred(SIScheduleBlock *Pred);
121   void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
122
123   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
124   ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
125     getSuccs() const { return Succs; }
126
127   unsigned Height;  // Maximum topdown path length to block without outputs
128   unsigned Depth;   // Maximum bottomup path length to block without inputs
129
130   unsigned getNumHighLatencySuccessors() const {
131     return NumHighLatencySuccessors;
132   }
133
134   bool isHighLatencyBlock() { return HighLatencyBlock; }
135
136   // This is approximative.
137   // Ideally should take into accounts some instructions (rcp, etc)
138   // are 4 times slower.
139   int getCost() { return SUnits.size(); }
140
141   // The block Predecessors and Successors must be all registered
142   // before fastSchedule().
143   // Fast schedule with no particular requirement.
144   void fastSchedule();
145
146   std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147
148   // Complete schedule that will try to minimize reg pressure and
149   // low latencies, and will fill liveins and liveouts.
150   // Needs all MIs to be grouped between BeginBlock and EndBlock.
151   // The MIs can be moved after the scheduling,
152   // it is just used to allow correct track of live registers.
153   void schedule(MachineBasicBlock::iterator BeginBlock,
154                 MachineBasicBlock::iterator EndBlock);
155
156   bool isScheduled() { return Scheduled; }
157
158   // Needs the block to be scheduled inside
159   // TODO: find a way to compute it.
160   std::vector<unsigned> &getInternalAdditionnalRegUsage() {
161     return InternalAdditionnalPressure;
162   }
163
164   std::set<unsigned> &getInRegs() { return LiveInRegs; }
165   std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166
167   void printDebug(bool Full);
168
169 private:
170   struct SISchedCandidate : SISchedulerCandidate {
171     // The best SUnit candidate.
172     SUnit *SU = nullptr;
173
174     unsigned SGPRUsage;
175     unsigned VGPRUsage;
176     bool IsLowLatency;
177     unsigned LowLatencyOffset;
178     bool HasLowLatencyNonWaitedParent;
179
180     SISchedCandidate() = default;
181
182     bool isValid() const { return SU; }
183
184     // Copy the status of another candidate without changing policy.
185     void setBest(SISchedCandidate &Best) {
186       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187       SU = Best.SU;
188       Reason = Best.Reason;
189       SGPRUsage = Best.SGPRUsage;
190       VGPRUsage = Best.VGPRUsage;
191       IsLowLatency = Best.IsLowLatency;
192       LowLatencyOffset = Best.LowLatencyOffset;
193       HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
194     }
195   };
196
197   void undoSchedule();
198
199   void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
200   void releaseSucc(SUnit *SU, SDep *SuccEdge);
201   // InOrOutBlock: restrict to links pointing inside the block (true),
202   // or restrict to links pointing outside the block (false).
203   void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204
205   void nodeScheduled(SUnit *SU);
206   void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207   void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208   SUnit* pickNode();
209   void traceCandidate(const SISchedCandidate &Cand);
210   void initRegPressure(MachineBasicBlock::iterator BeginBlock,
211                        MachineBasicBlock::iterator EndBlock);
212 };
213
214 struct SIScheduleBlocks {
215   std::vector<SIScheduleBlock*> Blocks;
216   std::vector<int> TopDownIndex2Block;
217   std::vector<int> TopDownBlock2Index;
218 };
219
220 enum SISchedulerBlockCreatorVariant {
221   LatenciesAlone,
222   LatenciesGrouped,
223   LatenciesAlonePlusConsecutive
224 };
225
226 class SIScheduleBlockCreator {
227   SIScheduleDAGMI *DAG;
228   // unique_ptr handles freeing memory for us.
229   std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
230   std::map<SISchedulerBlockCreatorVariant,
231            SIScheduleBlocks> Blocks;
232   std::vector<SIScheduleBlock*> CurrentBlocks;
233   std::vector<int> Node2CurrentBlock;
234
235   // Topological sort
236   // Maps topological index to the node number.
237   std::vector<int> TopDownIndex2Block;
238   std::vector<int> TopDownBlock2Index;
239   std::vector<int> BottomUpIndex2Block;
240
241   // 0 -> Color not given.
242   // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243   // Above -> Other groups.
244   int NextReservedID;
245   int NextNonReservedID;
246   std::vector<int> CurrentColoring;
247   std::vector<int> CurrentTopDownReservedDependencyColoring;
248   std::vector<int> CurrentBottomUpReservedDependencyColoring;
249
250 public:
251   SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
252   ~SIScheduleBlockCreator();
253
254   SIScheduleBlocks
255   getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
256
257   bool isSUInBlock(SUnit *SU, unsigned ID);
258
259 private:
260   // Give a Reserved color to every high latency.
261   void colorHighLatenciesAlone();
262
263   // Create groups of high latencies with a Reserved color.
264   void colorHighLatenciesGroups();
265
266   // Compute coloring for topdown and bottom traversals with
267   // different colors depending on dependencies on Reserved colors.
268   void colorComputeReservedDependencies();
269
270   // Give color to all non-colored SUs according to Reserved groups dependencies.
271   void colorAccordingToReservedDependencies();
272
273   // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274   // The new colors are computed according to the dependencies on the other blocks
275   // formed with colorAccordingToReservedDependencies.
276   void colorEndsAccordingToDependencies();
277
278   // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279   void colorForceConsecutiveOrderInGroup();
280
281   // Merge Constant loads that have all their users into another group to the group.
282   // (TODO: else if all their users depend on the same group, put them there)
283   void colorMergeConstantLoadsNextGroup();
284
285   // Merge SUs that have all their users into another group to the group
286   void colorMergeIfPossibleNextGroup();
287
288   // Merge SUs that have all their users into another group to the group,
289   // but only for Reserved groups.
290   void colorMergeIfPossibleNextGroupOnlyForReserved();
291
292   // Merge SUs that have all their users into another group to the group,
293   // but only if the group is no more than a few SUs.
294   void colorMergeIfPossibleSmallGroupsToNextGroup();
295
296   // Divides Blocks with important size.
297   // Idea of implementation: attribute new colors depending on topdown and
298   // bottom up links to other blocks.
299   void cutHugeBlocks();
300
301   // Put in one group all instructions with no users in this scheduling region
302   // (we'd want these groups be at the end).
303   void regroupNoUserInstructions();
304
305   // Give Reserved color to export instructions
306   void colorExports();
307
308   void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
309
310   void topologicalSort();
311
312   void scheduleInsideBlocks();
313
314   void fillStats();
315 };
316
317 enum SISchedulerBlockSchedulerVariant {
318   BlockLatencyRegUsage,
319   BlockRegUsageLatency,
320   BlockRegUsage
321 };
322
323 class SIScheduleBlockScheduler {
324   SIScheduleDAGMI *DAG;
325   SISchedulerBlockSchedulerVariant Variant;
326   std::vector<SIScheduleBlock*> Blocks;
327
328   std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
329   std::set<unsigned> LiveRegs;
330   // Num of schedulable unscheduled blocks reading the register.
331   std::map<unsigned, unsigned> LiveRegsConsumers;
332
333   std::vector<unsigned> LastPosHighLatencyParentScheduled;
334   int LastPosWaitedHighLatency;
335
336   std::vector<SIScheduleBlock*> BlocksScheduled;
337   unsigned NumBlockScheduled;
338   std::vector<SIScheduleBlock*> ReadyBlocks;
339
340   unsigned VregCurrentUsage;
341   unsigned SregCurrentUsage;
342
343   // Currently is only approximation.
344   unsigned maxVregUsage;
345   unsigned maxSregUsage;
346
347   std::vector<unsigned> BlockNumPredsLeft;
348   std::vector<unsigned> BlockNumSuccsLeft;
349
350 public:
351   SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
352                            SISchedulerBlockSchedulerVariant Variant,
353                            SIScheduleBlocks BlocksStruct);
354   ~SIScheduleBlockScheduler() = default;
355
356   std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
357
358   unsigned getVGPRUsage() { return maxVregUsage; }
359   unsigned getSGPRUsage() { return maxSregUsage; }
360
361 private:
362   struct SIBlockSchedCandidate : SISchedulerCandidate {
363     // The best Block candidate.
364     SIScheduleBlock *Block = nullptr;
365
366     bool IsHighLatency;
367     int VGPRUsageDiff;
368     unsigned NumSuccessors;
369     unsigned NumHighLatencySuccessors;
370     unsigned LastPosHighLatParentScheduled;
371     unsigned Height;
372
373     SIBlockSchedCandidate() = default;
374
375     bool isValid() const { return Block; }
376
377     // Copy the status of another candidate without changing policy.
378     void setBest(SIBlockSchedCandidate &Best) {
379       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
380       Block = Best.Block;
381       Reason = Best.Reason;
382       IsHighLatency = Best.IsHighLatency;
383       VGPRUsageDiff = Best.VGPRUsageDiff;
384       NumSuccessors = Best.NumSuccessors;
385       NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
386       LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
387       Height = Best.Height;
388     }
389   };
390
391   bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
392                            SIBlockSchedCandidate &TryCand);
393   bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
394                             SIBlockSchedCandidate &TryCand);
395   SIScheduleBlock *pickBlock();
396
397   void addLiveRegs(std::set<unsigned> &Regs);
398   void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
399   void releaseBlockSuccs(SIScheduleBlock *Parent);
400   void blockScheduled(SIScheduleBlock *Block);
401
402   // Check register pressure change
403   // by scheduling a block with these LiveIn and LiveOut.
404   std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
405                                        std::set<unsigned> &OutRegs);
406
407   void schedule();
408 };
409
410 struct SIScheduleBlockResult {
411   std::vector<unsigned> SUs;
412   unsigned MaxSGPRUsage;
413   unsigned MaxVGPRUsage;
414 };
415
416 class SIScheduler {
417   SIScheduleDAGMI *DAG;
418   SIScheduleBlockCreator BlockCreator;
419
420 public:
421   SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
422
423   ~SIScheduler() = default;
424
425   struct SIScheduleBlockResult
426   scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
427                   SISchedulerBlockSchedulerVariant ScheduleVariant);
428 };
429
430 class SIScheduleDAGMI final : public ScheduleDAGMILive {
431   const SIInstrInfo *SITII;
432   const SIRegisterInfo *SITRI;
433
434   std::vector<SUnit> SUnitsLinksBackup;
435
436   // For moveLowLatencies. After all Scheduling variants are tested.
437   std::vector<unsigned> ScheduledSUnits;
438   std::vector<unsigned> ScheduledSUnitsInv;
439
440   unsigned VGPRSetID;
441   unsigned SGPRSetID;
442
443 public:
444   SIScheduleDAGMI(MachineSchedContext *C);
445
446   ~SIScheduleDAGMI() override;
447
448   // Entry point for the schedule.
449   void schedule() override;
450
451   // To init Block's RPTracker.
452   void initRPTracker(RegPressureTracker &RPTracker) {
453     RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
454   }
455
456   MachineBasicBlock *getBB() { return BB; }
457   MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
458   MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
459   LiveIntervals *getLIS() { return LIS; }
460   MachineRegisterInfo *getMRI() { return &MRI; }
461   const TargetRegisterInfo *getTRI() { return TRI; }
462   ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
463   SUnit& getEntrySU() { return EntrySU; }
464   SUnit& getExitSU() { return ExitSU; }
465
466   void restoreSULinksLeft();
467
468   template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
469                                                      _Iterator End,
470                                                      unsigned &VgprUsage,
471                                                      unsigned &SgprUsage);
472
473   std::set<unsigned> getInRegs() {
474     std::set<unsigned> InRegs;
475     for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
476       InRegs.insert(RegMaskPair.RegUnit);
477     }
478     return InRegs;
479   }
480
481   std::set<unsigned> getOutRegs() {
482     std::set<unsigned> OutRegs;
483     for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
484       OutRegs.insert(RegMaskPair.RegUnit);
485     }
486     return OutRegs;
487   };
488
489   unsigned getVGPRSetID() const { return VGPRSetID; }
490   unsigned getSGPRSetID() const { return SGPRSetID; }
491
492 private:
493   void topologicalSort();
494   // After scheduling is done, improve low latency placements.
495   void moveLowLatencies();
496
497 public:
498   // Some stats for scheduling inside blocks.
499   std::vector<unsigned> IsLowLatencySU;
500   std::vector<unsigned> LowLatencyOffset;
501   std::vector<unsigned> IsHighLatencySU;
502   // Topological sort
503   // Maps topological index to the node number.
504   std::vector<int> TopDownIndex2SU;
505   std::vector<int> BottomUpIndex2SU;
506 };
507
508 } // end namespace llvm
509
510 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H