]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
Update lldb to trunk r290819 and resolve conflicts.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / AMDGPU / SIMachineScheduler.h
1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Machine Scheduler interface
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18 #include "SIInstrInfo.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineScheduler.h"
21 #include "llvm/CodeGen/RegisterPressure.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 #include <cassert>
24 #include <cstdint>
25 #include <map>
26 #include <memory>
27 #include <set>
28 #include <vector>
29
30 namespace llvm {
31
32 enum SIScheduleCandReason {
33   NoCand,
34   RegUsage,
35   Latency,
36   Successor,
37   Depth,
38   NodeOrder
39 };
40
41 struct SISchedulerCandidate {
42   // The reason for this candidate.
43   SIScheduleCandReason Reason;
44
45   // Set of reasons that apply to multiple candidates.
46   uint32_t RepeatReasonSet;
47
48   SISchedulerCandidate()
49     :  Reason(NoCand), RepeatReasonSet(0) {}
50
51   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
52   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
53 };
54
55 class SIScheduleDAGMI;
56 class SIScheduleBlockCreator;
57
58 class SIScheduleBlock {
59   SIScheduleDAGMI *DAG;
60   SIScheduleBlockCreator *BC;
61
62   std::vector<SUnit*> SUnits;
63   std::map<unsigned, unsigned> NodeNum2Index;
64   std::vector<SUnit*> TopReadySUs;
65   std::vector<SUnit*> ScheduledSUnits;
66
67   /// The top of the unscheduled zone.
68   IntervalPressure TopPressure;
69   RegPressureTracker TopRPTracker;
70
71   // Pressure: number of said class of registers needed to
72   // store the live virtual and real registers.
73   // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
74   // Pressure of additional registers required inside the block.
75   std::vector<unsigned> InternalAdditionnalPressure;
76   // Pressure of input and output registers
77   std::vector<unsigned> LiveInPressure;
78   std::vector<unsigned> LiveOutPressure;
79   // Registers required by the block, and outputs.
80   // We do track only virtual registers.
81   // Note that some registers are not 32 bits,
82   // and thus the pressure is not equal
83   // to the number of live registers.
84   std::set<unsigned> LiveInRegs;
85   std::set<unsigned> LiveOutRegs;
86
87   bool Scheduled;
88   bool HighLatencyBlock;
89
90   std::vector<unsigned> HasLowLatencyNonWaitedParent;
91
92   // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
93   unsigned ID;
94
95   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
96   std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
97   unsigned NumHighLatencySuccessors;
98
99 public:
100   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
101                   unsigned ID):
102     DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
103     HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
104
105   ~SIScheduleBlock() = default;
106
107   unsigned getID() const { return ID; }
108
109   /// Functions for Block construction.
110   void addUnit(SUnit *SU);
111
112   // When all SUs have been added.
113   void finalizeUnits();
114
115   // Add block pred, which has instruction predecessor of SU.
116   void addPred(SIScheduleBlock *Pred);
117   void addSucc(SIScheduleBlock *Succ);
118
119   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
120   const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
121
122   unsigned Height;  // Maximum topdown path length to block without outputs
123   unsigned Depth;   // Maximum bottomup path length to block without inputs
124
125   unsigned getNumHighLatencySuccessors() const {
126     return NumHighLatencySuccessors;
127   }
128
129   bool isHighLatencyBlock() { return HighLatencyBlock; }
130
131   // This is approximative.
132   // Ideally should take into accounts some instructions (rcp, etc)
133   // are 4 times slower.
134   int getCost() { return SUnits.size(); }
135
136   // The block Predecessors and Successors must be all registered
137   // before fastSchedule().
138   // Fast schedule with no particular requirement.
139   void fastSchedule();
140
141   std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
142
143   // Complete schedule that will try to minimize reg pressure and
144   // low latencies, and will fill liveins and liveouts.
145   // Needs all MIs to be grouped between BeginBlock and EndBlock.
146   // The MIs can be moved after the scheduling,
147   // it is just used to allow correct track of live registers.
148   void schedule(MachineBasicBlock::iterator BeginBlock,
149                 MachineBasicBlock::iterator EndBlock);
150
151   bool isScheduled() { return Scheduled; }
152
153   // Needs the block to be scheduled inside
154   // TODO: find a way to compute it.
155   std::vector<unsigned> &getInternalAdditionnalRegUsage() {
156     return InternalAdditionnalPressure;
157   }
158
159   std::set<unsigned> &getInRegs() { return LiveInRegs; }
160   std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
161
162   void printDebug(bool Full);
163
164 private:
165   struct SISchedCandidate : SISchedulerCandidate {
166     // The best SUnit candidate.
167     SUnit *SU = nullptr;
168
169     unsigned SGPRUsage;
170     unsigned VGPRUsage;
171     bool IsLowLatency;
172     unsigned LowLatencyOffset;
173     bool HasLowLatencyNonWaitedParent;
174
175     SISchedCandidate() = default;
176
177     bool isValid() const { return SU; }
178
179     // Copy the status of another candidate without changing policy.
180     void setBest(SISchedCandidate &Best) {
181       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
182       SU = Best.SU;
183       Reason = Best.Reason;
184       SGPRUsage = Best.SGPRUsage;
185       VGPRUsage = Best.VGPRUsage;
186       IsLowLatency = Best.IsLowLatency;
187       LowLatencyOffset = Best.LowLatencyOffset;
188       HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
189     }
190   };
191
192   void undoSchedule();
193
194   void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
195   void releaseSucc(SUnit *SU, SDep *SuccEdge);
196   // InOrOutBlock: restrict to links pointing inside the block (true),
197   // or restrict to links pointing outside the block (false).
198   void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
199
200   void nodeScheduled(SUnit *SU);
201   void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
202   void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
203   SUnit* pickNode();
204   void traceCandidate(const SISchedCandidate &Cand);
205   void initRegPressure(MachineBasicBlock::iterator BeginBlock,
206                        MachineBasicBlock::iterator EndBlock);
207 };
208
209 struct SIScheduleBlocks {
210   std::vector<SIScheduleBlock*> Blocks;
211   std::vector<int> TopDownIndex2Block;
212   std::vector<int> TopDownBlock2Index;
213 };
214
215 enum SISchedulerBlockCreatorVariant {
216     LatenciesAlone,
217     LatenciesGrouped,
218     LatenciesAlonePlusConsecutive
219 };
220
221 class SIScheduleBlockCreator {
222   SIScheduleDAGMI *DAG;
223   // unique_ptr handles freeing memory for us.
224   std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
225   std::map<SISchedulerBlockCreatorVariant,
226            SIScheduleBlocks> Blocks;
227   std::vector<SIScheduleBlock*> CurrentBlocks;
228   std::vector<int> Node2CurrentBlock;
229
230   // Topological sort
231   // Maps topological index to the node number.
232   std::vector<int> TopDownIndex2Block;
233   std::vector<int> TopDownBlock2Index;
234   std::vector<int> BottomUpIndex2Block;
235
236   // 0 -> Color not given.
237   // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
238   // Above -> Other groups.
239   int NextReservedID;
240   int NextNonReservedID;
241   std::vector<int> CurrentColoring;
242   std::vector<int> CurrentTopDownReservedDependencyColoring;
243   std::vector<int> CurrentBottomUpReservedDependencyColoring;
244
245 public:
246   SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
247   ~SIScheduleBlockCreator();
248
249   SIScheduleBlocks
250   getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
251
252   bool isSUInBlock(SUnit *SU, unsigned ID);
253
254 private:
255   // Give a Reserved color to every high latency.
256   void colorHighLatenciesAlone();
257
258   // Create groups of high latencies with a Reserved color.
259   void colorHighLatenciesGroups();
260
261   // Compute coloring for topdown and bottom traversals with
262   // different colors depending on dependencies on Reserved colors.
263   void colorComputeReservedDependencies();
264
265   // Give color to all non-colored SUs according to Reserved groups dependencies.
266   void colorAccordingToReservedDependencies();
267
268   // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
269   // The new colors are computed according to the dependencies on the other blocks
270   // formed with colorAccordingToReservedDependencies.
271   void colorEndsAccordingToDependencies();
272
273   // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
274   void colorForceConsecutiveOrderInGroup();
275
276   // Merge Constant loads that have all their users into another group to the group.
277   // (TODO: else if all their users depend on the same group, put them there)
278   void colorMergeConstantLoadsNextGroup();
279
280   // Merge SUs that have all their users into another group to the group
281   void colorMergeIfPossibleNextGroup();
282
283   // Merge SUs that have all their users into another group to the group,
284   // but only for Reserved groups.
285   void colorMergeIfPossibleNextGroupOnlyForReserved();
286
287   // Merge SUs that have all their users into another group to the group,
288   // but only if the group is no more than a few SUs.
289   void colorMergeIfPossibleSmallGroupsToNextGroup();
290
291   // Divides Blocks with important size.
292   // Idea of implementation: attribute new colors depending on topdown and
293   // bottom up links to other blocks.
294   void cutHugeBlocks();
295
296   // Put in one group all instructions with no users in this scheduling region
297   // (we'd want these groups be at the end).
298   void regroupNoUserInstructions();
299
300   void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
301
302   void topologicalSort();
303
304   void scheduleInsideBlocks();
305
306   void fillStats();
307 };
308
309 enum SISchedulerBlockSchedulerVariant {
310   BlockLatencyRegUsage,
311   BlockRegUsageLatency,
312   BlockRegUsage
313 };
314
315 class SIScheduleBlockScheduler {
316   SIScheduleDAGMI *DAG;
317   SISchedulerBlockSchedulerVariant Variant;
318   std::vector<SIScheduleBlock*> Blocks;
319
320   std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
321   std::set<unsigned> LiveRegs;
322   // Num of schedulable unscheduled blocks reading the register.
323   std::map<unsigned, unsigned> LiveRegsConsumers;
324
325   std::vector<unsigned> LastPosHighLatencyParentScheduled;
326   int LastPosWaitedHighLatency;
327
328   std::vector<SIScheduleBlock*> BlocksScheduled;
329   unsigned NumBlockScheduled;
330   std::vector<SIScheduleBlock*> ReadyBlocks;
331
332   unsigned VregCurrentUsage;
333   unsigned SregCurrentUsage;
334
335   // Currently is only approximation.
336   unsigned maxVregUsage;
337   unsigned maxSregUsage;
338
339   std::vector<unsigned> BlockNumPredsLeft;
340   std::vector<unsigned> BlockNumSuccsLeft;
341
342 public:
343   SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
344                            SISchedulerBlockSchedulerVariant Variant,
345                            SIScheduleBlocks BlocksStruct);
346   ~SIScheduleBlockScheduler() = default;
347
348   std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
349
350   unsigned getVGPRUsage() { return maxVregUsage; }
351   unsigned getSGPRUsage() { return maxSregUsage; }
352
353 private:
354   struct SIBlockSchedCandidate : SISchedulerCandidate {
355     // The best Block candidate.
356     SIScheduleBlock *Block = nullptr;
357
358     bool IsHighLatency;
359     int VGPRUsageDiff;
360     unsigned NumSuccessors;
361     unsigned NumHighLatencySuccessors;
362     unsigned LastPosHighLatParentScheduled;
363     unsigned Height;
364
365     SIBlockSchedCandidate() = default;
366
367     bool isValid() const { return Block; }
368
369     // Copy the status of another candidate without changing policy.
370     void setBest(SIBlockSchedCandidate &Best) {
371       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
372       Block = Best.Block;
373       Reason = Best.Reason;
374       IsHighLatency = Best.IsHighLatency;
375       VGPRUsageDiff = Best.VGPRUsageDiff;
376       NumSuccessors = Best.NumSuccessors;
377       NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
378       LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
379       Height = Best.Height;
380     }
381   };
382
383   bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
384                            SIBlockSchedCandidate &TryCand);
385   bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
386                             SIBlockSchedCandidate &TryCand);
387   SIScheduleBlock *pickBlock();
388
389   void addLiveRegs(std::set<unsigned> &Regs);
390   void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
391   void releaseBlockSuccs(SIScheduleBlock *Parent);
392   void blockScheduled(SIScheduleBlock *Block);
393
394   // Check register pressure change
395   // by scheduling a block with these LiveIn and LiveOut.
396   std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
397                                        std::set<unsigned> &OutRegs);
398
399   void schedule();
400 };
401
402 struct SIScheduleBlockResult {
403   std::vector<unsigned> SUs;
404   unsigned MaxSGPRUsage;
405   unsigned MaxVGPRUsage;
406 };
407
408 class SIScheduler {
409   SIScheduleDAGMI *DAG;
410   SIScheduleBlockCreator BlockCreator;
411
412 public:
413   SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
414
415   ~SIScheduler() = default;
416
417   struct SIScheduleBlockResult
418   scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
419                   SISchedulerBlockSchedulerVariant ScheduleVariant);
420 };
421
422 class SIScheduleDAGMI final : public ScheduleDAGMILive {
423   const SIInstrInfo *SITII;
424   const SIRegisterInfo *SITRI;
425
426   std::vector<SUnit> SUnitsLinksBackup;
427
428   // For moveLowLatencies. After all Scheduling variants are tested.
429   std::vector<unsigned> ScheduledSUnits;
430   std::vector<unsigned> ScheduledSUnitsInv;
431
432   unsigned VGPRSetID;
433   unsigned SGPRSetID;
434
435 public:
436   SIScheduleDAGMI(MachineSchedContext *C);
437
438   ~SIScheduleDAGMI() override;
439
440   // Entry point for the schedule.
441   void schedule() override;
442
443   // To init Block's RPTracker.
444   void initRPTracker(RegPressureTracker &RPTracker) {
445     RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
446   }
447
448   MachineBasicBlock *getBB() { return BB; }
449   MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
450   MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
451   LiveIntervals *getLIS() { return LIS; }
452   MachineRegisterInfo *getMRI() { return &MRI; }
453   const TargetRegisterInfo *getTRI() { return TRI; }
454   SUnit& getEntrySU() { return EntrySU; }
455   SUnit& getExitSU() { return ExitSU; }
456
457   void restoreSULinksLeft();
458
459   template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
460                                                      _Iterator End,
461                                                      unsigned &VgprUsage,
462                                                      unsigned &SgprUsage);
463
464   std::set<unsigned> getInRegs() {
465     std::set<unsigned> InRegs;
466     for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
467       InRegs.insert(RegMaskPair.RegUnit);
468     }
469     return InRegs;
470   }
471
472   unsigned getVGPRSetID() const { return VGPRSetID; }
473   unsigned getSGPRSetID() const { return SGPRSetID; }
474
475 private:
476   void topologicalSort();
477   // After scheduling is done, improve low latency placements.
478   void moveLowLatencies();
479
480 public:
481   // Some stats for scheduling inside blocks.
482   std::vector<unsigned> IsLowLatencySU;
483   std::vector<unsigned> LowLatencyOffset;
484   std::vector<unsigned> IsHighLatencySU;
485   // Topological sort
486   // Maps topological index to the node number.
487   std::vector<int> TopDownIndex2SU;
488   std::vector<int> BottomUpIndex2SU;
489 };
490
491 } // end namespace llvm
492
493 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H