]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
Merge llvm trunk r300422 and resolve conflicts.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / AMDGPU / SIMachineScheduler.h
1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI Machine Scheduler interface
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18 #include "SIInstrInfo.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineScheduler.h"
21 #include "llvm/CodeGen/RegisterPressure.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 #include <cassert>
24 #include <cstdint>
25 #include <map>
26 #include <memory>
27 #include <set>
28 #include <vector>
29
30 namespace llvm {
31
32 enum SIScheduleCandReason {
33   NoCand,
34   RegUsage,
35   Latency,
36   Successor,
37   Depth,
38   NodeOrder
39 };
40
41 struct SISchedulerCandidate {
42   // The reason for this candidate.
43   SIScheduleCandReason Reason = NoCand;
44
45   // Set of reasons that apply to multiple candidates.
46   uint32_t RepeatReasonSet = 0;
47
48   SISchedulerCandidate() = default;
49
50   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52 };
53
54 class SIScheduleDAGMI;
55 class SIScheduleBlockCreator;
56
57 enum SIScheduleBlockLinkKind {
58   NoData,
59   Data
60 };
61
62 class SIScheduleBlock {
63   SIScheduleDAGMI *DAG;
64   SIScheduleBlockCreator *BC;
65
66   std::vector<SUnit*> SUnits;
67   std::map<unsigned, unsigned> NodeNum2Index;
68   std::vector<SUnit*> TopReadySUs;
69   std::vector<SUnit*> ScheduledSUnits;
70
71   /// The top of the unscheduled zone.
72   IntervalPressure TopPressure;
73   RegPressureTracker TopRPTracker;
74
75   // Pressure: number of said class of registers needed to
76   // store the live virtual and real registers.
77   // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78   // Pressure of additional registers required inside the block.
79   std::vector<unsigned> InternalAdditionnalPressure;
80   // Pressure of input and output registers
81   std::vector<unsigned> LiveInPressure;
82   std::vector<unsigned> LiveOutPressure;
83   // Registers required by the block, and outputs.
84   // We do track only virtual registers.
85   // Note that some registers are not 32 bits,
86   // and thus the pressure is not equal
87   // to the number of live registers.
88   std::set<unsigned> LiveInRegs;
89   std::set<unsigned> LiveOutRegs;
90
91   bool Scheduled = false;
92   bool HighLatencyBlock = false;
93
94   std::vector<unsigned> HasLowLatencyNonWaitedParent;
95
96   // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
97   unsigned ID;
98
99   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
100   // All blocks successors, and the kind of link
101   std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
102   unsigned NumHighLatencySuccessors = 0;
103
104 public:
105   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
106                   unsigned ID):
107     DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
108
109   ~SIScheduleBlock() = default;
110
111   unsigned getID() const { return ID; }
112
113   /// Functions for Block construction.
114   void addUnit(SUnit *SU);
115
116   // When all SUs have been added.
117   void finalizeUnits();
118
119   // Add block pred, which has instruction predecessor of SU.
120   void addPred(SIScheduleBlock *Pred);
121   void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
122
123   const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
124   ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
125     getSuccs() const { return Succs; }
126
127   unsigned Height;  // Maximum topdown path length to block without outputs
128   unsigned Depth;   // Maximum bottomup path length to block without inputs
129
130   unsigned getNumHighLatencySuccessors() const {
131     return NumHighLatencySuccessors;
132   }
133
134   bool isHighLatencyBlock() { return HighLatencyBlock; }
135
136   // This is approximative.
137   // Ideally should take into accounts some instructions (rcp, etc)
138   // are 4 times slower.
139   int getCost() { return SUnits.size(); }
140
141   // The block Predecessors and Successors must be all registered
142   // before fastSchedule().
143   // Fast schedule with no particular requirement.
144   void fastSchedule();
145
146   std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147
148   // Complete schedule that will try to minimize reg pressure and
149   // low latencies, and will fill liveins and liveouts.
150   // Needs all MIs to be grouped between BeginBlock and EndBlock.
151   // The MIs can be moved after the scheduling,
152   // it is just used to allow correct track of live registers.
153   void schedule(MachineBasicBlock::iterator BeginBlock,
154                 MachineBasicBlock::iterator EndBlock);
155
156   bool isScheduled() { return Scheduled; }
157
158   // Needs the block to be scheduled inside
159   // TODO: find a way to compute it.
160   std::vector<unsigned> &getInternalAdditionnalRegUsage() {
161     return InternalAdditionnalPressure;
162   }
163
164   std::set<unsigned> &getInRegs() { return LiveInRegs; }
165   std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166
167   void printDebug(bool Full);
168
169 private:
170   struct SISchedCandidate : SISchedulerCandidate {
171     // The best SUnit candidate.
172     SUnit *SU = nullptr;
173
174     unsigned SGPRUsage;
175     unsigned VGPRUsage;
176     bool IsLowLatency;
177     unsigned LowLatencyOffset;
178     bool HasLowLatencyNonWaitedParent;
179
180     SISchedCandidate() = default;
181
182     bool isValid() const { return SU; }
183
184     // Copy the status of another candidate without changing policy.
185     void setBest(SISchedCandidate &Best) {
186       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187       SU = Best.SU;
188       Reason = Best.Reason;
189       SGPRUsage = Best.SGPRUsage;
190       VGPRUsage = Best.VGPRUsage;
191       IsLowLatency = Best.IsLowLatency;
192       LowLatencyOffset = Best.LowLatencyOffset;
193       HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
194     }
195   };
196
197   void undoSchedule();
198
199   void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
200   void releaseSucc(SUnit *SU, SDep *SuccEdge);
201   // InOrOutBlock: restrict to links pointing inside the block (true),
202   // or restrict to links pointing outside the block (false).
203   void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204
205   void nodeScheduled(SUnit *SU);
206   void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207   void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208   SUnit* pickNode();
209   void traceCandidate(const SISchedCandidate &Cand);
210   void initRegPressure(MachineBasicBlock::iterator BeginBlock,
211                        MachineBasicBlock::iterator EndBlock);
212 };
213
214 struct SIScheduleBlocks {
215   std::vector<SIScheduleBlock*> Blocks;
216   std::vector<int> TopDownIndex2Block;
217   std::vector<int> TopDownBlock2Index;
218 };
219
220 enum SISchedulerBlockCreatorVariant {
221   LatenciesAlone,
222   LatenciesGrouped,
223   LatenciesAlonePlusConsecutive
224 };
225
226 class SIScheduleBlockCreator {
227   SIScheduleDAGMI *DAG;
228   // unique_ptr handles freeing memory for us.
229   std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
230   std::map<SISchedulerBlockCreatorVariant,
231            SIScheduleBlocks> Blocks;
232   std::vector<SIScheduleBlock*> CurrentBlocks;
233   std::vector<int> Node2CurrentBlock;
234
235   // Topological sort
236   // Maps topological index to the node number.
237   std::vector<int> TopDownIndex2Block;
238   std::vector<int> TopDownBlock2Index;
239   std::vector<int> BottomUpIndex2Block;
240
241   // 0 -> Color not given.
242   // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243   // Above -> Other groups.
244   int NextReservedID;
245   int NextNonReservedID;
246   std::vector<int> CurrentColoring;
247   std::vector<int> CurrentTopDownReservedDependencyColoring;
248   std::vector<int> CurrentBottomUpReservedDependencyColoring;
249
250 public:
251   SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
252   ~SIScheduleBlockCreator();
253
254   SIScheduleBlocks
255   getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
256
257   bool isSUInBlock(SUnit *SU, unsigned ID);
258
259 private:
260   // Give a Reserved color to every high latency.
261   void colorHighLatenciesAlone();
262
263   // Create groups of high latencies with a Reserved color.
264   void colorHighLatenciesGroups();
265
266   // Compute coloring for topdown and bottom traversals with
267   // different colors depending on dependencies on Reserved colors.
268   void colorComputeReservedDependencies();
269
270   // Give color to all non-colored SUs according to Reserved groups dependencies.
271   void colorAccordingToReservedDependencies();
272
273   // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274   // The new colors are computed according to the dependencies on the other blocks
275   // formed with colorAccordingToReservedDependencies.
276   void colorEndsAccordingToDependencies();
277
278   // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279   void colorForceConsecutiveOrderInGroup();
280
281   // Merge Constant loads that have all their users into another group to the group.
282   // (TODO: else if all their users depend on the same group, put them there)
283   void colorMergeConstantLoadsNextGroup();
284
285   // Merge SUs that have all their users into another group to the group
286   void colorMergeIfPossibleNextGroup();
287
288   // Merge SUs that have all their users into another group to the group,
289   // but only for Reserved groups.
290   void colorMergeIfPossibleNextGroupOnlyForReserved();
291
292   // Merge SUs that have all their users into another group to the group,
293   // but only if the group is no more than a few SUs.
294   void colorMergeIfPossibleSmallGroupsToNextGroup();
295
296   // Divides Blocks with important size.
297   // Idea of implementation: attribute new colors depending on topdown and
298   // bottom up links to other blocks.
299   void cutHugeBlocks();
300
301   // Put in one group all instructions with no users in this scheduling region
302   // (we'd want these groups be at the end).
303   void regroupNoUserInstructions();
304
305   void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
306
307   void topologicalSort();
308
309   void scheduleInsideBlocks();
310
311   void fillStats();
312 };
313
314 enum SISchedulerBlockSchedulerVariant {
315   BlockLatencyRegUsage,
316   BlockRegUsageLatency,
317   BlockRegUsage
318 };
319
320 class SIScheduleBlockScheduler {
321   SIScheduleDAGMI *DAG;
322   SISchedulerBlockSchedulerVariant Variant;
323   std::vector<SIScheduleBlock*> Blocks;
324
325   std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
326   std::set<unsigned> LiveRegs;
327   // Num of schedulable unscheduled blocks reading the register.
328   std::map<unsigned, unsigned> LiveRegsConsumers;
329
330   std::vector<unsigned> LastPosHighLatencyParentScheduled;
331   int LastPosWaitedHighLatency;
332
333   std::vector<SIScheduleBlock*> BlocksScheduled;
334   unsigned NumBlockScheduled;
335   std::vector<SIScheduleBlock*> ReadyBlocks;
336
337   unsigned VregCurrentUsage;
338   unsigned SregCurrentUsage;
339
340   // Currently is only approximation.
341   unsigned maxVregUsage;
342   unsigned maxSregUsage;
343
344   std::vector<unsigned> BlockNumPredsLeft;
345   std::vector<unsigned> BlockNumSuccsLeft;
346
347 public:
348   SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
349                            SISchedulerBlockSchedulerVariant Variant,
350                            SIScheduleBlocks BlocksStruct);
351   ~SIScheduleBlockScheduler() = default;
352
353   std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
354
355   unsigned getVGPRUsage() { return maxVregUsage; }
356   unsigned getSGPRUsage() { return maxSregUsage; }
357
358 private:
359   struct SIBlockSchedCandidate : SISchedulerCandidate {
360     // The best Block candidate.
361     SIScheduleBlock *Block = nullptr;
362
363     bool IsHighLatency;
364     int VGPRUsageDiff;
365     unsigned NumSuccessors;
366     unsigned NumHighLatencySuccessors;
367     unsigned LastPosHighLatParentScheduled;
368     unsigned Height;
369
370     SIBlockSchedCandidate() = default;
371
372     bool isValid() const { return Block; }
373
374     // Copy the status of another candidate without changing policy.
375     void setBest(SIBlockSchedCandidate &Best) {
376       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
377       Block = Best.Block;
378       Reason = Best.Reason;
379       IsHighLatency = Best.IsHighLatency;
380       VGPRUsageDiff = Best.VGPRUsageDiff;
381       NumSuccessors = Best.NumSuccessors;
382       NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
383       LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
384       Height = Best.Height;
385     }
386   };
387
388   bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
389                            SIBlockSchedCandidate &TryCand);
390   bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
391                             SIBlockSchedCandidate &TryCand);
392   SIScheduleBlock *pickBlock();
393
394   void addLiveRegs(std::set<unsigned> &Regs);
395   void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
396   void releaseBlockSuccs(SIScheduleBlock *Parent);
397   void blockScheduled(SIScheduleBlock *Block);
398
399   // Check register pressure change
400   // by scheduling a block with these LiveIn and LiveOut.
401   std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
402                                        std::set<unsigned> &OutRegs);
403
404   void schedule();
405 };
406
407 struct SIScheduleBlockResult {
408   std::vector<unsigned> SUs;
409   unsigned MaxSGPRUsage;
410   unsigned MaxVGPRUsage;
411 };
412
413 class SIScheduler {
414   SIScheduleDAGMI *DAG;
415   SIScheduleBlockCreator BlockCreator;
416
417 public:
418   SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
419
420   ~SIScheduler() = default;
421
422   struct SIScheduleBlockResult
423   scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
424                   SISchedulerBlockSchedulerVariant ScheduleVariant);
425 };
426
427 class SIScheduleDAGMI final : public ScheduleDAGMILive {
428   const SIInstrInfo *SITII;
429   const SIRegisterInfo *SITRI;
430
431   std::vector<SUnit> SUnitsLinksBackup;
432
433   // For moveLowLatencies. After all Scheduling variants are tested.
434   std::vector<unsigned> ScheduledSUnits;
435   std::vector<unsigned> ScheduledSUnitsInv;
436
437   unsigned VGPRSetID;
438   unsigned SGPRSetID;
439
440 public:
441   SIScheduleDAGMI(MachineSchedContext *C);
442
443   ~SIScheduleDAGMI() override;
444
445   // Entry point for the schedule.
446   void schedule() override;
447
448   // To init Block's RPTracker.
449   void initRPTracker(RegPressureTracker &RPTracker) {
450     RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
451   }
452
453   MachineBasicBlock *getBB() { return BB; }
454   MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
455   MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
456   LiveIntervals *getLIS() { return LIS; }
457   MachineRegisterInfo *getMRI() { return &MRI; }
458   const TargetRegisterInfo *getTRI() { return TRI; }
459   ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
460   SUnit& getEntrySU() { return EntrySU; }
461   SUnit& getExitSU() { return ExitSU; }
462
463   void restoreSULinksLeft();
464
465   template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
466                                                      _Iterator End,
467                                                      unsigned &VgprUsage,
468                                                      unsigned &SgprUsage);
469
470   std::set<unsigned> getInRegs() {
471     std::set<unsigned> InRegs;
472     for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
473       InRegs.insert(RegMaskPair.RegUnit);
474     }
475     return InRegs;
476   }
477
478   std::set<unsigned> getOutRegs() {
479     std::set<unsigned> OutRegs;
480     for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
481       OutRegs.insert(RegMaskPair.RegUnit);
482     }
483     return OutRegs;
484   };
485
486   unsigned getVGPRSetID() const { return VGPRSetID; }
487   unsigned getSGPRSetID() const { return SGPRSetID; }
488
489 private:
490   void topologicalSort();
491   // After scheduling is done, improve low latency placements.
492   void moveLowLatencies();
493
494 public:
495   // Some stats for scheduling inside blocks.
496   std::vector<unsigned> IsLowLatencySU;
497   std::vector<unsigned> LowLatencyOffset;
498   std::vector<unsigned> IsHighLatencySU;
499   // Topological sort
500   // Maps topological index to the node number.
501   std::vector<int> TopDownIndex2SU;
502   std::vector<int> BottomUpIndex2SU;
503 };
504
505 } // end namespace llvm
506
507 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H