1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This contains a MachineSchedStrategy implementation for maximizing wave
12 /// occupancy on GCN hardware.
13 //===----------------------------------------------------------------------===//
15 #include "GCNSchedStrategy.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/RegisterClassInfo.h"
22 #define DEBUG_TYPE "misched"
26 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
27 const MachineSchedContext *C) :
28 GenericScheduler(C) { }
30 static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
31 const MachineFunction &MF) {
33 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
34 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
35 unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
36 ST.getOccupancyWithNumVGPRs(VGPRs));
37 return std::min(MinRegOccupancy,
38 ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
41 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
42 bool AtTop, const RegPressureTracker &RPTracker,
43 const SIRegisterInfo *SRI,
48 int SGPRCriticalLimit,
49 int VGPRCriticalLimit) {
54 // getDownwardPressure() and getUpwardPressure() make temporary changes to
55 // the the tracker, so we need to pass those function a non-const copy.
56 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
58 std::vector<unsigned> Pressure;
59 std::vector<unsigned> MaxPressure;
62 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
64 // FIXME: I think for bottom up scheduling, the register pressure is cached
65 // and can be retrieved by DAG->getPressureDif(SU).
66 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
69 int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
70 int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
72 // If two instructions increase the pressure of different register sets
73 // by the same amount, the generic scheduler will prefer to schedule the
74 // instruction that increases the set with the least amount of registers,
75 // which in our case would be SGPRs. This is rarely what we want, so
76 // when we report excess/critical register pressure, we do it either
77 // only for VGPRs or only for SGPRs.
79 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
80 const int MaxVGPRPressureInc = 16;
81 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
82 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
85 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
86 // to increase the likelihood we don't go over the limits. We should improve
87 // the analysis to look through dependencies to find the path with the least
89 // FIXME: This is also necessary, because some passes that run after
90 // scheduling and before regalloc increase register pressure.
91 const int ErrorMargin = 3;
92 VGPRExcessLimit -= ErrorMargin;
93 SGPRExcessLimit -= ErrorMargin;
95 // We only need to update the RPDelata for instructions that increase
96 // register pressure. Instructions that decrease or keep reg pressure
97 // the same will be marked as RegExcess in tryCandidate() when they
98 // are compared with instructions that increase the register pressure.
99 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
100 Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
101 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
104 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
105 Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
106 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit);
109 // Register pressure is considered 'CRITICAL' if it is approaching a value
110 // that would reduce the wave occupancy for the execution unit. When
111 // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
112 // has the same cost, so we don't need to prefer one over the other.
114 VGPRCriticalLimit -= ErrorMargin;
115 SGPRCriticalLimit -= ErrorMargin;
117 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
118 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
120 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
121 if (SGPRDelta > VGPRDelta) {
122 Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
123 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
125 Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
126 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
131 // This function is mostly cut and pasted from
132 // GenericScheduler::pickNodeFromQueue()
133 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
134 const CandPolicy &ZonePolicy,
135 const RegPressureTracker &RPTracker,
136 SchedCandidate &Cand) {
137 const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
138 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
139 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
140 unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
141 unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
142 unsigned SGPRExcessLimit =
143 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
144 unsigned VGPRExcessLimit =
145 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
146 unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
147 unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
148 unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
150 ReadyQueue &Q = Zone.Available;
151 for (SUnit *SU : Q) {
153 SchedCandidate TryCand(ZonePolicy);
154 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
155 SGPRPressure, VGPRPressure,
156 SGPRExcessLimit, VGPRExcessLimit,
157 SGPRCriticalLimit, VGPRCriticalLimit);
158 // Pass SchedBoundary only when comparing nodes from the same boundary.
159 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
160 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
161 if (TryCand.Reason != NoCand) {
162 // Initialize resource delta if needed in case future heuristics query it.
163 if (TryCand.ResDelta == SchedResourceDelta())
164 TryCand.initResourceDelta(Zone.DAG, SchedModel);
165 Cand.setBest(TryCand);
170 static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
174 case GenericSchedulerBase::RegCritical:
175 case GenericSchedulerBase::RegExcess:
180 // This function is mostly cut and pasted from
181 // GenericScheduler::pickNodeBidirectional()
182 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
183 // Schedule as far as possible in the direction of no choice. This is most
184 // efficient, but also provides the best heuristics for CriticalPSets.
185 if (SUnit *SU = Bot.pickOnlyChoice()) {
189 if (SUnit *SU = Top.pickOnlyChoice()) {
193 // Set the bottom-up policy based on the state of the current bottom zone and
194 // the instructions outside the zone, including the top zone.
195 CandPolicy BotPolicy;
196 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
197 // Set the top-down policy based on the state of the current top zone and
198 // the instructions outside the zone, including the bottom zone.
199 CandPolicy TopPolicy;
200 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
202 // See if BotCand is still valid (because we previously scheduled from Top).
203 DEBUG(dbgs() << "Picking from Bot:\n");
204 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
205 BotCand.Policy != BotPolicy) {
206 BotCand.reset(CandPolicy());
207 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
208 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
210 DEBUG(traceCandidate(BotCand));
213 // Check if the top Q has a better candidate.
214 DEBUG(dbgs() << "Picking from Top:\n");
215 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
216 TopCand.Policy != TopPolicy) {
217 TopCand.reset(CandPolicy());
218 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
219 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
221 DEBUG(traceCandidate(TopCand));
224 // Pick best from BotCand and TopCand.
226 dbgs() << "Top Cand: ";
227 traceCandidate(BotCand);
228 dbgs() << "Bot Cand: ";
229 traceCandidate(TopCand);
232 if (TopCand.Reason == BotCand.Reason) {
234 GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
235 TopCand.Reason = NoCand;
236 GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
237 if (TopCand.Reason != NoCand) {
238 Cand.setBest(TopCand);
240 TopCand.Reason = TopReason;
243 if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
245 } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
247 } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
249 } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
252 int TopRank = getBidirectionalReasonRank(TopCand.Reason);
253 int BotRank = getBidirectionalReasonRank(BotCand.Reason);
254 if (TopRank > BotRank) {
262 dbgs() << "Picking: ";
263 traceCandidate(Cand);
266 IsTopNode = Cand.AtTop;
270 // This function is mostly cut and pasted from
271 // GenericScheduler::pickNode()
272 SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
273 if (DAG->top() == DAG->bottom()) {
274 assert(Top.Available.empty() && Top.Pending.empty() &&
275 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
280 if (RegionPolicy.OnlyTopDown) {
281 SU = Top.pickOnlyChoice();
284 TopCand.reset(NoPolicy);
285 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
286 assert(TopCand.Reason != NoCand && "failed to find a candidate");
290 } else if (RegionPolicy.OnlyBottomUp) {
291 SU = Bot.pickOnlyChoice();
294 BotCand.reset(NoPolicy);
295 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
296 assert(BotCand.Reason != NoCand && "failed to find a candidate");
301 SU = pickNodeBidirectional(IsTopNode);
303 } while (SU->isScheduled);
305 if (SU->isTopReady())
307 if (SU->isBottomReady())
310 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());