1 //=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a hazard recognizer for the SystemZ scheduler.
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
// processor resources.
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
26 // A block that has only one predecessor continues scheduling with that
27 // predecessor's state (which may be updated by emitting branches).
29 // ===---------------------------------------------------------------------===//
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
36 #define DEBUG_TYPE "machine-scheduler"
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
// Exposed as the hidden integer command-line option "procres-cost-lim".
// EmitInstruction() lets a resource become the critical resource only once
// its counter exceeds this value.
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42 cl::desc("The OOO window for processor "
43 "resources during scheduling."),
// Returns the number of decoder slots SU is accounted for: 0, 1, 2 or 3,
// as labelled on the individual return statements below.
// NOTE(review): the choice is presumably driven by SU's scheduling class
// (SC) -- confirm against the selecting conditions.
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48 const MCSchedClassDesc *SC = getSchedClass(SU);
50 return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
54 return 2; // Cracked instruction
56 return 3; // Expanded/group-alone instruction
59 return 1; // Normal instruction
// Computes a cycle index, starting from the number of decoder slots used in
// the current group (CurrGroupSize).
// SU may be null (dumpState() calls this without an argument). When an SU is
// given and it does not fit into the current group, index values 1/2 and 4/5
// are special-cased.
// NOTE(review): presumably the index is moved on to where the next group
// starts -- confirm.
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63 unsigned Idx = CurrGroupSize;
67 if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68 if (Idx == 1 || Idx == 2)
70 else if (Idx == 4 || Idx == 5)
// Reports Hazard when the unit `m` does not fit into the current decoder
// group and NoHazard when it does. The Stalls argument is not consulted by
// the expression below.
77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
78 getHazardType(SUnit *m, int Stalls) {
79 return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
// Puts the recognizer back into an initial state: no 4-register-operand
// instruction in the current group, all processor resource counters and the
// critical resource cleared, no FPd op seen (UINT_MAX is used as the "none"
// marker), no last emitted instruction, and an empty debug description of
// the current group.
82 void SystemZHazardRecognizer::Reset() {
84 CurrGroupHas4RegOps = false;
85 clearProcResCounters();
87 LastFPdOpCycleIdx = UINT_MAX;
88 LastEmittedMI = nullptr;
89 LLVM_DEBUG(CurGroupDbg = "";);
// Decides whether SU can be placed in the current decoder group. Callers use
// the result as a boolean (see getHazardType() and EmitInstruction()).
// The decision looks at SU's scheduling class, the number of slots already
// used (CurrGroupSize) and the restriction that an instruction with 4
// register operands cannot occupy the last slot.
93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
94 const MCSchedClassDesc *SC = getSchedClass(SU);
98 // A cracked instruction only fits into schedule if the current
// decoder group is empty (CurrGroupSize == 0).
101 return (CurrGroupSize == 0);
103 // An instruction with 4 register operands will not fit in last slot.
// The assert documents the invariant that a group already holding such an
// instruction never reaches two used slots here.
104 assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
105 "Current decoder group is already full!");
106 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
109 // Since a full group is handled immediately in EmitInstruction(),
110 // SU should fit into current group. NumSlots should be 1 or 0,
111 // since it is not a cracked or expanded instruction.
112 assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
113 "Expected normal instruction to fit in non-full group!");
// Inspects the operands listed in MI's instruction description (MCInstrDesc),
// looking up the register class of each operand index.
// NOTE(review): going by the name and the callers, the result tells whether
// MI has four register operands -- confirm against the counting logic.
118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
119 const MachineFunction &MF = *MI->getParent()->getParent();
120 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
121 const MCInstrDesc &MID = MI->getDesc();
123 for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
124 const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
// Non-def operands (index >= number of defs) that carry a TIED_TO
// constraint are singled out here.
// NOTE(review): presumably so that a tied use is not counted in addition to
// the def it is tied to -- confirm.
127 if (OpIdx >= MID.getNumDefs() &&
128 MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
// Finishes the current decoder group and prepares the state for the next
// one: the 4-register-operand flag is cleared, every positive processor
// resource counter is decremented by one, and the critical resource may be
// cleared. An empty group (CurrGroupSize == 0) is special-cased first.
135 void SystemZHazardRecognizer::nextGroup() {
136 if (CurrGroupSize == 0)
139 LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
140 LLVM_DEBUG(CurGroupDbg = "";);
144 // Reset counter for next group.
146 CurrGroupHas4RegOps = false;
148 // Decrease counters for execution units by one.
// Counters already at zero are left alone.
149 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
150 if (ProcResourceCounters[i] > 0)
151 ProcResourceCounters[i]--;
153 // Clear CriticalResourceIdx if it is now below the threshold.
// UINT_MAX in CriticalResourceIdx means "no critical resource".
154 if (CriticalResourceIdx != UINT_MAX &&
155 (ProcResourceCounters[CriticalResourceIdx] <=
157 CriticalResourceIdx = UINT_MAX;
159 LLVM_DEBUG(dumpState(););
162 #ifndef NDEBUG // Debug output
// Debug helper: writes a compact description of SU to OS. The output starts
// with "SU(<node number>):" and the opcode name, followed by information
// taken from SU's scheduling class (processor resources with their cycle
// counts, micro-op count, group begin/end properties) and flags of the SU.
163 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
164 OS << "SU(" << SU->NodeNum << "):";
165 OS << TII->getName(SU->getInstr()->getOpcode());
167 const MCSchedClassDesc *SC = getSchedClass(SU);
// Walk the processor resources written by this scheduling class and print a
// shortened unit name for each.
171 for (TargetSchedModel::ProcResIter
172 PI = SchedModel->getWriteProcResBegin(SC),
173 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
174 const MCProcResourceDesc &PRD =
175 *SchedModel->getProcResource(PI->ProcResourceIdx);
176 std::string FU(PRD.Name);
177 // trim e.g. Z13_FXaUnit -> FXa
// First drop everything up to and including the first '_', then look for
// the "Unit" suffix.
178 FU = FU.substr(FU.find("_") + 1);
179 size_t Pos = FU.find("Unit");
180 if (Pos != std::string::npos)
182 if (FU == "LS") // LSUnit -> LSU
187 OS << "(" << PI->Cycles << "cyc)";
// Scheduling-class properties: micro-op count (only when above one) and
// whether the instruction begins and/or ends a decoder group.
190 if (SC->NumMicroOps > 1)
191 OS << "/" << SC->NumMicroOps << "uops";
192 if (SC->BeginGroup && SC->EndGroup)
193 OS << "/GroupsAlone";
194 else if (SC->BeginGroup)
195 OS << "/BeginsGroup";
196 else if (SC->EndGroup)
// Per-SU properties: unbuffered resource use and 4 register operands.
198 if (SU->isUnbuffered)
200 if (has4RegOps(SU->getInstr()))
// Debug helper: prints "++ " followed by Msg to dbgs(), then the contents of
// the current decoder group. An empty group is printed as " <empty>";
// otherwise the accumulated description (CurGroupDbg) is shown in braces
// together with the number of decoder slots used and a ", 4RegOps" marker
// when the group contains an instruction with 4 register operands.
204 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
205 dbgs() << "++ " << Msg;
208 if (CurGroupDbg.empty())
209 dbgs() << " <empty>\n";
211 dbgs() << "{ " << CurGroupDbg << " }";
// "slot" is pluralized only when more than one slot is in use.
212 dbgs() << " (" << CurrGroupSize << " decoder slot"
213 << (CurrGroupSize > 1 ? "s":"")
214 << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
// Debug helper: prints the processor resource counters that are currently
// above zero as "<resource name>:<count>" pairs, and the name of the
// critical resource if one is set (CriticalResourceIdx != UINT_MAX).
219 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
// First pass over the counters.
// NOTE(review): presumably this only detects whether any counter is
// non-zero, so that nothing is printed otherwise -- confirm.
222 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
223 if (ProcResourceCounters[i] > 0) {
// Second pass: print each non-zero counter.
231 dbgs() << "++ | Resource counters: ";
232 for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
233 if (ProcResourceCounters[i] > 0)
234 dbgs() << SchedModel->getProcResource(i)->Name
235 << ":" << ProcResourceCounters[i] << " ";
238 if (CriticalResourceIdx != UINT_MAX)
239 dbgs() << "++ | Critical resource: "
240 << SchedModel->getProcResource(CriticalResourceIdx)->Name
// Debug helper: prints the complete recognizer state to dbgs() -- the
// current decoder group, the current cycle index, the processor resource
// counters and, if an FPd op has been recorded, the cycle index of the last
// one.
244 void SystemZHazardRecognizer::dumpState() const {
245 dumpCurrGroup("| Current decoder group");
246 dbgs() << "++ | Current cycle index: "
247 << getCurrCycleIdx() << "\n";
248 dumpProcResourceCounters();
// UINT_MAX marks "no FPd op seen yet"; nothing is printed in that case.
249 if (LastFPdOpCycleIdx != UINT_MAX)
250 dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
// Resets processor resource tracking: ProcResourceCounters is resized to one
// zeroed entry per processor resource kind of the scheduling model, and the
// critical resource is marked as unset (UINT_MAX).
255 void SystemZHazardRecognizer::clearProcResCounters() {
256 ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
257 CriticalResourceIdx = UINT_MAX;
// Returns true if MI is a branch, a return, or has the SystemZ::CondTrap
// opcode. File-local helper used by emitInstruction().
260 static inline bool isBranchRetTrap(MachineInstr *MI) {
261 return (MI->isBranch() || MI->isReturn() ||
262 MI->getOpcode() == SystemZ::CondTrap);
265 // Update state with SU as the next scheduled unit.
// In order, this:
//  - checks whether SU fits into the current decoder group,
//  - records SU's instruction as LastEmittedMI,
//  - adds the cycles of each processor resource used by SU to
//    ProcResourceCounters and re-evaluates the critical resource,
//  - remembers the cycle index of an unbuffered (FPd) SU,
//  - accounts for SU's decoder slots in CurrGroupSize, and
//  - checks whether the group is now full or ended by SU.
266 void SystemZHazardRecognizer::
267 EmitInstruction(SUnit *SU) {
268 const MCSchedClassDesc *SC = getSchedClass(SU);
269 LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
271 LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
273 // If scheduling an SU that must begin a new decoder group, move on
275 if (!fitsIntoCurrentGroup(SU))
// Debug builds only: append SU's description to the textual dump of the
// current group, comma-separated from earlier entries.
278 LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
279 if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
281 LastEmittedMI = SU->getInstr();
283 // After returning from a call, we don't know much about the state.
285 LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
287 LastEmittedMI = SU->getInstr();
291 // Increase counter for execution unit(s).
292 for (TargetSchedModel::ProcResIter
293 PI = SchedModel->getWriteProcResBegin(SC),
294 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
295 // Don't handle FPd together with the other resources.
// Such resources are recognized by a BufferSize of 1.
296 if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
299 ProcResourceCounters[PI->ProcResourceIdx];
300 CurrCounter += PI->Cycles;
301 // Check if this is now the new critical resource.
// The counter must exceed ProcResCostLim, and either no critical resource
// is set yet or this is a different resource than the current critical one
// (compared against that one's counter).
302 if ((CurrCounter > ProcResCostLim) &&
303 (CriticalResourceIdx == UINT_MAX ||
304 (PI->ProcResourceIdx != CriticalResourceIdx &&
306 ProcResourceCounters[CriticalResourceIdx]))) {
308 dbgs() << "++ New critical resource: "
309 << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
311 CriticalResourceIdx = PI->ProcResourceIdx;
315 // Make note of an instruction that uses a blocking resource (FPd).
316 if (SU->isUnbuffered) {
317 LastFPdOpCycleIdx = getCurrCycleIdx(SU);
318 LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
322 // Insert SU into current group by increasing number of slots used
324 CurrGroupSize += getNumDecoderSlots(SU);
325 CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
327 ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
328 assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// The limit is 2 slots when the group contains an instruction with 4
// register operands and SU itself takes fewer than 3 slots; otherwise 3.
330 // Check if current group is now full/ended. If so, move on to next
331 // group to be ready to evaluate more candidates.
332 if (CurrGroupSize == GroupLim || SC->EndGroup)
// Returns a cost for scheduling SU next with respect to decoder grouping.
// As the existing comments describe, a negative value means SU fits well.
// Where a group would be closed before all 3 slots are used, the cost is the
// number of slots left unused.
336 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
337 const MCSchedClassDesc *SC = getSchedClass(SU);
341 // If SU begins new group, it can either break a current group early
342 // or fit naturally if current group is empty (negative cost).
343 if (SC->BeginGroup) {
// Slots of the current group that would be left unused.
345 return 3 - CurrGroupSize;
349 // Similarly, a group-ending SU may either fit well (last in group), or
350 // end the group prematurely.
352 unsigned resultingGroupSize =
353 (CurrGroupSize + getNumDecoderSlots(SU));
// Fewer than 3 slots used after adding SU: the group ends prematurely.
354 if (resultingGroupSize < 3)
355 return (3 - resultingGroupSize);
359 // An instruction with 4 register operands will not fit in last slot.
360 if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
363 // Most instructions can be placed in any decoder slot.
// Tells whether the unbuffered (FPd) unit SU would be well placed if
// scheduled now, judged by its distance to the previously emitted FPd op.
// With a previous FPd op recorded, the result is true exactly when SU's
// cycle index and LastFPdOpCycleIdx differ by 3 in either direction.
// SU must be unbuffered (asserted).
367 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
368 assert (SU->isUnbuffered);
369 // If this is the first FPd op, it should be scheduled high.
// UINT_MAX in LastFPdOpCycleIdx means no FPd op has been emitted yet.
370 if (LastFPdOpCycleIdx == UINT_MAX)
372 // If this is not the first FPd op, it should go into the other side
373 // of the processor to use the other FPd unit there. This should
374 // generally happen if two FPd ops are placed with 2 other
375 // instructions between them (modulo 6).
376 unsigned SUCycleIdx = getCurrCycleIdx(SU);
// Both values are unsigned, so subtract the smaller from the larger.
377 if (LastFPdOpCycleIdx > SUCycleIdx)
378 return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
379 return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
// Computes a processor-resource related cost for scheduling SU next.
// For an unbuffered (FPd) SU the cost is one of the extremes: INT_MIN when
// isFPdOpPreferred_distance() approves of the placement, INT_MAX otherwise.
// For any other SU, if a critical resource is currently set, SU's resource
// usage is scanned for that critical resource.
382 int SystemZHazardRecognizer::
383 resourcesCost(SUnit *SU) {
386 const MCSchedClassDesc *SC = getSchedClass(SU);
390 // For a FPd op, either return min or max value as indicated by the
391 // distance to any prior FPd op.
392 if (SU->isUnbuffered)
393 Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
394 // For other instructions, give a cost to the use of the critical resource.
395 else if (CriticalResourceIdx != UINT_MAX) {
396 for (TargetSchedModel::ProcResIter
397 PI = SchedModel->getWriteProcResBegin(SC),
398 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
399 if (PI->ProcResourceIdx == CriticalResourceIdx)
// Feeds a single MachineInstr through the recognizer. MI is wrapped in a
// temporary SUnit whose relevant flags are filled in from MI and its
// scheduling class, and that unit is passed to EmitInstruction().
// Afterwards, branches, returns and conditional traps get extra decoder
// group handling that depends on TakenBranch. Any other terminator trips
// the assertion at the end.
406 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
408 // Make a temporary SUnit.
411 // Set interesting flags.
412 SU.isCall = MI->isCall();
// Derive hasReservedResource / isUnbuffered from the BufferSize of each
// processor resource written by MI's scheduling class.
414 const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
415 for (const MCWriteProcResEntry &PRE :
416 make_range(SchedModel->getWriteProcResBegin(SC),
417 SchedModel->getWriteProcResEnd(SC))) {
418 switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
420 SU.hasReservedResource = true;
423 SU.isUnbuffered = true;
// Remember the group size from before emission; the not-taken branch
// handling below needs to know which slot MI went into.
430 unsigned GroupSizeBeforeEmit = CurrGroupSize;
431 EmitInstruction(&SU);
433 if (!TakenBranch && isBranchRetTrap(MI)) {
434 // NT Branch on second slot ends group.
435 if (GroupSizeBeforeEmit == 1)
// A taken branch is handled only when the current group is non-empty.
439 if (TakenBranch && CurrGroupSize > 0)
442 assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
443 "Scheduler: unhandled terminator!");
// Takes over scheduling state from another recognizer (Incoming). The
// assignments below copy the decoder group size (plus its debug description
// in debug builds), the processor resource counters, the critical resource
// index, the cycle index of the last FPd op and the group count.
446 void SystemZHazardRecognizer::
447 copyState(SystemZHazardRecognizer *Incoming) {
448 // Current decoder group
449 CurrGroupSize = Incoming->CurrGroupSize;
450 LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
452 // Processor resources
453 ProcResourceCounters = Incoming->ProcResourceCounters;
454 CriticalResourceIdx = Incoming->CriticalResourceIdx;
457 LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
458 GrpCount = Incoming->GrpCount;