1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements hazard recognizers for scheduling on GCN processors.
12 //===----------------------------------------------------------------------===//
14 #include "GCNHazardRecognizer.h"
15 #include "AMDGPUSubtarget.h"
16 #include "SIInstrInfo.h"
17 #include "llvm/CodeGen/ScheduleDAG.h"
18 #include "llvm/Support/Debug.h"
22 //===----------------------------------------------------------------------===//
23 // Hazard Recognizer Implementation
24 //===----------------------------------------------------------------------===//
// Constructs the recognizer for MF: no instruction is recorded for the current
// cycle yet (CurrCycleInstr is null) and MF's SISubtarget is cached in ST.
// NOTE(review): further member initializers and the constructor body are not
// visible in this listing — confirm against the full source.
26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
27 CurrCycleInstr(nullptr),
29 ST(MF.getSubtarget<SISubtarget>()) {
// SUnit overload: forwards the unit's MachineInstr to the MachineInstr
// overload of EmitInstruction.
33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
34 EmitInstruction(SU->getInstr());
// MachineInstr overload of EmitInstruction.
// NOTE(review): the body is not visible in this listing — presumably it
// records MI as the instruction for the current cycle (AdvanceCycle reads
// CurrCycleInstr); confirm against the full source.
37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
// True when Opcode is V_DIV_FMAS_F32 or V_DIV_FMAS_F64.
41 static bool isDivFMas(unsigned Opcode) {
42 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
// True when Opcode is S_GETREG_B32.
45 static bool isSGetReg(unsigned Opcode) {
46 return Opcode == AMDGPU::S_GETREG_B32;
// True when Opcode is either s_setreg form: S_SETREG_B32 or
// S_SETREG_IMM32_B32.
49 static bool isSSetReg(unsigned Opcode) {
50 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
// True when Opcode is V_READLANE_B32 or V_WRITELANE_B32.
53 static bool isRWLane(unsigned Opcode) {
54 return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
// True when Opcode is S_RFE_B64.
57 static bool isRFE(unsigned Opcode) {
58 return Opcode == AMDGPU::S_RFE_B64;
// Returns the hardware-register ID encoded in RegInstr: the immediate of its
// simm16 operand masked with AMDGPU::Hwreg::ID_MASK_.
// RegOp is dereferenced without a null check, so RegInstr must actually carry
// a simm16 operand.
61 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
63 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
64 AMDGPU::OpName::simm16);
65 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
// Scheduler query: decides whether issuing SU's instruction now would run into
// a hazard. Each test below pairs an instruction-class predicate with the
// matching check*Hazards routine and fires when that routine reports more than
// zero outstanding wait states.
// NOTE(review): the statement executed when a test fires and the final
// fall-through result are not visible in this listing — presumably a
// "noop hazard" versus "no hazard" HazardType; confirm. Stalls is not
// referenced in the visible lines.
68 ScheduleHazardRecognizer::HazardType
69 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
70 MachineInstr *MI = SU->getInstr();
72 if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
75 if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
78 if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
81 if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
84 if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
87 if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
90 if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
93 if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
96 if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
// SUnit overload: forwards the unit's MachineInstr to the MachineInstr
// overload of PreEmitNoops and returns its result.
102 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
103 return PreEmitNoops(SU->getInstr());
// Computes how many wait states must be inserted ahead of MI. The check*
// routines may return zero or a negative value when nothing is outstanding, so
// every result is clamped to at least 0.
// SMRD, s_getreg, s_setreg and s_rfe each use a single dedicated check; a VALU
// instruction takes the maximum over every VALU-related check that applies.
// NOTE(review): the VALU branch's return and the final fall-through return are
// not visible in this listing — presumably WaitStates and 0; confirm.
106 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
107 if (SIInstrInfo::isSMRD(*MI))
108 return std::max(0, checkSMRDHazards(MI));
// VALU instructions: start from the generic VALU check, then fold in each
// more specific check whose predicate also matches MI.
110 if (SIInstrInfo::isVALU(*MI)) {
111 int WaitStates = std::max(0, checkVALUHazards(MI));
113 if (SIInstrInfo::isVMEM(*MI))
114 WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
116 if (SIInstrInfo::isDPP(*MI))
117 WaitStates = std::max(WaitStates, checkDPPHazards(MI));
119 if (isDivFMas(MI->getOpcode()))
120 WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
122 if (isRWLane(MI->getOpcode()))
123 WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
128 if (isSGetReg(MI->getOpcode()))
129 return std::max(0, checkGetRegHazards(MI));
131 if (isSSetReg(MI->getOpcode()))
132 return std::max(0, checkSetRegHazards(MI));
134 if (isRFE(MI->getOpcode()))
135 return std::max(0, checkRFEHazards(MI));
// Records an emitted noop as a nullptr entry at the front of EmittedInstrs.
140 void GCNHazardRecognizer::EmitNoop() {
141 EmittedInstrs.push_front(nullptr);
// Advances the tracked history by one cycle: pushes the current-cycle
// instruction onto the front of EmittedInstrs, pads with nullptr entries for
// the additional wait states it occupies, trims the history to
// getMaxLookAhead() entries, and clears CurrCycleInstr.
// NOTE(review): the handling of the "no instruction this cycle" case described
// by the first comment below is not visible in this listing. CurrCycleInstr is
// dereferenced for getNumWaitStates, so presumably a null check precedes it —
// confirm. The loop's condition/increment line is likewise not visible.
144 void GCNHazardRecognizer::AdvanceCycle() {
146 // When the scheduler detects a stall, it will call AdvanceCycle() without
147 // emitting any instructions.
151 const SIInstrInfo *TII = ST.getInstrInfo();
152 unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
154 // Keep track of emitted instructions
155 EmittedInstrs.push_front(CurrCycleInstr);
157 // Add a nullptr for each additional wait state after the first. Make sure
158 // not to add more than getMaxLookAhead() items to the list, since we
159 // truncate the list to that size right after this loop.
160 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
162 EmittedInstrs.push_front(nullptr);
165 // getMaxLookAhead() is the largest number of wait states we will ever need
166 // to insert, so there is no point in keeping track of more than that many
168 EmittedInstrs.resize(getMaxLookAhead());
170 CurrCycleInstr = nullptr;
// Bottom-up scheduling hook. Not supported by this recognizer: reaching it is
// reported through llvm_unreachable.
173 void GCNHazardRecognizer::RecedeCycle() {
174 llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
177 //===----------------------------------------------------------------------===//
179 //===----------------------------------------------------------------------===//
// Walks EmittedInstrs from the front (the most recently pushed entry) looking
// for an instruction accepted by IsHazard; nullptr entries never match.
// When nothing matches, returns INT_MAX so that the "required - since"
// differences computed by callers come out negative.
// NOTE(review): the wait-state counter and the value returned on a match are
// not visible in this listing — presumably the number of entries passed before
// the match; confirm.
181 int GCNHazardRecognizer::getWaitStatesSince(
182 function_ref<bool(MachineInstr *)> IsHazard) {
185 for (MachineInstr *MI : EmittedInstrs) {
187 if (!MI || !IsHazard(MI))
191 return std::numeric_limits<int>::max();
// Variant of getWaitStatesSince that only matches instructions which satisfy
// IsHazardDef and also modify Reg, as reported by
// MachineInstr::modifiesRegister using the subtarget's register info.
194 int GCNHazardRecognizer::getWaitStatesSinceDef(
195 unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
196 const SIRegisterInfo *TRI = ST.getRegisterInfo();
// IsHazardDef is evaluated first; the register test only runs if it accepts MI.
198 auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
199 return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
202 return getWaitStatesSince(IsHazardFn);
// Variant of getWaitStatesSince that only matches s_setreg instructions (per
// isSSetReg) which also satisfy IsHazard.
205 int GCNHazardRecognizer::getWaitStatesSinceSetReg(
206 function_ref<bool(MachineInstr *)> IsHazard) {
// The opcode test runs first, so IsHazard is only ever handed s_setreg
// instructions.
208 auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
209 return isSSetReg(MI->getOpcode()) && IsHazard(MI);
212 return getWaitStatesSince(IsHazardFn);
215 //===----------------------------------------------------------------------===//
216 // No-op Hazard Detection
217 //===----------------------------------------------------------------------===//
// Inserts the register of the operands in Ops into Set.
// NOTE(review): the line between the loop header and the insert is not visible
// in this listing — presumably it skips operands that are not registers before
// Op.getReg() is called; confirm.
219 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops,
220 std::set<unsigned> &Set) {
221 for (const MachineOperand &Op : Ops) {
223 Set.insert(Op.getReg());
// Decides whether appending SMEM to the run of SMRD instructions at the front
// of EmittedInstrs (the current soft clause) would leave some register both
// defined and used inside the clause.
// The defs and uses of the preceding clause members and of SMEM are collected
// into two sets; a non-empty intersection of those sets is the hazard.
// NOTE(review): none of the return statements are visible in this listing —
// presumably 0 for "no hazard" and a positive count when the clause must be
// broken; confirm.
227 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
228 // SMEM soft clause are only present on VI+
229 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
232 // A soft-clause is any group of consecutive SMEM instructions. The
233 // instructions in this group may return out of order and/or may be
234 // replayed (i.e. the same instruction issued more than once).
236 // In order to handle these situations correctly we need to make sure
237 // that when a clause has more than one instruction, no instruction in the
238 // clause writes to a register that is read by another instruction in the
239 // clause (including itself). If we encounter this situation, we need to
240 // break the clause by inserting a non SMEM instruction.
242 std::set<unsigned> ClauseDefs;
243 std::set<unsigned> ClauseUses;
245 for (MachineInstr *MI : EmittedInstrs) {
247 // When we hit a non-SMEM instruction then we have passed the start of the
248 // clause and we can stop.
249 if (!MI || !SIInstrInfo::isSMRD(*MI))
252 addRegsToSet(MI->defs(), ClauseDefs);
253 addRegsToSet(MI->uses(), ClauseUses);
// No defs collected from the preceding instructions: nothing in the existing
// clause can conflict.
256 if (ClauseDefs.empty())
259 // FIXME: When we support stores, we need to make sure not to put loads and
260 // stores in the same clause if they use the same address. For now, just
261 // start a new clause whenever we see a store.
262 if (SMEM->mayStore())
// Fold the candidate instruction's own operands in, so a register it both
// reads and writes is detected as well.
265 addRegsToSet(SMEM->defs(), ClauseDefs);
266 addRegsToSet(SMEM->uses(), ClauseUses);
// Result is sized to the larger set, an upper bound on the intersection size.
268 std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size()));
269 std::vector<unsigned>::iterator End;
// std::set iterates in sorted order, which std::set_intersection requires.
271 End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(),
272 ClauseUses.begin(), ClauseUses.end(), Result.begin());
274 // If the set of defs and uses intersect then we cannot add this instruction
275 // to the clause, so we have a hazard.
276 if (End != Result.begin())
// Returns the wait states needed before SMRD: the soft-clause check result
// and, on SOUTHERN_ISLANDS only, at least (4 - wait states since a VALU
// instruction last wrote a register that SMRD reads).
// NOTE(review): the local ST declared below shadows the member ST, which the
// constructor initializes from the same MF.getSubtarget<SISubtarget>() call;
// the local looks redundant.
// NOTE(review): the lines between the loop header and its first visible
// statement are not shown here — presumably they skip non-register uses before
// Use.getReg() is called; confirm.
282 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
283 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
284 const SIInstrInfo *TII = ST.getInstrInfo();
285 int WaitStatesNeeded = 0;
287 WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
289 // This SMRD hazard only affects SI.
290 if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
291 return WaitStatesNeeded;
293 // A read of an SGPR by SMRD instruction requires 4 wait states when the
294 // SGPR was written by a VALU instruction.
295 int SmrdSgprWaitStates = 4;
296 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
298 for (const MachineOperand &Use : SMRD->uses()) {
// Negative when the defining VALU is already far enough back (or absent); the
// max below keeps the largest requirement seen so far.
301 int WaitStatesNeededForUse =
302 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
303 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
305 return WaitStatesNeeded;
// Returns the wait states needed before VMEM because of register uses that
// were recently written by a VALU instruction: the largest value of
// (5 - wait states since that write) over the uses examined, never below 0.
// NOTE(review): the result of the pre-VOLCANIC_ISLANDS early exit and the
// statement guarded by the operand filter are not visible in this listing —
// presumably `return 0` and `continue` (skipping non-register and VGPR uses);
// confirm.
308 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
309 const SIInstrInfo *TII = ST.getInstrInfo();
311 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
314 const SIRegisterInfo &TRI = TII->getRegisterInfo();
316 // A read of an SGPR by a VMEM instruction requires 5 wait states when the
317 // SGPR was written by a VALU Instruction.
318 int VmemSgprWaitStates = 5;
319 int WaitStatesNeeded = 0;
320 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
322 for (const MachineOperand &Use : VMEM->uses()) {
323 if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
326 int WaitStatesNeededForUse =
327 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
328 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
330 return WaitStatesNeeded;
// Returns the wait states needed before DPP because of VGPR uses that were
// recently written: the largest value of (2 - wait states since that write)
// over the uses examined, never below 0.
// NOTE(review): getWaitStatesSinceDef is called with only the register, so it
// relies on a default IsHazardDef argument declared outside this file —
// confirm which instructions that default accepts (the comment below says
// VALU writes). The statement guarded by the operand filter is not visible in
// this listing — presumably `continue` (skipping non-VGPR uses); confirm.
333 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
334 const SIRegisterInfo *TRI = ST.getRegisterInfo();
336 // Check for DPP VGPR read after VALU VGPR write.
337 int DppVgprWaitStates = 2;
338 int WaitStatesNeeded = 0;
340 for (const MachineOperand &Use : DPP->uses()) {
341 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
343 int WaitStatesNeededForUse =
344 DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
345 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
348 return WaitStatesNeeded;
// Returns 4 minus the number of wait states since a VALU instruction last
// modified VCC. The result is zero or negative when no additional wait states
// are required; callers compare it against 0 or clamp it.
// DivFMas itself is not inspected by the visible code.
351 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
352 const SIInstrInfo *TII = ST.getInstrInfo();
354 // v_div_fmas requires 4 wait states after a write to vcc from a VALU
356 const int DivFMasWaitStates = 4;
357 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
// Despite its name, this holds the wait states already elapsed since the
// write, not the number still required.
358 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
360 return DivFMasWaitStates - WaitStatesNeeded;
// Returns 2 minus the number of wait states since the most recent s_setreg
// that targeted the same hardware register GetRegInstr reads. The result is
// zero or negative when no additional wait states are required.
363 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
364 const SIInstrInfo *TII = ST.getInstrInfo();
365 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
367 const int GetRegWaitStates = 2;
// Only reached for s_setreg instructions; getWaitStatesSinceSetReg filters on
// the opcode before calling this predicate.
368 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
369 return GetRegHWReg == getHWReg(TII, *MI);
// Despite its name, this holds the wait states already elapsed since the
// matching s_setreg.
371 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
373 return GetRegWaitStates - WaitStatesNeeded;
// Returns the required s_setreg-to-s_setreg spacing minus the number of wait
// states since the most recent s_setreg that targeted the same hardware
// register as SetRegInstr. The result is zero or negative when no additional
// wait states are required.
376 int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
377 const SIInstrInfo *TII = ST.getInstrInfo();
378 unsigned HWReg = getHWReg(TII, *SetRegInstr);
// One wait state for generations up to and including SEA_ISLANDS, two for
// later generations.
380 const int SetRegWaitStates =
381 ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
382 auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
383 return HWReg == getHWReg(TII, *MI);
// Despite its name, this holds the wait states already elapsed since the
// matching s_setreg.
385 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
386 return SetRegWaitStates - WaitStatesNeeded;
// Classifies MI for the store-data hazard handled by checkVALUHazards. That
// caller treats a non-negative result as the index of MI's operand holding the
// store data and a negative result as "no hazard".
// NOTE(review): every return statement, the declaration of VDataRCID, and the
// guards around some statements are not visible in this listing; the comments
// below describe only the visible conditions — confirm against the full
// source.
389 int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
393 const SIInstrInfo *TII = ST.getInstrInfo();
394 unsigned Opcode = MI.getOpcode();
395 const MCInstrDesc &Desc = MI.getDesc();
397 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
// Register class of the vdata operand. NOTE(review): presumably guarded by a
// "VDataIdx != -1" test on a line not shown here — confirm.
400 VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
402 if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
403 // There is no hazard if the instruction does not use vector regs
407 // For MUBUF/MTBUF instructions this hazard only exists if the
408 // instruction is not using a register in the soffset field.
409 const MachineOperand *SOffset =
410 TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
411 // If we have no soffset operand, then assume this field has been
412 // hardcoded to zero.
// Fires when vdata is wider than 64 bits and soffset is absent or not a
// register.
413 if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
414 (!SOffset || !SOffset->isReg()))
418 // MIMG instructions create a hazard if they don't use a 256-bit T# and
419 // the store size is greater than 8 bytes and they have more than two bits
420 // of their dmask set.
421 // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
422 if (TII->isMIMG(MI)) {
423 int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
// Debug-build check of the assumption stated above: srsrc exists and its
// register class is 256 bits wide.
424 assert(SRsrcIdx != -1 &&
425 AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
429 if (TII->isFLAT(MI)) {
// NOTE(review): DataIdx is used to index Desc.OpInfo without a visible -1
// check — confirm a FLAT instruction reaching this point always has vdata.
430 int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
431 if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
// Returns the wait states needed before VALU so that it does not overwrite
// store data of a recently emitted instruction: for each VGPR VALU defines,
// computes (1 - wait states since an instruction flagged by createsVALUHazard
// whose data operand overlaps that register) and returns the maximum, never
// below 0.
// NOTE(review): the result of the early exit for subtargets without the hazard
// and the statement guarded by the VGPR filter are not visible in this
// listing — presumably `return 0` and `continue`; confirm.
438 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
439 // This checks for the hazard where VMEM instructions that store more than
440 // 8 bytes can have their store data overwritten by the next instruction.
441 if (!ST.has12DWordStoreHazard())
444 const SIRegisterInfo *TRI = ST.getRegisterInfo();
445 const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo();
447 const int VALUWaitStates = 1;
448 int WaitStatesNeeded = 0;
450 for (const MachineOperand &Def : VALU->defs()) {
451 if (!TRI->isVGPR(MRI, Def.getReg()))
453 unsigned Reg = Def.getReg();
// A previous instruction is hazardous when createsVALUHazard yields a data
// operand index and that operand's register overlaps the one being defined.
454 auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
455 int DataIdx = createsVALUHazard(*MI);
456 return DataIdx >= 0 &&
457 TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
459 int WaitStatesNeededForDef =
460 VALUWaitStates - getWaitStatesSince(IsHazardFn);
461 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
463 return WaitStatesNeeded;
// Returns 4 minus the number of wait states since a VALU instruction last
// modified the register used as RWLane's lane select (its src1 operand). The
// result is zero or negative when no additional wait states are required.
// NOTE(review): the result of the early exit taken when src1 is not an SGPR
// register is not visible in this listing — presumably `return 0`; confirm.
// LaneSelectOp is dereferenced without a null check, so RWLane must have a
// src1 operand.
466 int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
467 const SIInstrInfo *TII = ST.getInstrInfo();
468 const SIRegisterInfo *TRI = ST.getRegisterInfo();
469 const MachineRegisterInfo &MRI =
470 RWLane->getParent()->getParent()->getRegInfo();
472 const MachineOperand *LaneSelectOp =
473 TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
475 if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
478 unsigned LaneSelectReg = LaneSelectOp->getReg();
479 auto IsHazardFn = [TII] (MachineInstr *MI) {
480 return TII->isVALU(*MI);
483 const int RWLaneWaitStates = 4;
484 int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
485 return RWLaneWaitStates - WaitStatesSince;
// Returns 1 minus the number of wait states since the most recent s_setreg
// that targeted the TRAPSTS hardware register. The result is zero or negative
// when no additional wait states are required. RFE itself is not inspected by
// the visible code.
// NOTE(review): the result of the pre-VOLCANIC_ISLANDS early exit is not
// visible in this listing — presumably `return 0`; confirm.
488 int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
490 if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
493 const SIInstrInfo *TII = ST.getInstrInfo();
495 const int RFEWaitStates = 1;
// Only reached for s_setreg instructions; getWaitStatesSinceSetReg filters on
// the opcode before calling this predicate.
497 auto IsHazardFn = [TII] (MachineInstr *MI) {
498 return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
// Despite its name, this holds the wait states already elapsed since the
// matching s_setreg.
500 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
501 return RFEWaitStates - WaitStatesNeeded;