contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp

   1 //=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a hazard recognizer for the SystemZ scheduler.
  11 //
  12 // This class is used by the SystemZ scheduling strategy to maintain
  13 // the state during scheduling, and provide cost functions for
  14 // scheduling candidates. This includes:
  15 //
  16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
  17 // instructions that always begin a new group should be scheduled when
  18 // the current decoder group is empty.
  19 // * Processor resources usage. It is beneficial to balance the use of
  20 // resources.
  21 //
  22 // ===---------------------------------------------------------------------===//
  23
  24 #include "SystemZHazardRecognizer.h"
  25 #include "llvm/ADT/Statistic.h"
  26
  27 using namespace llvm;
  28
  29 #define DEBUG_TYPE "misched"
  30
  31 // This is the limit of processor resource usage at which the
  32 // scheduler should try to look for other instructions (not using the
  33 // critical resource).
  34 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
  35                                    cl::desc("The OOO window for processor "
  36                                             "resources during scheduling."),
  37                                    cl::init(8));
  38
  39 SystemZHazardRecognizer::
  40 SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
  41                                                         SchedModel(nullptr) {}
  42
  43 unsigned SystemZHazardRecognizer::
  44 getNumDecoderSlots(SUnit *SU) const {
  45   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  46   if (!SC->isValid())
  47     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
  48
  49   if (SC->BeginGroup) {
  50     if (!SC->EndGroup)
  51       return 2; // Cracked instruction
  52     else
  53       return 3; // Expanded/group-alone instruction
  54   }
  55
  56   return 1; // Normal instruction
  57 }
  58
  59 unsigned SystemZHazardRecognizer::getCurrCycleIdx() {
  60   unsigned Idx = CurrGroupSize;
  61   if (GrpCount % 2)
  62     Idx += 3;
  63   return Idx;
  64 }
  65
  66 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
  67 getHazardType(SUnit *m, int Stalls) {
  68   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
  69 }
  70
  71 void SystemZHazardRecognizer::Reset() {
  72   CurrGroupSize = 0;
  73   clearProcResCounters();
  74   GrpCount = 0;
  75   LastFPdOpCycleIdx = UINT_MAX;
  76   DEBUG(CurGroupDbg = "";);
  77 }
  78
  79 bool
  80 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
  81   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  82   if (!SC->isValid())
  83     return true;
  84
  85   // A cracked instruction only fits into schedule if the current
  86   // group is empty.
  87   if (SC->BeginGroup)
  88     return (CurrGroupSize == 0);
  89
  90   // Since a full group is handled immediately in EmitInstruction(),
  91   // SU should fit into current group. NumSlots should be 1 or 0,
  92   // since it is not a cracked or expanded instruction.
  93   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
  94           "Expected normal instruction to fit in non-full group!");
  95
  96   return true;
  97 }
  98
  99 void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
 100   if (CurrGroupSize > 0) {
 101     DEBUG(dumpCurrGroup("Completed decode group"));
 102     DEBUG(CurGroupDbg = "";);
 103
 104     GrpCount++;
 105
 106     // Reset counter for next group.
 107     CurrGroupSize = 0;
 108
 109     // Decrease counters for execution units by one.
 110     for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
 111       if (ProcResourceCounters[i] > 0)
 112         ProcResourceCounters[i]--;
 113
 114     // Clear CriticalResourceIdx if it is now below the threshold.
 115     if (CriticalResourceIdx != UINT_MAX &&
 116         (ProcResourceCounters[CriticalResourceIdx] <=
 117          ProcResCostLim))
 118       CriticalResourceIdx = UINT_MAX;
 119   }
 120
 121   DEBUG(if (DbgOutput)
 122           dumpProcResourceCounters(););
 123 }
 124
 125 #ifndef NDEBUG // Debug output
 126 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
 127   OS << "SU(" << SU->NodeNum << "):";
 128   OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
 129
 130   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
 131   if (!SC->isValid())
 132     return;
 133
 134   for (TargetSchedModel::ProcResIter
 135          PI = SchedModel->getWriteProcResBegin(SC),
 136          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
 137     const MCProcResourceDesc &PRD =
 138       *SchedModel->getProcResource(PI->ProcResourceIdx);
 139     std::string FU(PRD.Name);
 140     // trim e.g. Z13_FXaUnit -> FXa
 141     FU = FU.substr(FU.find("_") + 1);
 142     FU.resize(FU.find("Unit"));
 143     OS << "/" << FU;
 144
 145     if (PI->Cycles > 1)
 146       OS << "(" << PI->Cycles << "cyc)";
 147   }
 148
 149   if (SC->NumMicroOps > 1)
 150     OS << "/" << SC->NumMicroOps << "uops";
 151   if (SC->BeginGroup && SC->EndGroup)
 152     OS << "/GroupsAlone";
 153   else if (SC->BeginGroup)
 154     OS << "/BeginsGroup";
 155   else if (SC->EndGroup)
 156     OS << "/EndsGroup";
 157   if (SU->isUnbuffered)
 158     OS << "/Unbuffered";
 159 }
 160
 161 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
 162   dbgs() << "+++ " << Msg;
 163   dbgs() << ": ";
 164
 165   if (CurGroupDbg.empty())
 166     dbgs() << " <empty>\n";
 167   else {
 168     dbgs() << "{ " << CurGroupDbg << " }";
 169     dbgs() << " (" << CurrGroupSize << " decoder slot"
 170            << (CurrGroupSize > 1 ? "s":"")
 171            << ")\n";
 172   }
 173 }
 174
 175 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
 176   bool any = false;
 177
 178   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
 179     if (ProcResourceCounters[i] > 0) {
 180       any = true;
 181       break;
 182     }
 183
 184   if (!any)
 185     return;
 186
 187   dbgs() << "+++ Resource counters:\n";
 188   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
 189     if (ProcResourceCounters[i] > 0) {
 190       dbgs() << "+++ Extra schedule for execution unit "
 191              << SchedModel->getProcResource(i)->Name
 192              << ": " << ProcResourceCounters[i] << "\n";
 193       any = true;
 194     }
 195 }
 196 #endif //NDEBUG
 197
 198 void SystemZHazardRecognizer::clearProcResCounters() {
 199   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
 200   CriticalResourceIdx = UINT_MAX;
 201 }
 202
 203 // Update state with SU as the next scheduled unit.
 204 void SystemZHazardRecognizer::
 205 EmitInstruction(SUnit *SU) {
 206   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
 207   DEBUG( dumpCurrGroup("Decode group before emission"););
 208
 209   // If scheduling an SU that must begin a new decoder group, move on
 210   // to next group.
 211   if (!fitsIntoCurrentGroup(SU))
 212     nextGroup();
 213
 214   DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
 215          dbgs() << "\n";
 216          raw_string_ostream cgd(CurGroupDbg);
 217          if (CurGroupDbg.length())
 218            cgd << ", ";
 219          dumpSU(SU, cgd););
 220
 221   // After returning from a call, we don't know much about the state.
 222   if (SU->getInstr()->isCall()) {
 223     DEBUG (dbgs() << "+++ Clearing state after call.\n";);
 224     clearProcResCounters();
 225     LastFPdOpCycleIdx = UINT_MAX;
 226     CurrGroupSize += getNumDecoderSlots(SU);
 227     assert (CurrGroupSize <= 3);
 228     nextGroup();
 229     return;
 230   }
 231
 232   // Increase counter for execution unit(s).
 233   for (TargetSchedModel::ProcResIter
 234          PI = SchedModel->getWriteProcResBegin(SC),
 235          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
 236     // Don't handle FPd together with the other resources.
 237     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
 238       continue;
 239     int &CurrCounter =
 240       ProcResourceCounters[PI->ProcResourceIdx];
 241     CurrCounter += PI->Cycles;
 242     // Check if this is now the new critical resource.
 243     if ((CurrCounter > ProcResCostLim) &&
 244         (CriticalResourceIdx == UINT_MAX ||
 245          (PI->ProcResourceIdx != CriticalResourceIdx &&
 246           CurrCounter >
 247           ProcResourceCounters[CriticalResourceIdx]))) {
 248       DEBUG( dbgs() << "+++ New critical resource: "
 249              << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
 250              << "\n";);
 251       CriticalResourceIdx = PI->ProcResourceIdx;
 252     }
 253   }
 254
 255   // Make note of an instruction that uses a blocking resource (FPd).
 256   if (SU->isUnbuffered) {
 257     LastFPdOpCycleIdx = getCurrCycleIdx();
 258     DEBUG (dbgs() << "+++ Last FPd cycle index: "
 259            << LastFPdOpCycleIdx << "\n";);
 260   }
 261
 262   // Insert SU into current group by increasing number of slots used
 263   // in current group.
 264   CurrGroupSize += getNumDecoderSlots(SU);
 265   assert (CurrGroupSize <= 3);
 266
 267   // Check if current group is now full/ended. If so, move on to next
 268   // group to be ready to evaluate more candidates.
 269   if (CurrGroupSize == 3 || SC->EndGroup)
 270     nextGroup();
 271 }
 272
 273 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
 274   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
 275   if (!SC->isValid())
 276     return 0;
 277
 278   // If SU begins new group, it can either break a current group early
 279   // or fit naturally if current group is empty (negative cost).
 280   if (SC->BeginGroup) {
 281     if (CurrGroupSize)
 282       return 3 - CurrGroupSize;
 283     return -1;
 284   }
 285
 286   // Similarly, a group-ending SU may either fit well (last in group), or
 287   // end the group prematurely.
 288   if (SC->EndGroup) {
 289     unsigned resultingGroupSize =
 290       (CurrGroupSize + getNumDecoderSlots(SU));
 291     if (resultingGroupSize < 3)
 292       return (3 - resultingGroupSize);
 293     return -1;
 294   }
 295
 296   // Most instructions can be placed in any decoder slot.
 297   return 0;
 298 }
 299
 300 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
 301   assert (SU->isUnbuffered);
 302   // If this is the first FPd op, it should be scheduled high.
 303   if (LastFPdOpCycleIdx == UINT_MAX)
 304     return true;
 305   // If this is not the first PFd op, it should go into the other side
 306   // of the processor to use the other FPd unit there. This should
 307   // generally happen if two FPd ops are placed with 2 other
 308   // instructions between them (modulo 6).
 309   if (LastFPdOpCycleIdx > getCurrCycleIdx())
 310     return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
 311   return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
 312 }
 313
 314 int SystemZHazardRecognizer::
 315 resourcesCost(SUnit *SU) {
 316   int Cost = 0;
 317
 318   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
 319   if (!SC->isValid())
 320     return 0;
 321
 322   // For a FPd op, either return min or max value as indicated by the
 323   // distance to any prior FPd op.
 324   if (SU->isUnbuffered)
 325     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
 326   // For other instructions, give a cost to the use of the critical resource.
 327   else if (CriticalResourceIdx != UINT_MAX) {
 328     for (TargetSchedModel::ProcResIter
 329            PI = SchedModel->getWriteProcResBegin(SC),
 330            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
 331       if (PI->ProcResourceIdx == CriticalResourceIdx)
 332         Cost = PI->Cycles;
 333   }
 334
 335   return Cost;
 336 }
 337