1 //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file implements the InstrBuilder interface.
13 //===----------------------------------------------------------------------===//
15 #include "InstrBuilder.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/WithColor.h"
21 #include "llvm/Support/raw_ostream.h"
23 #define DEBUG_TYPE "llvm-mca"
/// Fill ID.Buffers and ID.Resources from the write-proc-resource entries of
/// the scheduling class SCDesc.
///
/// Every entry's processor resource index is translated to a mask through
/// ProcResourceMasks and queued on a worklist together with its cycles. The
/// worklist is then sorted by mask popcount and walked in that order: each
/// entry is recorded in ID.Resources, and its cycles are subtracted from the
/// later entries whose mask covers it.
///
/// NOTE(review): the embedded line numbers in this listing have gaps (for
/// example 58 -> 60, 71 -> 73, 86 -> 90); whatever sits on the skipped lines
/// is not visible here, so confirm the branch/return structure against the
/// full file.
29 static void initializeUsedResources(InstrDesc &ID,
30 const MCSchedClassDesc &SCDesc,
31 const MCSubtargetInfo &STI,
32 ArrayRef<uint64_t> ProcResourceMasks) {
33 const MCSchedModel &SM = STI.getSchedModel();
35 // Populate resources consumed.
36 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
37 std::vector<ResourcePlusCycles> Worklist;
39 // Track cycles contributed by resources that are in a "Super" relationship.
40 // This is required if we want to correctly match the behavior of method
41 // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
42 // of "consumed" processor resources and resource cycles, the logic in
43 // ExpandProcResource() doesn't update the number of resource cycles
44 // contributed by a "Super" resource to a group.
45 // We need to take this into account when we find that a processor resource is
46 // part of a group, and it is also used as the "Super" of other resources.
47 // This map stores the number of cycles contributed by sub-resources that are
48 // part of a "Super" resource. The key value is the "Super" resource mask ID.
49 DenseMap<uint64_t, unsigned> SuperResources;
// One iteration per write-proc-resource entry of the scheduling class.
51 for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
52 const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
53 const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
54 uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
// Any resource whose BufferSize differs from -1 is recorded as a buffer.
55 if (PR.BufferSize != -1)
56 ID.Buffers.push_back(Mask);
57 CycleSegment RCy(0, PRE->Cycles, false);
58 Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
// Credit this entry's cycles to the mask of its "Super" resource.
// NOTE(review): a guard on PR.SuperIdx may sit on skipped line 59 - confirm.
60 uint64_t Super = ProcResourceMasks[PR.SuperIdx];
61 SuperResources[Super] += PRE->Cycles;
65 // Sort elements by mask popcount, so that we prioritize resource units over
66 // resource groups, and smaller groups over larger groups.
// Entries with equal popcount are ordered by the numeric value of the mask.
67 llvm::sort(Worklist.begin(), Worklist.end(),
68 [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
69 unsigned popcntA = countPopulation(A.first);
70 unsigned popcntB = countPopulation(B.first);
71 if (popcntA < popcntB)
73 if (popcntA > popcntB)
75 return A.first < B.first;
// Union of the single-bit masks seen by the loop below; consulted afterwards
// when deciding which resource groups to mark as reserved.
78 uint64_t UsedResourceUnits = 0;
80 // Remove cycles contributed by smaller resources.
81 for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
82 ResourcePlusCycles &A = Worklist[I];
// An entry whose usage has size zero is recorded with zero units and flagged
// as reserved.
// NOTE(review): presumably the iteration ends here (skipped line 87) so that
// the second emplace_back below is not reached for such entries - confirm.
83 if (!A.second.size()) {
84 A.second.NumUnits = 0;
85 A.second.setReserved();
86 ID.Resources.emplace_back(A);
90 ID.Resources.emplace_back(A);
91 uint64_t NormalizedMask = A.first;
// A mask with exactly one bit set is accumulated into UsedResourceUnits.
92 if (countPopulation(A.first) == 1) {
93 UsedResourceUnits |= A.first;
95 // Remove the leading 1 from the resource group mask.
96 NormalizedMask ^= PowerOf2Floor(NormalizedMask);
// For every later entry B whose mask contains all bits of NormalizedMask,
// subtract A's cycles, less the cycles recorded for A as a "Super" resource.
99 for (unsigned J = I + 1; J < E; ++J) {
100 ResourcePlusCycles &B = Worklist[J];
101 if ((NormalizedMask & B.first) == NormalizedMask) {
102 B.second.CS.Subtract(A.second.size() - SuperResources[A.first]);
// NOTE(review): the statement controlled by this check is on a skipped line.
103 if (countPopulation(B.first) > 1)
109 // A SchedWrite may specify a number of cycles in which a resource group
110 // is reserved. For example (on target x86; cpu Haswell):
112 // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
113 // let ResourceCycles = [2, 2, 3];
117 // Resource units HWPort0 and HWPort1 are both used for 2cy.
118 // Resource group HWPort01 is the union of HWPort0 and HWPort1.
119 // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
120 // will not be usable for 2 entire cycles from instruction issue.
122 // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
123 // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
124 // extra delay on top of the 2 cycles latency.
125 // During those extra cycles, HWPort01 is not usable by other instructions.
// A multi-bit mask that is not yet reserved becomes reserved when all of its
// bits, leading bit excluded, are present in UsedResourceUnits.
126 for (ResourcePlusCycles &RPC : ID.Resources) {
127 if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
128 // Remove the leading 1 from the resource group mask.
129 uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
130 if ((Mask & UsedResourceUnits) == Mask)
131 RPC.second.setReserved();
// Print the computed resource masks with their cycles, then the buffer masks.
136 for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
137 dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
138 for (const uint64_t R : ID.Buffers)
139 dbgs() << "\t\tBuffer Mask=" << R << '\n';
/// Set ID.MaxLatency for the instruction described by MCDesc and SCDesc.
///
/// Call instructions get a fixed value of 100. For anything else the value
/// returned by MCSchedModel::computeInstrLatency is used, and a negative
/// result is replaced by 100.
143 static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
144 const MCSchedClassDesc &SCDesc,
145 const MCSubtargetInfo &STI) {
146 if (MCDesc.isCall()) {
147 // We cannot estimate how long this call will take.
148 // Artificially set an arbitrarily high latency (100cy).
149 ID.MaxLatency = 100U;
// NOTE(review): embedded lines 150-152 are not visible here; presumably the
// call case exits early, otherwise the assignment below would overwrite the
// value just stored - confirm.
153 int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
154 // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
155 ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
/// Fill ID.Writes with one WriteDescriptor per register definition of MCI.
///
/// Explicit definitions occupy the first NumExplicitDefs slots and implicit
/// definitions the following NumImplicitDefs slots. When the opcode has an
/// optional definition, it is written to slot TotalDefs - 1. ID.MaxLatency is
/// used as the default write latency, so it must already be set when this
/// runs.
///
/// \param ID            descriptor being populated.
/// \param MCI           instruction whose operands are scanned.
/// \param SchedClassID  scheduling class used to look up write latencies.
///
/// NOTE(review): the embedded line numbers in this listing have gaps (for
/// example 182 -> 184, 191 -> 195, 200 -> 202); statements on the skipped
/// lines are not visible here - confirm against the full file.
158 void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
159 unsigned SchedClassID) {
160 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
161 const MCSchedModel &SM = STI.getSchedModel();
162 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
164 // These are for now the (strong) assumptions made by this algorithm:
165 // * The number of explicit and implicit register definitions in a MCInst
166 // matches the number of explicit and implicit definitions according to
167 // the opcode descriptor (MCInstrDesc).
168 // * Register definitions take precedence over register uses in the operands
170 // * If an opcode specifies an optional definition, then the optional
171 // definition is always the last operand in the sequence, and it can be
172 // set to zero (i.e. "no register").
174 // These assumptions work quite well for most out-of-order in-tree targets
175 // like x86. This is mainly because the vast majority of instructions are
176 // expanded to MCInst using a straightforward lowering logic that preserves
177 // the ordering of the operands.
178 unsigned NumExplicitDefs = MCDesc.getNumDefs();
179 unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
180 unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
181 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
// NOTE(review): the statement controlled by this check is on skipped line
// 183; presumably it makes room in TotalDefs for the optional definition.
182 if (MCDesc.hasOptionalDef())
184 ID.Writes.resize(TotalDefs);
185 // Iterate over the operands list, and skip non-register operands.
186 // The first NumExplicitDefs register operands are expected to be register
188 unsigned CurrentDef = 0;
// Scan operands until NumExplicitDefs definitions have been described or the
// operand list is exhausted.
190 for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
191 const MCOperand &Op = MCI.getOperand(i);
195 WriteDescriptor &Write = ID.Writes[CurrentDef];
// Use the scheduling class latency entry for this definition when there is
// one; a negative cycle count is replaced by ID.MaxLatency.
197 if (CurrentDef < NumWriteLatencyEntries) {
198 const MCWriteLatencyEntry &WLE =
199 *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
200 // Conservatively default to MaxLatency.
202 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
203 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
205 // Assign a default latency for this write.
206 Write.Latency = ID.MaxLatency;
207 Write.SClassOrWriteResourceID = 0;
209 Write.IsOptionalDef = false;
211 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
212 << ", Latency=" << Write.Latency
213 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
// Fail hard if the scan above did not account for every explicit definition.
218 if (CurrentDef != NumExplicitDefs)
219 llvm::report_fatal_error(
220 "error: Expected more register operand definitions. ");
// Implicit definitions: the register comes from the opcode descriptor, and
// OpIndex stores the bitwise complement of the implicit-definition position.
223 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
// Implicit definitions are stored, and their latency entries indexed, after
// the explicit ones.
224 unsigned Index = NumExplicitDefs + CurrentDef;
225 WriteDescriptor &Write = ID.Writes[Index];
226 Write.OpIndex = ~CurrentDef;
227 Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
228 if (Index < NumWriteLatencyEntries) {
229 const MCWriteLatencyEntry &WLE =
230 *STI.getWriteLatencyEntry(&SCDesc, Index);
231 // Conservatively default to MaxLatency.
233 WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
234 Write.SClassOrWriteResourceID = WLE.WriteResourceID;
236 // Assign a default latency for this write.
237 Write.Latency = ID.MaxLatency;
238 Write.SClassOrWriteResourceID = 0;
241 Write.IsOptionalDef = false;
242 assert(Write.RegisterID != 0 && "Expected a valid phys register!");
244 dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
245 << ", PhysReg=" << MRI.getName(Write.RegisterID)
246 << ", Latency=" << Write.Latency
247 << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
251 if (MCDesc.hasOptionalDef()) {
252 // Always assume that the optional definition is the last operand of the
254 const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
// Reject the instruction if the explicit-definition scan already consumed
// every operand, or if the last operand is not a register.
255 if (i == MCI.getNumOperands() || !Op.isReg())
256 llvm::report_fatal_error(
257 "error: expected a register operand for an optional "
258 "definition. Instruction has not be correctly analyzed.\n",
// The optional definition is described by the last write descriptor and
// always gets the default latency.
261 WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
262 Write.OpIndex = MCI.getNumOperands() - 1;
263 // Assign a default latency for this write.
264 Write.Latency = ID.MaxLatency;
265 Write.SClassOrWriteResourceID = 0;
266 Write.IsOptionalDef = true;
/// Fill ID.Reads with one ReadDescriptor per register use of MCI.
///
/// Explicit uses are the operands from index i onwards, where i is the
/// position reached after skipping the explicit definitions. They are
/// followed by the implicit uses listed in the opcode descriptor. Every
/// descriptor is tagged with SchedClassID.
///
/// \param ID            descriptor being populated.
/// \param MCI           instruction whose operands are scanned.
/// \param SchedClassID  scheduling class stored in each read descriptor.
///
/// NOTE(review): the embedded line numbers in this listing have gaps (for
/// example 273 -> 275 -> 277, 278 -> 284, 290 -> 293); statements on the
/// skipped lines are not visible here - confirm against the full file.
270 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
271 unsigned SchedClassID) {
272 const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
273 unsigned NumExplicitDefs = MCDesc.getNumDefs();
275 // Skip explicit definitions.
// The loop runs while operands remain and NumExplicitDefs is non-zero; the
// statement that consumes a definition is on a skipped line.
277 for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
278 const MCOperand &Op = MCI.getOperand(i);
284 llvm::report_fatal_error(
285 "error: Expected more register operand definitions. ", false);
// Every operand from index i to the end is treated as an explicit use.
287 unsigned NumExplicitUses = MCI.getNumOperands() - i;
288 unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
// NOTE(review): presumably the trailing optional definition is removed from
// the explicit-use count on a skipped line - confirm.
289 if (MCDesc.hasOptionalDef()) {
290 assert(NumExplicitUses);
293 unsigned TotalUses = NumExplicitUses + NumImplicitUses;
297 ID.Reads.resize(TotalUses);
// Explicit uses: OpIndex is the operand position in MCI, and UseIndex counts
// uses starting from zero.
298 for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
299 ReadDescriptor &Read = ID.Reads[CurrentUse];
300 Read.OpIndex = i + CurrentUse;
301 Read.UseIndex = CurrentUse;
302 Read.SchedClassID = SchedClassID;
303 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
304 << ", UseIndex=" << Read.UseIndex << '\n');
// Implicit uses: OpIndex stores the bitwise complement of the implicit-use
// position, UseIndex continues after the explicit uses, and the register
// comes from the opcode descriptor.
307 for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
308 ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
309 Read.OpIndex = ~CurrentUse;
310 Read.UseIndex = NumExplicitUses + CurrentUse;
311 Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
312 Read.SchedClassID = SchedClassID;
313 LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
314 << MRI.getName(Read.RegisterID) << '\n');
/// Build a new InstrDesc for MCI, store it in one of the descriptor caches,
/// and return a reference to the stored object.
///
/// The scheduling class of the opcode is resolved first if it is a variant
/// class. Unsupported instructions (NumMicroOps equal to InvalidNumMicroOps)
/// are reported and trigger a fatal error. Calls and returns only produce
/// warnings.
///
/// The descriptor is stored in Descriptors, keyed by opcode, when the
/// opcode's own scheduling class is not a variant. Otherwise it is stored in
/// VariantDescriptors, keyed by the address of MCI.
///
/// NOTE(review): the embedded line numbers in this listing have gaps (for
/// example 334 -> 337, 347 -> 350, 388 -> 391); statements on the skipped
/// lines are not visible here - confirm against the full file.
318 const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
319 assert(STI.getSchedModel().hasInstrSchedModel() &&
320 "Itineraries are not yet supported!");
322 // Obtain the instruction descriptor from the opcode.
323 unsigned short Opcode = MCI.getOpcode();
324 const MCInstrDesc &MCDesc = MCII.get(Opcode);
325 const MCSchedModel &SM = STI.getSchedModel();
327 // Then obtain the scheduling class information from the instruction.
328 unsigned SchedClassID = MCDesc.getSchedClass();
329 unsigned CPUID = SM.getProcessorID();
331 // Try to solve variant scheduling classes.
// Keep resolving while the class ID is non-zero and still names a variant.
333 while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
334 SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
// NOTE(review): the condition guarding this error is on a skipped line;
// presumably it fires when resolution left SchedClassID at zero - confirm.
337 llvm::report_fatal_error("unable to resolve this variant class.");
340 // Check if this instruction is supported. Otherwise, report a fatal error.
341 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
342 if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
// Render the instruction into a string so it can accompany the diagnostic.
343 std::string ToString;
344 llvm::raw_string_ostream OS(ToString);
345 WithColor::error() << "found an unsupported instruction in the input"
346 << " assembly sequence.\n";
347 MCIP.printInst(&MCI, OS, "", STI);
350 WithColor::note() << "instruction: " << ToString << '\n';
351 llvm::report_fatal_error(
352 "Don't know how to analyze unsupported instructions.");
355 // Create a new empty descriptor.
356 std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
357 ID->NumMicroOps = SCDesc.NumMicroOps;
359 if (MCDesc.isCall()) {
360 // We don't correctly model calls.
361 WithColor::warning() << "found a call in the input assembly sequence.\n";
362 WithColor::note() << "call instructions are not correctly modeled. "
363 << "Assume a latency of 100cy.\n";
366 if (MCDesc.isReturn()) {
367 WithColor::warning() << "found a return instruction in the input"
368 << " assembly sequence.\n";
369 WithColor::note() << "program counter updates are ignored.\n";
372 ID->MayLoad = MCDesc.mayLoad();
373 ID->MayStore = MCDesc.mayStore();
374 ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
// Order matters: populateWrites uses ID->MaxLatency as the default write
// latency, so computeMaxLatency has to run before it.
376 initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
377 computeMaxLatency(*ID, MCDesc, SCDesc, STI);
378 populateWrites(*ID, MCI, SchedClassID);
379 populateReads(*ID, MCI, SchedClassID);
381 LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
382 LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
384 // Now add the new descriptor.
// The cache is chosen from the opcode's own (unresolved) scheduling class,
// which is why SchedClassID is read again here.
385 SchedClassID = MCDesc.getSchedClass();
386 if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) {
387 Descriptors[MCI.getOpcode()] = std::move(ID);
388 return *Descriptors[MCI.getOpcode()];
// Variant class: the descriptor is keyed by the address of this MCInst.
391 VariantDescriptors[&MCI] = std::move(ID);
392 return *VariantDescriptors[&MCI];
/// Return the InstrDesc associated with MCI, creating it when none is cached.
///
/// Descriptors is searched by opcode first, then VariantDescriptors by the
/// address of MCI. If neither holds an entry, createInstrDescImpl builds one.
///
/// NOTE(review): each cache hit performs a second lookup through operator[];
/// the iterator returned by the find call could be dereferenced instead.
395 const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
396 if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
397 return *Descriptors[MCI.getOpcode()];
399 if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
400 return *VariantDescriptors[&MCI];
402 return createInstrDescImpl(MCI);
/// Create an Instruction for MCI from its (possibly cached) descriptor.
///
/// A ReadState is added for each register use and a WriteState for each
/// register definition described by the InstrDesc. MCIA is queried for the
/// writes that clear the upper portion of a super-register and for whether
/// the instruction is dependency breaking.
///
/// NOTE(review): the embedded line numbers in this listing have gaps (for
/// example 411 -> 413, 416 -> 422, 456 -> 461), and the end of the function
/// is not visible here; statements on the skipped lines are not documented -
/// confirm against the full file.
405 std::unique_ptr<Instruction>
406 InstrBuilder::createInstruction(const MCInst &MCI) {
407 const InstrDesc &D = getOrCreateInstrDesc(MCI);
408 std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
410 // Initialize Reads first.
411 for (const ReadDescriptor &RD : D.Reads) {
// Explicit reads look at the MCInst operand at RD.OpIndex; implicit reads
// take the register stored in the descriptor.
413 if (!RD.isImplicitRead()) {
415 const MCOperand &Op = MCI.getOperand(RD.OpIndex);
416 // Skip non-register operands.
422 RegID = RD.RegisterID;
425 // Skip invalid register operands.
429 // Okay, this is a register operand. Create a ReadState for it.
430 assert(RegID > 0 && "Invalid register ID found!");
431 NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID));
434 // Early exit if there are no writes.
435 if (D.Writes.empty())
438 // Track register writes that implicitly clear the upper portion of the
439 // underlying super-registers using an APInt.
// The mask has one bit per write descriptor and starts out all zero.
440 APInt WriteMask(D.Writes.size(), 0);
442 // Now query the MCInstrAnalysis object to obtain information about which
443 // register writes implicitly clear the upper portion of a super-register.
444 MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
446 // Check if this is a dependency breaking instruction.
447 if (MCIA.isDependencyBreaking(STI, MCI))
448 NewIS->setDependencyBreaking();
450 // Initialize writes.
// WriteIndex selects the WriteMask bit that belongs to the current write.
451 unsigned WriteIndex = 0;
452 for (const WriteDescriptor &WD : D.Writes) {
// Implicit writes carry their register in the descriptor; explicit writes
// read it from the MCInst operand at WD.OpIndex.
453 unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
454 : MCI.getOperand(WD.OpIndex).getReg();
455 // Check if this is an optional definition that references NoReg.
456 if (WD.IsOptionalDef && !RegID) {
461 assert(RegID && "Expected a valid register ID!");
462 NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(
463 WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex]));