1 //===- X86MacroFusion.cpp - X86 Macro Fusion ------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains the X86 implementation of the DAG scheduling
11 /// mutation to pair instructions back to back.
13 //===----------------------------------------------------------------------===//
15 #include "X86MacroFusion.h"
16 #include "X86Subtarget.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Target/TargetInstrInfo.h"
// Tag picked up by LLVM's DEBUG() and STATISTIC() macros for this file.
21 #define DEBUG_TYPE "misched"
// NOTE(review): no increment of NumFused is visible in this excerpt; confirm
// that apply() bumps it whenever a pair is clustered.
23 STATISTIC(NumFused, "Number of instr pairs fused");
// Hidden command-line switch, on by default. It is read by
// createX86MacroFusionDAGMutation(), which returns nullptr when it is off.
27 static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden,
28 cl::desc("Enable scheduling for macro fusion."), cl::init(true));
32 /// \brief Verify that the instruction pair, First and Second,
33 /// should be scheduled back to back. If either instruction is unspecified,
34 /// then verify that the other instruction may be part of a pair at all.
///
/// \param ST     X86 subtarget; per the comment in the body it is meant to
///               decide whether the processor supports macro-fusion.
/// \param First  Candidate leading instruction, or nullptr if unspecified.
/// \param Second Candidate trailing instruction, or nullptr if unspecified.
///               First and Second must not both be nullptr (asserted below).
/// \returns true if the pair (or the single specified instruction) qualifies.
35 static bool shouldScheduleAdjacent(const X86Subtarget &ST,
36 const MachineInstr *First,
37 const MachineInstr *Second) {
38 // Check if this processor supports macro-fusion. Since this is a minor
39 // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
40 // proxy for SandyBridge+.
// NOTE(review): the subtarget test described above is not visible here;
// confirm it is still present and returns false for unsupported processors.
50 assert((First || Second) && "At least one instr must be specified");
// An unspecified (null) instruction is represented by the sentinel opcode
// X86::INSTRUCTION_LIST_END.
51 unsigned FirstOpcode = First
53 : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
54 unsigned SecondOpcode = Second
56 : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
// Dispatch on the second instruction's opcode.
58 switch (SecondOpcode) {
// Dispatch on the first instruction's opcode. The returns below compare
// FuseKind against the fusion kinds a group of first opcodes may pair with;
// FuseKind is presumably assigned by the switch above (TODO confirm).
85 switch (FirstOpcode) {
102 case X86::TEST8ri_NOREX:
145 case X86::ADD16ri8_DB:
146 case X86::ADD16ri_DB:
149 case X86::ADD16rr_DB:
153 case X86::ADD32ri8_DB:
154 case X86::ADD32ri_DB:
157 case X86::ADD32rr_DB:
160 case X86::ADD64ri32_DB:
162 case X86::ADD64ri8_DB:
165 case X86::ADD64rr_DB:
191 return FuseKind == FuseCmp || FuseKind == FuseInc;
200 return FuseKind == FuseInc;
// Presumably reached when First was not specified (sentinel opcode) — verify.
201 case X86::INSTRUCTION_LIST_END:
206 /// \brief Post-process the DAG to create cluster edges between instructions
207 /// that may be fused by the processor into a single operation.
208 class X86MacroFusion : public ScheduleDAGMutation {
// Entry point of the mutation; overrides ScheduleDAGMutation::apply.
212 void apply(ScheduleDAGInstrs *DAGInstrs) override;
/// Examines the exit node (ExitSU) of the DAG and its predecessors. When
/// shouldScheduleAdjacent() accepts a predecessor together with the exit
/// node's instruction, a Cluster edge is added from that predecessor to ExitSU
/// and the latency of the dependencies between the two nodes is set to 0.
///
/// \param DAGInstrs The DAG to mutate; it is cast to ScheduleDAGMI below.
215 void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
// NOTE(review): unchecked downcast — assumes the DAG is always a ScheduleDAGMI.
216 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
217 const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>();
219 // For now, assume targets can only fuse with the branch.
220 SUnit &ExitSU = DAG->ExitSU;
221 MachineInstr *Branch = ExitSU.getInstr();
// Guard: ExitSU must carry an instruction, and that instruction must be
// acceptable as the second half of a pair (First is left unspecified here).
222 if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch))
225 for (SDep &PredDep : ExitSU.Preds) {
// Weak predecessor edges are tested for here; the guarded statement
// presumably skips them (TODO confirm).
226 if (PredDep.isWeak())
228 SUnit &SU = *PredDep.getSUnit();
229 MachineInstr &Pred = *SU.getInstr();
// Test this predecessor as the first half of a pair with Branch.
230 if (!shouldScheduleAdjacent(ST, &Pred, Branch))
233 // Create a single weak edge from SU to ExitSU. The only effect is to cause
234 // bottom-up scheduling to heavily prioritize the clustered SU. There is no
235 // need to copy predecessor edges from ExitSU to SU, since top-down
236 // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
237 // of SU, we could create an artificial edge from the deepest root, but it
238 // hasn't been needed yet.
// NOTE(review): addEdge() is called while the enclosing range-for iterates
// ExitSU.Preds, which addEdge() presumably appends to. That is only safe if
// the loop is left right after clustering — confirm a break follows.
239 bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
241 assert(Success && "No DAG nodes should be reachable from ExitSU");
243 // Adjust latency of data deps between the nodes.
// Both records of the dependency are updated: the entry in ExitSU.Preds and
// the matching entry in SU.Succs. Note that this PredDep shadows the outer
// loop variable of the same name.
244 for (SDep &PredDep : ExitSU.Preds)
245 if (PredDep.getSUnit() == &SU)
246 PredDep.setLatency(0);
247 for (SDep &SuccDep : SU.Succs)
248 if (SuccDep.getSUnit() == &ExitSU)
249 SuccDep.setLatency(0);
// Debug-only trace naming the function, the clustered node and both opcodes.
252 DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
253 SU.print(dbgs(), DAG);
254 dbgs() << " - ExitSU"
255 << " / " << DAG->TII->getName(Pred.getOpcode()) << " - "
256 << DAG->TII->getName(Branch->getOpcode()) << '\n';);
/// Factory for the macro-fusion mutation. Returns a new X86MacroFusion when
/// the x86-misched-fusion option (EnableMacroFusion) is set, and nullptr
/// otherwise, so callers must handle a null result.
266 std::unique_ptr<ScheduleDAGMutation>
267 createX86MacroFusionDAGMutation () {
268 return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr;
271 } // end namespace llvm