1 //===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // \file This file contains the AArch64 implementation of the DAG scheduling mutation
11 // to pair instructions back to back.
13 //===----------------------------------------------------------------------===//
15 #include "AArch64MacroFusion.h"
16 #include "AArch64Subtarget.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Target/TargetInstrInfo.h"
// Debug type shared with the generic machine scheduler, so fusion traces
// appear under -debug-only=misched.
21 #define DEBUG_TYPE "misched"
// Statistic counting instruction pairs selected for fusion.
23 STATISTIC(NumFused, "Number of instr pairs fused");
// Command-line escape hatch: -aarch64-misched-fusion=false disables the
// macro-fusion DAG mutation entirely (defaults to enabled).
27 static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
28 cl::desc("Enable scheduling for macro fusion."), cl::init(true));
32 /// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
33 /// together. Given an anchor instr, when the other instr is unspecified, then
34 /// check if the anchor instr may be part of a fused pair at all.
///
/// \param TII target instr info; must actually be an AArch64InstrInfo
///        (downcast below).
/// \param TSI target subtarget info; must actually be an AArch64Subtarget.
/// \param FirstMI first instr of the candidate pair, or null to act as a
///        wildcard ("could anything fusable precede SecondMI?").
/// \param SecondMI second instr of the candidate pair, or null as a wildcard.
/// \returns true if the (possibly half-specified) pair may be fused.
35 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
36 const TargetSubtargetInfo &TSI,
37 const MachineInstr *FirstMI,
38 const MachineInstr *SecondMI) {
39 assert((FirstMI || SecondMI) && "At least one instr must be specified");
// These casts are unchecked; this mutation is only ever created for the
// AArch64 target, so the dynamic types are known.
41 const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
42 const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
44 // Assume wildcards for unspecified instrs.
// INSTRUCTION_LIST_END doubles as the wildcard opcode: each opcode check
// below also accepts it so that a half-specified query succeeds if the
// specified side could participate in some fusable pair.
45 unsigned FirstOpcode =
46 FirstMI ? FirstMI->getOpcode()
47 : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
48 unsigned SecondOpcode =
49 SecondMI ? SecondMI->getOpcode()
50 : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
52 if (ST.hasArithmeticBccFusion())
53 // Fuse CMN, CMP, TST followed by Bcc.
54 if (SecondOpcode == AArch64::Bcc)
55 switch (FirstOpcode) {
// Flag-setting ALU ops in register/immediate form that may pair with a
// conditional branch.
58 case AArch64::ADDSWri:
59 case AArch64::ADDSWrr:
60 case AArch64::ADDSXri:
61 case AArch64::ADDSXrr:
62 case AArch64::ANDSWri:
63 case AArch64::ANDSWrr:
64 case AArch64::ANDSXri:
65 case AArch64::ANDSXrr:
66 case AArch64::SUBSWri:
67 case AArch64::SUBSWrr:
68 case AArch64::SUBSXri:
69 case AArch64::SUBSXrr:
70 case AArch64::BICSWrr:
71 case AArch64::BICSXrr:
// Shifted-register forms: only fusable when the shift amount is zero,
// i.e. when they degenerate to the plain "rr" encoding.
73 case AArch64::ADDSWrs:
74 case AArch64::ADDSXrs:
75 case AArch64::ANDSWrs:
76 case AArch64::ANDSXrs:
77 case AArch64::SUBSWrs:
78 case AArch64::SUBSXrs:
79 case AArch64::BICSWrs:
80 case AArch64::BICSXrs:
81 // Shift value can be 0 making these behave like the "rr" variant...
// FirstMI is non-null here: a concrete opcode implies a concrete instr.
82 return !II.hasShiftedReg(*FirstMI);
// Wildcard first instr: some fusable instr could precede the Bcc.
83 case AArch64::INSTRUCTION_LIST_END:
87 if (ST.hasArithmeticCbzFusion())
88 // Fuse ALU operations followed by CBZ/CBNZ.
89 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
90 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
91 switch (FirstOpcode) {
// Non-flag-setting ALU ops that may pair with a compare-and-branch.
100 case AArch64::ANDXri:
101 case AArch64::ANDXrr:
102 case AArch64::EORWri:
103 case AArch64::EORWrr:
104 case AArch64::EORXri:
105 case AArch64::EORXrr:
106 case AArch64::ORRWri:
107 case AArch64::ORRWrr:
108 case AArch64::ORRXri:
109 case AArch64::ORRXrr:
110 case AArch64::SUBWri:
111 case AArch64::SUBWrr:
112 case AArch64::SUBXri:
113 case AArch64::SUBXrr:
// Shifted-register forms, fusable only with a zero shift (see above).
115 case AArch64::ADDWrs:
116 case AArch64::ADDXrs:
117 case AArch64::ANDWrs:
118 case AArch64::ANDXrs:
119 case AArch64::SUBWrs:
120 case AArch64::SUBXrs:
121 case AArch64::BICWrs:
122 case AArch64::BICXrs:
123 // Shift value can be 0 making these behave like the "rr" variant...
124 return !II.hasShiftedReg(*FirstMI);
// Wildcard first instr: some fusable instr could precede the CBZ/CBNZ.
125 case AArch64::INSTRUCTION_LIST_END:
130 // Fuse AES crypto operations.
131 switch(FirstOpcode) {
// AESE must be immediately followed by its AESMC round, AESD by AESIMC.
133 case AArch64::AESErr:
134 return SecondOpcode == AArch64::AESMCrr ||
135 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
137 case AArch64::AESDrr:
138 return SecondOpcode == AArch64::AESIMCrr ||
139 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
142 if (ST.hasFuseLiterals())
143 // Fuse literal generation operations.
144 switch (FirstOpcode) {
145 // PC relative address.
// ADRP pairs with the ADDXri that materializes the low address bits.
147 return SecondOpcode == AArch64::ADDXri ||
148 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// MOVZ of the low 16 bits pairs with a MOVK of the next 16 bits.
// Operand 3 of MOVK holds the shift amount (multiples of 16) — TODO
// confirm against the MOVK instruction definition.
150 case AArch64::MOVZWi:
151 return (SecondOpcode == AArch64::MOVKWi &&
152 SecondMI->getOperand(3).getImm() == 16) ||
153 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
154 // Lower half of 64 bit immediate.
155 case AArch64::MOVZXi:
156 return (SecondOpcode == AArch64::MOVKXi &&
157 SecondMI->getOperand(3).getImm() == 16) ||
158 SecondOpcode == AArch64::INSTRUCTION_LIST_END;
159 // Upper half of 64 bit immediate.
// MOVK of bits 32-47 pairs with the MOVK of bits 48-63.
160 case AArch64::MOVKXi:
161 return FirstMI->getOperand(3).getImm() == 32 &&
162 ((SecondOpcode == AArch64::MOVKXi &&
163 SecondMI->getOperand(3).getImm() == 48) ||
164 SecondOpcode == AArch64::INSTRUCTION_LIST_END);
170 /// \brief Implement the fusion of instr pairs in the scheduling DAG,
171 /// anchored at the instr in AnchorSU.
///
/// \param DAG the scheduling region's DAG.
/// \param AnchorSU the SUnit to anchor the search at; may be the ExitSU,
///        in which case candidates are searched among its predecessors
///        rather than its successors.
172 static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
// Pseudo/transient instrs emit no machine code, so there is nothing for
// the processor to fuse.
173 const MachineInstr *AnchorMI = AnchorSU.getInstr();
174 if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
177 // If the anchor instr is the ExitSU, then consider its predecessors;
178 // otherwise, its successors.
179 bool Preds = (&AnchorSU == &DAG->ExitSU);
180 SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
// With Preds, the anchor is the second instr of the pair and the first is
// sought among its predecessors; otherwise the roles are reversed. The
// null side acts as a wildcard in shouldScheduleAdjacent.
182 const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
183 const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
185 // Check if the anchor instr may be fused.
// Cheap early-out before scanning the dependence list.
186 if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
190 // Explorer for fusion candidates among the dependencies of the anchor instr.
191 for (SDep &Dep : AnchorDeps) {
192 // Ignore dependencies that don't enforce ordering.
196 SUnit &DepSU = *Dep.getSUnit();
197 // Ignore the ExitSU if the dependents are successors.
198 if (!Preds && &DepSU == &DAG->ExitSU)
// Skip dependencies that carry no real machine instr.
201 const MachineInstr *DepMI = DepSU.getInstr();
202 if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
// Now both sides of the candidate pair are concrete; re-query with the
// full pair.
205 FirstMI = Preds ? DepMI : AnchorMI;
206 SecondMI = Preds ? AnchorMI : DepMI;
207 if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
211 // Create a single weak edge between the adjacent instrs. The only effect is
212 // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
213 SUnit &FirstSU = Preds ? DepSU : AnchorSU;
214 SUnit &SecondSU = Preds ? AnchorSU : DepSU;
215 DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
217 // Adjust the latency between the anchor instr and its
218 // predecessors/successors.
219 for (SDep &IDep : AnchorDeps)
220 if (IDep.getSUnit() == &DepSU)
223 // Adjust the latency between the dependent instr and its
224 // successors/predecessors.
225 for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
226 if (IDep.getSUnit() == &AnchorSU)
// Trace the chosen pair under -debug-only=misched.
229 DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
230 FirstSU.print(dbgs(), DAG); dbgs() << " - ";
231 SecondSU.print(dbgs(), DAG); dbgs() << " / ";
232 dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
233 DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
235 if (&SecondSU != &DAG->ExitSU)
236 // Make instructions dependent on FirstSU also dependent on SecondSU to
237 // prevent them from being scheduled between FirstSU and SecondSU.
238 for (SUnit::const_succ_iterator
239 SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end();
241 if (!SI->getSUnit() || SI->getSUnit() == &SecondSU)
243 DEBUG(dbgs() << "  Copy Succ ";
244 SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';);
// Artificial edges order the DAG without modeling a real dependence.
245 DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial));
255 /// \brief Post-process the DAG to create cluster edges between instrs that may
256 /// be fused by the processor into a single operation.
///
/// Registered on the machine scheduler via
/// createAArch64MacroFusionDAGMutation().
257 class AArch64MacroFusion : public ScheduleDAGMutation {
259 AArch64MacroFusion() {}
// ScheduleDAGMutation hook; runs once per scheduling region after DAG build.
261 void apply(ScheduleDAGInstrs *DAGInstrs) override;
/// Scan every SUnit in the region (plus the ExitSU) and add cluster edges
/// for each fusable pair found.
264 void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
// Unchecked downcast: this mutation is only attached to ScheduleDAGMI-based
// schedulers — TODO confirm all registration sites guarantee this.
265 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
267 // For each of the SUnits in the scheduling block, try to fuse the instr in it
268 // with one in its successors.
269 for (SUnit &ISU : DAG->SUnits)
270 scheduleAdjacentImpl(DAG, ISU);
272 // Try to fuse the instr in the ExitSU with one in its predecessors.
// The ExitSU anchors pairs ending in a branch, which the loop above cannot
// reach through successor edges.
273 scheduleAdjacentImpl(DAG, DAG->ExitSU);
/// Factory for the AArch64 macro-fusion DAG mutation.
/// \returns the mutation, or nullptr when disabled via
/// -aarch64-misched-fusion=false so callers skip registering it.
281 std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () {
282 return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
285 } // end namespace llvm