1 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that collects the Linker Optimization Hints (LOH).
11 // This pass should be run at the very end of the compilation flow, just before
13 // To be useful for the linker, the LOH must be printed into the assembly file.
15 // A LOH describes a sequence of instructions that may be optimized by the
17 // This same sequence cannot be optimized by the compiler because some of
18 // the information will be known at link time.
19 // For instance, consider the following sequence:
20 // L1: adrp xA, sym@PAGE
21 // L2: add xB, xA, sym@PAGEOFF
22 // L3: ldr xC, [xB, #imm]
23 // This sequence can be turned into:
24 // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25 // L3: ldr xC, sym+#imm
26 // It may also be turned into either the following more efficient
28 // - If sym@PAGEOFF + #imm fits the encoding space of L3.
29 // L1: adrp xA, sym@PAGE
30 // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31 // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
33 // L3: ldr xC, [xB, #imm]
35 // To be valid a LOH must meet all the requirements needed by all the related
36 // possible linker transformations.
37 // For instance, using the running example, the constraints to emit
38 // ".loh AdrpAddLdr" are:
39 // - L1, L2, and L3 instructions are of the expected type, i.e.,
40 // respectively ADRP, ADD (immediate), and LD.
41 // - The result of L1 is used only by L2.
42 // - The register argument (xA) used in the ADD instruction is defined
44 // - The result of L2 is used only by L3.
45 // - The base address (xB) in L3 is defined only by L2.
46 // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47 // @PAGE/@PAGEOFF with no additional constants
49 // Currently supported LOHs are:
50 // * So called non-ADRP-related:
51 // - .loh AdrpAddLdr L1, L2, L3:
52 // L1: adrp xA, sym@PAGE
53 // L2: add xB, xA, sym@PAGEOFF
54 // L3: ldr xC, [xB, #imm]
55 // - .loh AdrpLdrGotLdr L1, L2, L3:
56 // L1: adrp xA, sym@GOTPAGE
57 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
58 // L3: ldr xC, [xB, #imm]
59 // - .loh AdrpLdr L1, L3:
60 // L1: adrp xA, sym@PAGE
61 // L3: ldr xC, [xA, sym@PAGEOFF]
62 // - .loh AdrpAddStr L1, L2, L3:
63 // L1: adrp xA, sym@PAGE
64 // L2: add xB, xA, sym@PAGEOFF
65 // L3: str xC, [xB, #imm]
66 // - .loh AdrpLdrGotStr L1, L2, L3:
67 // L1: adrp xA, sym@GOTPAGE
68 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
69 // L3: str xC, [xB, #imm]
70 // - .loh AdrpAdd L1, L2:
71 // L1: adrp xA, sym@PAGE
72 // L2: add xB, xA, sym@PAGEOFF
73 // For all these LOHs, L1, L2, L3 form a simple chain:
74 // L1 result is used only by L2 and L2 result by L3.
75 // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
77 // All these LOHs aim at using more efficient load/store patterns by folding
78 // some instructions used to compute the address directly into the load/store.
80 // * So called ADRP-related:
81 // - .loh AdrpAdrp L2, L1:
82 // L2: ADRP xA, sym1@PAGE
83 // L1: ADRP xA, sym2@PAGE
84 // L2 dominates L1 and xA is not redefined between L2 and L1
85 // This LOH aims at getting rid of redundant ADRP instructions.
87 // The overall design for emitting the LOHs is:
88 // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89 // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90 // 1. Associates them with a label.
91 // 2. Emits them in a MCStreamer (EmitLOHDirective).
92 // - The MCMachOStreamer records them into the MCAssembler.
93 // - The MCAsmStreamer prints them.
94 // - Other MCStreamers ignore them.
95 // 3. Closes the MCStreamer:
96 // - The MachObjectWriter gets them from the MCAssembler and writes
97 // them in the object file.
98 // - Other ObjectWriters ignore them.
99 //===----------------------------------------------------------------------===//
102 #include "AArch64InstrInfo.h"
103 #include "AArch64MachineFunctionInfo.h"
104 #include "AArch64Subtarget.h"
105 #include "MCTargetDesc/AArch64AddressingModes.h"
106 #include "llvm/ADT/BitVector.h"
107 #include "llvm/ADT/DenseMap.h"
108 #include "llvm/ADT/MapVector.h"
109 #include "llvm/ADT/SetVector.h"
110 #include "llvm/ADT/SmallVector.h"
111 #include "llvm/ADT/Statistic.h"
112 #include "llvm/CodeGen/MachineBasicBlock.h"
113 #include "llvm/CodeGen/MachineFunctionPass.h"
114 #include "llvm/CodeGen/MachineInstr.h"
115 #include "llvm/CodeGen/MachineInstrBuilder.h"
116 #include "llvm/Support/Debug.h"
117 #include "llvm/Support/ErrorHandling.h"
118 #include "llvm/Support/raw_ostream.h"
119 #include "llvm/Target/TargetMachine.h"
120 #include "llvm/Target/TargetRegisterInfo.h"
121 using namespace llvm;
// Tag picked up by LLVM's DEBUG() and STATISTIC() macros in this file; it is
// the same string that is given as the pass argument to INITIALIZE_PASS.
123 #define DEBUG_TYPE "aarch64-collect-loh"
// Statistic counters for this pass. In the code visible in this file,
// NumADRPSimpleCandidate is incremented when an AdrpAdrp directive is added
// and NumADRSimpleCandidate when an AdrpAdd directive is added.
// NOTE(review): increments of the remaining counters are not visible in this
// listing -- confirm against the full source.
125 STATISTIC(NumADRPSimpleCandidate,
126 "Number of simplifiable ADRP dominate by another");
127 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
128 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
129 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
130 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
131 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
132 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
// Human-readable pass name; returned by getPassName() and passed to
// INITIALIZE_PASS.
134 #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
// Machine-function pass that records Linker Optimization Hints (see the file
// header for the supported LOH kinds).
// NOTE(review): the embedded numbering of this listing skips several lines
// inside this struct (e.g. 139, 147-148, 154-156); the declaration of ID and
// the closing braces are not visible here.
138 struct AArch64CollectLOH : public MachineFunctionPass {
// Registers the pass with its base class using the pass identifier ID.
140 AArch64CollectLOH() : MachineFunctionPass(ID) {}
// Per-function entry point; defined out of line further down in this file.
142 bool runOnMachineFunction(MachineFunction &MF) override;
// The pass requires the NoVRegs machine-function property.
144 MachineFunctionProperties getRequiredProperties() const override {
145 return MachineFunctionProperties().set(
146 MachineFunctionProperties::Property::NoVRegs);
// Returns the human-readable name defined by AARCH64_COLLECT_LOH_NAME.
149 StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
// Forwards to the base class and then declares all analyses preserved.
151 void getAnalysisUsage(AnalysisUsage &AU) const override {
152 MachineFunctionPass::getAnalysisUsage(AU);
153 AU.setPreservesAll();
// Out-of-line definition of the pass identifier passed to the base class.
157 char AArch64CollectLOH::ID = 0;
159 } // end anonymous namespace.
// Registers the pass under the argument "aarch64-collect-loh" with the
// human-readable name AARCH64_COLLECT_LOH_NAME.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
161 INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
162 AARCH64_COLLECT_LOH_NAME, false, false)
/// Decide whether the ADD instruction \p MI can take part in a LOH by
/// inspecting the type of its operand 2.
/// NOTE(review): only the case labels are visible in this listing (the
/// embedded numbering skips 167-168 and 173-176). Presumably the listed
/// address-like operand kinds are accepted and everything else is rejected --
/// confirm against the full source.
164 static bool canAddBePartOfLOH(const MachineInstr &MI) {
165 // Check immediate to see if the immediate is an address.
166 switch (MI.getOperand(2).getType()) {
169 case MachineOperand::MO_GlobalAddress:
170 case MachineOperand::MO_JumpTableIndex:
171 case MachineOperand::MO_ConstantPoolIndex:
172 case MachineOperand::MO_BlockAddress:
177 /// Answer the following question: can the instruction \p MI be one of the
178 /// definitions involved in a LOH?
/// For ADDXri the decision is delegated to canAddBePartOfLOH(). For LDRXui
/// with a global-address operand 2, the result is whether that operand
/// carries the AArch64II::MO_GOT target flag.
/// NOTE(review): the ADRP case and the default arms are not visible in this
/// listing (the embedded numbering skips 182-185, 191-192 and 195-198).
179 static bool canDefBePartOfLOH(const MachineInstr &MI) {
180 // Accept ADRP, ADDLow and LOADGot.
181 switch (MI.getOpcode()) {
186 case AArch64::ADDXri:
187 return canAddBePartOfLOH(MI);
188 case AArch64::LDRXui:
189 // Check immediate to see if the immediate is an address.
190 switch (MI.getOperand(2).getType()) {
193 case MachineOperand::MO_GlobalAddress:
// Non-zero only when the global address is accessed through the GOT.
194 return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
199 /// Check whether the given instruction can be the end of a LOH chain involving a
/// store.
/// \p MO is the operand of \p MI under consideration. For the STR*ui opcodes
/// listed below the result is true only when \p MO is operand 1 of \p MI and
/// operands 0 and 1 are different registers.
/// NOTE(review): the result for other opcodes is not visible in this listing
/// (presumably false) -- confirm against the full source.
201 static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
202 switch (MI.getOpcode()) {
205 case AArch64::STRBBui:
206 case AArch64::STRHHui:
207 case AArch64::STRBui:
208 case AArch64::STRHui:
209 case AArch64::STRWui:
210 case AArch64::STRXui:
211 case AArch64::STRSui:
212 case AArch64::STRDui:
213 case AArch64::STRQui:
214 // We can only optimize the index operand.
215 // In case we have str xA, [xA, #imm], this is two different uses
216 // of xA and we cannot fold, otherwise the xA stored may be wrong,
217 // even if #imm == 0.
218 return MI.getOperandNo(&MO) == 1 &&
219 MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
223 /// Check whether the given instruction can be the end of a LOH chain
224 /// involving a load.
/// For the LDR*ui opcodes listed below the result is true unless operand 2
/// carries the AArch64II::MO_GOT target flag.
/// NOTE(review): the result for other opcodes is not visible in this listing
/// (presumably false) -- confirm against the full source.
225 static bool isCandidateLoad(const MachineInstr &MI) {
226 switch (MI.getOpcode()) {
229 case AArch64::LDRSBWui:
230 case AArch64::LDRSBXui:
231 case AArch64::LDRSHWui:
232 case AArch64::LDRSHXui:
233 case AArch64::LDRSWui:
234 case AArch64::LDRBui:
235 case AArch64::LDRHui:
236 case AArch64::LDRWui:
237 case AArch64::LDRXui:
238 case AArch64::LDRSui:
239 case AArch64::LDRDui:
240 case AArch64::LDRQui:
// Loads flagged MO_GOT are excluded here.
241 return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
245 /// Check whether the given instruction can load a literal.
/// The opcodes listed are a subset of those accepted by isCandidateLoad();
/// handleADRP() consults this function before emitting an AdrpLdr directive.
/// NOTE(review): the return statements are not visible in this listing
/// (presumably true for the listed opcodes and false otherwise) -- confirm.
246 static bool supportLoadFromLiteral(const MachineInstr &MI) {
247 switch (MI.getOpcode()) {
250 case AArch64::LDRSWui:
251 case AArch64::LDRWui:
252 case AArch64::LDRXui:
253 case AArch64::LDRSui:
254 case AArch64::LDRDui:
255 case AArch64::LDRQui:
260 /// Number of GPR registers tracked by mapRegToGPRIndex(); also the size of the
/// per-register LOHInfo array in runOnMachineFunction().
261 static const unsigned N_GPR_REGS = 31;
262 /// Map register number to index from 0-30.
/// X0..X28 map to Reg - X0 and W0..W30 map to Reg - W0, so a W register and
/// the X register with the same number share one index.
/// NOTE(review): the values returned for FP, LR and for untracked registers
/// are not visible in this listing (the embedded numbering skips 273 and
/// 275-278). Callers test the result with `>= 0`, so untracked registers
/// presumably yield a negative value -- confirm against the full source.
263 static int mapRegToGPRIndex(MCPhysReg Reg) {
// Compile-time checks that the register enum layout matches N_GPR_REGS.
264 static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
265 static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
266 if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
267 return Reg - AArch64::X0;
268 if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
269 return Reg - AArch64::W0;
270 // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
271 // handle them as special cases.
272 if (Reg == AArch64::FP)
274 if (Reg == AArch64::LR)
279 /// State tracked per register.
280 /// The main algorithm walks backwards over a basic block maintaining this
281 /// data structure for each tracked general purpose register.
/// runOnMachineFunction() resets the whole array with memset() at the start
/// of each basic block, so the all-zero bit pattern is the initial state.
/// NOTE(review): the `struct LOHInfo {` header (embedded line 282) and the
/// closing brace are not visible in this listing.
283 MCLOHType Type : 8; ///< "Best" type of LOH possible.
284 bool IsCandidate : 1; ///< Possible LOH candidate.
285 bool OneUser : 1; ///< Found exactly one user (yet).
286 bool MultiUsers : 1; ///< Found multiple users.
287 const MachineInstr *MI0; ///< First instruction involved in the LOH.
288 const MachineInstr *MI1; ///< Second instruction involved in the LOH
290 const MachineInstr *LastADRP; ///< Last ADRP in same register.
293 /// Update state \p Info given \p MI uses the tracked register.
/// \p MO is the operand of \p MI through which the register is used. If a
/// user was already recorded, the register is marked as having multiple users
/// and stops being a candidate. Otherwise the kind of \p MI selects the
/// initial LOH type: candidate load -> AdrpLdr, candidate store ->
/// AdrpAddStr, ADDXri -> AdrpAdd, GOT-flagged LDRXui -> AdrpLdrGot.
/// NOTE(review): the third parameter line, the early return and the
/// assignments to OneUser/MI0/MI1 are not visible in this listing (the
/// embedded numbering skips 295, 300-303, 308, 314-315, 319, 324-327).
294 static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
296 // We have multiple uses if we already found one before.
297 if (Info.MultiUsers || Info.OneUser) {
298 Info.IsCandidate = false;
299 Info.MultiUsers = true;
304 // Start new LOHInfo if applicable.
305 if (isCandidateLoad(MI)) {
306 Info.Type = MCLOH_AdrpLdr;
307 Info.IsCandidate = true;
309 // Note that even this is AdrpLdr now, we can switch to a Ldr variant
311 } else if (isCandidateStore(MI, MO)) {
312 Info.Type = MCLOH_AdrpAddStr;
313 Info.IsCandidate = true;
316 } else if (MI.getOpcode() == AArch64::ADDXri) {
317 Info.Type = MCLOH_AdrpAdd;
318 Info.IsCandidate = true;
320 } else if (MI.getOpcode() == AArch64::LDRXui &&
321 MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
322 Info.Type = MCLOH_AdrpLdrGot;
323 Info.IsCandidate = true;
328 /// Update state \p Info given the tracked register is clobbered.
/// Clears the candidate flag, both user flags and the remembered ADRP; the
/// Type, MI0 and MI1 fields are left as they are.
329 static void handleClobber(LOHInfo &Info) {
330 Info.IsCandidate = false;
331 Info.OneUser = false;
332 Info.MultiUsers = false;
333 Info.LastADRP = nullptr;
336 /// Update state \p Info given that \p MI is possibly the middle instruction
337 /// of an LOH involving 3 instructions.
/// \p DefInfo is the state of the register defined by \p MI and \p OpInfo the
/// state of its register operand; both may refer to the same object when
/// \p MI defines and uses the same register. The caller in
/// runOnMachineFunction() only reaches this function for ADDXri/LDRXui
/// instructions accepted by canDefBePartOfLOH().
/// State transitions visible here, applied to \p OpInfo:
///   ADD (address immediate): AdrpLdr -> AdrpAddLdr; AdrpAddStr with no MI1
///                            stays AdrpAddStr.
///   GOT load:                AdrpAddStr with no MI1 -> AdrpLdrGotStr;
///                            AdrpLdr -> AdrpLdrGotLdr.
/// NOTE(review): the trailing parameter line, all return statements, the
/// copy of DefInfo into OpInfo and the MI1 assignments are not visible in
/// this listing (the embedded numbering skips 339, 341, 344, 347, 349,
/// 356-357, 361-364, 371-372, 376-382) -- confirm against the full source.
338 static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
// Bail out unless the defined register is a candidate; a distinct operand
// register that already has a user also disqualifies the chain.
340 if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
342 // Copy LOHInfo for dest register to LOHInfo for source register.
343 if (&DefInfo != &OpInfo) {
345 // Invalidate \p DefInfo because we track it in \p OpInfo now.
346 handleClobber(DefInfo);
348 DefInfo.LastADRP = nullptr;
350 // Advance state machine.
351 assert(OpInfo.IsCandidate && "Expect valid state");
352 if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
353 if (OpInfo.Type == MCLOH_AdrpLdr) {
354 OpInfo.Type = MCLOH_AdrpAddLdr;
355 OpInfo.IsCandidate = true;
358 } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
359 OpInfo.Type = MCLOH_AdrpAddStr;
360 OpInfo.IsCandidate = true;
365 assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
366 assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
367 "Expected GOT relocation");
368 if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
369 OpInfo.Type = MCLOH_AdrpLdrGotStr;
370 OpInfo.IsCandidate = true;
373 } else if (OpInfo.Type == MCLOH_AdrpLdr) {
374 OpInfo.Type = MCLOH_AdrpLdrGotLdr;
375 OpInfo.IsCandidate = true;
383 /// Update state when seeing an ADRP instruction.
/// \p MI is the ADRP, \p AFI receives the LOH directives and \p Info is the
/// state of the register the ADRP defines. Two things happen here:
///  - if a later ADRP into the same register was remembered (Info.LastADRP),
///    an AdrpAdrp directive pairing \p MI with it is recorded;
///  - if the register is currently a candidate, the directive matching
///    Info.Type is recorded with \p MI as its first instruction, followed by
///    Info.MI1 (for three-instruction LOHs) and Info.MI0.
/// NOTE(review): the trailing parameter line, the switch header, several
/// case labels, the `break` statements, most statistic increments and the
/// final state update are not visible in this listing (the embedded
/// numbering skips many lines between 385 and 449) -- confirm against the
/// full source.
384 static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
386 if (Info.LastADRP != nullptr) {
387 DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t'
389 AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
390 ++NumADRPSimpleCandidate;
393 // Produce LOH directive if possible.
394 if (Info.IsCandidate) {
397 DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t'
399 AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
400 ++NumADRSimpleCandidate;
// AdrpLdr is only recorded when the load has a literal form.
403 if (supportLoadFromLiteral(*Info.MI0)) {
404 DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t'
406 AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
410 case MCLOH_AdrpAddLdr:
411 DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t'
412 << *Info.MI1 << '\t' << *Info.MI0);
413 AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
416 case MCLOH_AdrpAddStr:
// AdrpAddStr needs the middle ADD; without MI1 no directive is recorded here.
417 if (Info.MI1 != nullptr) {
418 DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t'
419 << *Info.MI1 << '\t' << *Info.MI0);
420 AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
424 case MCLOH_AdrpLdrGotLdr:
425 DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t'
426 << *Info.MI1 << '\t' << *Info.MI0);
427 AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
430 case MCLOH_AdrpLdrGotStr:
431 DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t'
432 << *Info.MI1 << '\t' << *Info.MI0);
433 AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
436 case MCLOH_AdrpLdrGot:
437 DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t'
439 AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
// AdrpAdrp is produced from LastADRP above and is never stored in Info.Type.
442 llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
/// Apply handleClobber() to the tracked state of \p Reg when the register
/// mask \p RegMask clobbers it; registers the mask does not clobber are left
/// untouched.
/// NOTE(review): the trailing parameter line, the early return and the check
/// of Idx are not visible in this listing (the embedded numbering skips 451,
/// 453, 455, 457-458) -- confirm against the full source.
450 static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
452 if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
454 int Idx = mapRegToGPRIndex(Reg);
456 handleClobber(LOHInfos[Idx]);
/// Update the per-register states in \p LOHInfos for an instruction \p MI
/// that is handled neither as an ADRP nor as a middle instruction.
/// The first loop treats register-mask operands and register definitions as
/// clobbers; the second loop reports every register that \p MI reads to
/// handleUse().
/// NOTE(review): the `continue` statements and the checks of Idx are not
/// visible in this listing (the embedded numbering skips 468-469, 471,
/// 473-474, 476-477, 480, 482-483, 485-487) -- confirm against the full
/// source.
459 static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
460 // Handle defs and regmasks.
461 for (const MachineOperand &MO : MI.operands()) {
462 if (MO.isRegMask()) {
463 const uint32_t *RegMask = MO.getRegMask();
// Both register classes are scanned for registers the mask clobbers.
464 for (MCPhysReg Reg : AArch64::GPR32RegClass)
465 handleRegMaskClobber(RegMask, Reg, LOHInfos);
466 for (MCPhysReg Reg : AArch64::GPR64RegClass)
467 handleRegMaskClobber(RegMask, Reg, LOHInfos);
470 if (!MO.isReg() || !MO.isDef())
472 int Idx = mapRegToGPRIndex(MO.getReg());
475 handleClobber(LOHInfos[Idx]);
// Second walk: register operands that are read.
478 for (const MachineOperand &MO : MI.uses()) {
479 if (!MO.isReg() || !MO.readsReg())
481 int Idx = mapRegToGPRIndex(MO.getReg());
484 handleUse(MI, MO, LOHInfos[Idx]);
/// Pass entry point: collect LOH directives for \p MF into its
/// AArch64FunctionInfo.
/// For every basic block the per-register state array is zeroed, registers
/// that are live-in to any successor are marked as already having one user,
/// and the block's instructions are then visited from last to first.
/// ADDXri/LDRXui instructions accepted by canDefBePartOfLOH() are offered to
/// handleMiddleInst(), ADRP instructions go to handleADRP(), and
/// handleNormalInst() processes instructions generically.
/// NOTE(review): the switch header, the ADRP case label, the
/// `continue`/`break` statements, the index checks and the final return are
/// not visible in this listing (the embedded numbering skips several lines
/// between 490 and 544) -- confirm against the full source.
488 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
489 if (skipFunction(*MF.getFunction()))
492 DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
493 << "Looking in function " << MF.getName() << '\n');
// One state slot per index produced by mapRegToGPRIndex().
495 LOHInfo LOHInfos[N_GPR_REGS];
496 AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
497 for (const MachineBasicBlock &MBB : MF) {
498 // Reset register tracking state.
499 memset(LOHInfos, 0, sizeof(LOHInfos));
500 // Live-out registers are used.
501 for (const MachineBasicBlock *Succ : MBB.successors()) {
502 for (const auto &LI : Succ->liveins()) {
503 int RegIdx = mapRegToGPRIndex(LI.PhysReg);
505 LOHInfos[RegIdx].OneUser = true;
509 // Walk the basic block backwards and update the per register state machine
511 for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
512 unsigned Opcode = MI.getOpcode();
514 case AArch64::ADDXri:
515 case AArch64::LDRXui:
516 if (canDefBePartOfLOH(MI)) {
// Operand 0 is the defined register and operand 1 the register it is
// computed from.
517 const MachineOperand &Def = MI.getOperand(0);
518 const MachineOperand &Op = MI.getOperand(1);
519 assert(Def.isReg() && Def.isDef() && "Expected reg def");
520 assert(Op.isReg() && Op.isUse() && "Expected reg use");
521 int DefIdx = mapRegToGPRIndex(Def.getReg());
522 int OpIdx = mapRegToGPRIndex(Op.getReg());
// Only tracked registers (non-negative indices) can form a chain.
523 if (DefIdx >= 0 && OpIdx >= 0 &&
524 handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
529 const MachineOperand &Op0 = MI.getOperand(0);
530 int Idx = mapRegToGPRIndex(Op0.getReg());
532 handleADRP(MI, AFI, LOHInfos[Idx]);
537 handleNormalInst(MI, LOHInfos);
541 // Return "no change": The pass only collects information.
/// Factory for the pass: returns a newly heap-allocated AArch64CollectLOH as
/// a raw FunctionPass pointer.
/// NOTE(review): ownership presumably passes to the caller / pass manager --
/// confirm at the call site.
545 FunctionPass *llvm::createAArch64CollectLOHPass() {
546 return new AArch64CollectLOH();