1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
11 // On AArch64, stack frames are structured as follows:
13 // The stack grows downward.
15 // All of the individual frame areas on the frame below are optional, i.e. it's
16 // possible to create a function so that the particular area isn't present
19 // At function entry, the "frame" looks as follows:
22 // |-----------------------------------|
24 // | arguments passed on the stack |
26 // |-----------------------------------| <- sp
30 // After the prologue has run, the frame has the following general structure.
31 // Note that this doesn't depict the case where a red-zone is used. Also,
32 // technically the last frame area (VLAs) doesn't get created until in the
33 // main function body, after the prologue is run. However, it's depicted here
37 // |-----------------------------------|
39 // | arguments passed on the stack |
41 // |-----------------------------------|
43 // | (Win64 only) varargs from reg |
45 // |-----------------------------------|
47 // | prev_fp, prev_lr |
48 // | (a.k.a. "frame record") |
49 // |-----------------------------------| <- fp(=x29)
51 // | other callee-saved registers |
53 // |-----------------------------------|
54 // |.empty.space.to.make.part.below....|
55 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
56 // |.the.standard.16-byte.alignment....| compile time; if present)
57 // |-----------------------------------|
59 // | local variables of fixed size |
60 // | including spill slots |
61 // |-----------------------------------| <- bp(not defined by ABI,
62 // |.variable-sized.local.variables....| LLVM chooses X19)
63 // |.(VLAs)............................| (size of this area is unknown at
64 // |...................................| compile time)
65 // |-----------------------------------| <- sp
69 // To access the data in a frame, at-compile time, a constant offset must be
70 // computable from one of the pointers (fp, bp, sp) to access it. The size
71 // of the areas with a dotted background cannot be computed at compile-time
72 // if they are present, making it required to have all three of fp, bp and
73 // sp to be set up to be able to access all contents in the frame areas,
74 // assuming all of the frame areas are non-empty.
76 // For most functions, some of the frame areas are empty. For those functions,
77 // it may not be necessary to set up fp or bp:
78 // * A base pointer is definitely needed when there are both VLAs and local
79 // variables with more-than-default alignment requirements.
80 // * A frame pointer is definitely needed when there are local variables with
81 // more-than-default alignment requirements.
83 // In some cases when a base pointer is not strictly needed, it is generated
84 // anyway when offsets from the frame pointer to access local variables become
85 // so large that the offset can't be encoded in the immediate fields of loads
88 // FIXME: also explain the redzone concept.
89 // FIXME: also explain the concept of reserved call frames.
91 //===----------------------------------------------------------------------===//
93 #include "AArch64FrameLowering.h"
94 #include "AArch64InstrInfo.h"
95 #include "AArch64MachineFunctionInfo.h"
96 #include "AArch64RegisterInfo.h"
97 #include "AArch64Subtarget.h"
98 #include "AArch64TargetMachine.h"
99 #include "MCTargetDesc/AArch64AddressingModes.h"
100 #include "llvm/ADT/ScopeExit.h"
101 #include "llvm/ADT/SmallVector.h"
102 #include "llvm/ADT/Statistic.h"
103 #include "llvm/CodeGen/LivePhysRegs.h"
104 #include "llvm/CodeGen/MachineBasicBlock.h"
105 #include "llvm/CodeGen/MachineFrameInfo.h"
106 #include "llvm/CodeGen/MachineFunction.h"
107 #include "llvm/CodeGen/MachineInstr.h"
108 #include "llvm/CodeGen/MachineInstrBuilder.h"
109 #include "llvm/CodeGen/MachineMemOperand.h"
110 #include "llvm/CodeGen/MachineModuleInfo.h"
111 #include "llvm/CodeGen/MachineOperand.h"
112 #include "llvm/CodeGen/MachineRegisterInfo.h"
113 #include "llvm/CodeGen/RegisterScavenging.h"
114 #include "llvm/CodeGen/TargetInstrInfo.h"
115 #include "llvm/CodeGen/TargetRegisterInfo.h"
116 #include "llvm/CodeGen/TargetSubtargetInfo.h"
117 #include "llvm/CodeGen/WinEHFuncInfo.h"
118 #include "llvm/IR/Attributes.h"
119 #include "llvm/IR/CallingConv.h"
120 #include "llvm/IR/DataLayout.h"
121 #include "llvm/IR/DebugLoc.h"
122 #include "llvm/IR/Function.h"
123 #include "llvm/MC/MCAsmInfo.h"
124 #include "llvm/MC/MCDwarf.h"
125 #include "llvm/Support/CommandLine.h"
126 #include "llvm/Support/Debug.h"
127 #include "llvm/Support/ErrorHandling.h"
128 #include "llvm/Support/MathExtras.h"
129 #include "llvm/Support/raw_ostream.h"
130 #include "llvm/Target/TargetMachine.h"
131 #include "llvm/Target/TargetOptions.h"
137 using namespace llvm;
139 #define DEBUG_TYPE "frame-info"
// File-local command-line knobs and constants for AArch64 frame lowering.
141 static cl::opt<bool> EnableRedZone("aarch64-redzone",
142 cl::desc("enable use of redzone on AArch64"),
143 cl::init(false), cl::Hidden);
// NOTE(review): the declarator line for ReverseCSRRestoreSeq (presumably
// `static cl::opt<bool>`) is elided in this excerpt (numbering jumps
// 143 -> 146) — confirm against the upstream file.
146 ReverseCSRRestoreSeq("reverse-csr-restore-seq",
147 cl::desc("reverse the CSR restore sequence"),
148 cl::init(false), cl::Hidden);
150 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
152 /// This is the biggest offset to the stack pointer we can encode in aarch64
153 /// instructions (without using a separate calculation and a temp register).
154 /// Note that the exception here are vector stores/loads which cannot encode any
155 /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
156 static const unsigned DefaultSafeSPDisplacement = 255;
158 /// Look at each instruction that references stack frames and return the stack
159 /// size limit beyond which some of these instructions will require a scratch
160 /// register during their expansion later.
// NOTE(review): several statements are elided in this excerpt (source
// numbering jumps 169->172, 172->177, 178->183): the `continue;` for the
// skip-check, the declaration of `Offset`, and the early `return 0;` path
// are not shown — confirm against the upstream file before editing.
161 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
162 // FIXME: For now, just conservatively guesstimate based on unscaled indexing
163 // range. We'll end up allocating an unnecessary spill slot a lot, but
164 // realistically that's not a big deal at this stage of the game.
165 for (MachineBasicBlock &MBB : MF) {
166 for (MachineInstr &MI : MBB) {
// Debug/pseudo instructions and plain ADD(S)Xri never need a scratch
// register for frame-offset materialization, so skip them.
167 if (MI.isDebugInstr() || MI.isPseudo() ||
168 MI.getOpcode() == AArch64::ADDXri ||
169 MI.getOpcode() == AArch64::ADDSXri)
172 for (const MachineOperand &MO : MI.operands()) {
// If even this offset cannot be folded into the instruction, a scratch
// register will be required during expansion.
177 if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
178 AArch64FrameOffsetCannotUpdate)
// No offset-restricted instruction was found: the default safe SP
// displacement is sufficient.
183 return DefaultSafeSPDisplacement;
// Return true if this function may allocate its locals in the 128-byte
// red zone below SP instead of adjusting SP.
// NOTE(review): the early-return bodies (e.g. after the NoRedZone check)
// are elided in this excerpt (numbering jumps 191->194) — confirm against
// the upstream file.
186 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
189 // Don't use the red zone if the function explicitly asks us not to.
190 // This is typically used for kernel code.
191 if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
194 const MachineFrameInfo &MFI = MF.getFrameInfo();
195 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
196 unsigned NumBytes = AFI->getLocalStackSize();
// Leaf-only: any call, a frame pointer, or more than 128 bytes of locals
// rules the red zone out.
198 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
201 /// hasFP - Return true if the specified function should have a dedicated frame
202 /// pointer register.
// NOTE(review): the `return true;` bodies for each condition below are
// elided in this excerpt (see the numbering gaps 209->211, 216->218,
// 227->...) — confirm against the upstream file.
203 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
206 // Win64 EH requires a frame pointer if funclets are present, as the locals
207 // are accessed off the frame pointer in both the parent function and the
209 if (MF.hasEHFunclets())
211 // Retain behavior of always omitting the FP for leaf functions when possible.
212 if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
// Dynamic allocas, frame-address queries, stackmaps/patchpoints, and
// stack realignment all force a frame pointer.
214 if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
215 MFI.hasStackMap() || MFI.hasPatchPoint() ||
216 RegInfo->needsStackRealignment(MF))
218 // With large callframes around we may need to use FP to access the scavenging
219 // emergency spillslot.
221 // Unfortunately some calls to hasFP() like machine verifier ->
222 // getReservedReg() -> hasFP in the middle of global isel are too early
223 // to know the max call frame size. Hopefully conservatively returning "true"
224 // in those cases is fine.
225 // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
226 if (!MFI.isMaxCallFrameSizeComputed() ||
227 MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
233 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
234 /// not required, we reserve argument space for call sites in the function
235 /// immediately on entry to the current function. This eliminates the need for
236 /// add/sub sp brackets around call sites. Returns true if the call frame is
237 /// included as part of the stack frame.
// NOTE(review): the return-type line (presumably `bool`) is elided in this
// excerpt (numbering jumps 237 -> 239) — confirm against the upstream file.
239 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// VLAs make the SP distance to outgoing arguments unknown at compile time,
// so the call frame cannot be folded into the fixed stack frame.
240 return !MF.getFrameInfo().hasVarSizedObjects();
// Lower ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos: when no call frame is
// reserved, emit the actual SP adjustment; when the callee pops its own
// arguments, re-add that amount.
// NOTE(review): several lines are elided in this excerpt (numbering gaps
// 251->253, 254->256, 276->278, 282->end): the sign flip of Amount on
// destroy, the closing braces, and the erase of the pseudo are not shown —
// confirm against the upstream file.
243 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
244 MachineFunction &MF, MachineBasicBlock &MBB,
245 MachineBasicBlock::iterator I) const {
246 const AArch64InstrInfo *TII =
247 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
248 DebugLoc DL = I->getDebugLoc();
249 unsigned Opc = I->getOpcode();
250 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
251 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
253 if (!hasReservedCallFrame(MF)) {
254 unsigned Align = getStackAlignment();
// Keep SP aligned to the target stack alignment across the adjustment.
256 int64_t Amount = I->getOperand(0).getImm();
257 Amount = alignTo(Amount, Align);
261 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
262 // doesn't have to pop anything), then the first operand will be zero too so
263 // this adjustment is a no-op.
264 if (CalleePopAmount == 0) {
265 // FIXME: in-function stack adjustment for calls is limited to 24-bits
266 // because there's no guaranteed temporary register available.
268 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
269 // 1) For offset <= 12-bit, we use LSL #0
270 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
271 // LSL #0, and the other uses LSL #12.
273 // Most call frames will be allocated at the start of a function so
274 // this is OK, but it is a limitation that needs dealing with.
275 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
276 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
278 } else if (CalleePopAmount != 0) {
279 // If the calling convention demands that the callee pops arguments from the
280 // stack, we want to add it back if we have a reserved call frame.
281 assert(CalleePopAmount < 0xffffff && "call frame too large");
282 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
// Decide whether the return address of this function must be signed
// (pointer authentication), based on the "sign-return-address" attribute.
// NOTE(review): the return statements for each branch are elided in this
// excerpt (numbering gaps 294->297, 298->301, 301->304, 307->end) —
// confirm against the upstream file.
288 static bool ShouldSignReturnAddress(MachineFunction &MF) {
289 // The function should be signed in the following situations:
290 // - sign-return-address=all
291 // - sign-return-address=non-leaf and the functions spills the LR
293 const Function &F = MF.getFunction();
294 if (!F.hasFnAttribute("sign-return-address"))
297 StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
298 if (Scope.equals("none"))
301 if (Scope.equals("all"))
304 assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
// non-leaf: sign only if LR is spilled, i.e. it appears in the
// callee-saved info.
306 for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
307 if (Info.getReg() == AArch64::LR)
// Emit CFI offset directives describing where each callee-saved register
// was stored, so unwinders can restore them.
// NOTE(review): this excerpt elides some lines (numbering gaps 323->327,
// 328->330), including the declaration that starts the `Offset`
// computation — confirm against the upstream file.
313 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
314 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
315 MachineFunction &MF = *MBB.getParent();
316 MachineFrameInfo &MFI = MF.getFrameInfo();
317 const TargetSubtargetInfo &STI = MF.getSubtarget();
318 const MCRegisterInfo *MRI = STI.getRegisterInfo();
319 const TargetInstrInfo *TII = STI.getInstrInfo();
320 DebugLoc DL = MBB.findDebugLoc(MBBI);
322 // Add callee saved registers to move list.
323 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
327 for (const auto &Info : CSI) {
328 unsigned Reg = Info.getReg();
330 MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
// Emit the .cfi_offset for this register's save slot, marked as part of
// the prologue (FrameSetup).
331 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
332 unsigned CFIIndex = MF.addFrameInst(
333 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
334 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
335 .addCFIIndex(CFIIndex)
336 .setMIFlags(MachineInstr::FrameSetup);
340 // Find a scratch register that we can use at the start of the prologue to
341 // re-align the stack pointer. We avoid using callee-save registers since they
342 // may appear to be free when this is called from canUseAsPrologue (during
343 // shrink wrapping), but then no longer be free when this is called from
346 // FIXME: This is a bit conservative, since in the above case we could use one
347 // of the callee-save registers as a scratch temp to re-align the stack pointer,
348 // but we would then have to make sure that we were in fact saving at least one
349 // callee-save register in the prologue, which is additional complexity that
350 // doesn't seem worth the benefit.
// NOTE(review): several `return` lines are elided in this excerpt
// (numbering gaps 355->358, 370->373, 374->377) — confirm against the
// upstream file.
351 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
352 MachineFunction *MF = MBB->getParent();
354 // If MBB is an entry block, use X9 as the scratch register
355 if (&MF->front() == MBB)
// Otherwise compute liveness at the start of the block so we only pick a
// register that is actually free here.
358 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
359 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
360 LivePhysRegs LiveRegs(TRI);
361 LiveRegs.addLiveIns(*MBB);
363 // Mark callee saved registers as used so we will not choose them.
364 const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
365 for (unsigned i = 0; CSRegs[i]; ++i)
366 LiveRegs.addReg(CSRegs[i]);
368 // Prefer X9 since it was historically used for the prologue scratch reg.
369 const MachineRegisterInfo &MRI = MF->getRegInfo();
370 if (LiveRegs.available(MRI, AArch64::X9))
// Fall back to the first available GPR64; NoRegister if none is free.
373 for (unsigned Reg : AArch64::GPR64RegClass) {
374 if (LiveRegs.available(MRI, Reg))
377 return AArch64::NoRegister;
// Return true if MBB can host the prologue: if the stack must be
// re-aligned, a scratch register has to be available at its start.
// NOTE(review): the `return true;` after the realignment check is elided
// in this excerpt (numbering jumps 388->390) — confirm against the
// upstream file.
380 bool AArch64FrameLowering::canUseAsPrologue(
381 const MachineBasicBlock &MBB) const {
382 const MachineFunction *MF = MBB.getParent();
// findScratchNonCalleeSaveRegister takes a non-const MBB; casting away
// const here is confined to that query.
383 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
384 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
385 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
387 // Don't need a scratch register if we're not going to re-align the stack.
388 if (!RegInfo->needsStackRealignment(*MF))
390 // Otherwise, we can use any block as long as it has a scratch register
392 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
// Return true if a Windows target needs a stack probe (__chkstk) for an
// allocation of StackSizeInBytes. Default probe threshold is one 4KiB
// page, overridable via the "stack-probe-size" function attribute.
// NOTE(review): the `return false;` for non-Windows targets is elided in
// this excerpt (numbering jumps 398->400) — confirm against the upstream
// file.
395 static bool windowsRequiresStackProbe(MachineFunction &MF,
396 unsigned StackSizeInBytes) {
397 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
398 if (!Subtarget.isTargetWindows())
400 const Function &F = MF.getFunction();
401 // TODO: When implementing stack protectors, take that into account
402 // for the probe threshold.
403 unsigned StackProbeSize = 4096;
404 if (F.hasFnAttribute("stack-probe-size"))
405 F.getFnAttribute("stack-probe-size")
407 .getAsInteger(0, StackProbeSize);
408 return (StackSizeInBytes >= StackProbeSize) &&
409 !F.hasFnAttribute("no-stack-arg-probe");
// Decide whether the callee-save spill and the local-stack allocation can
// be folded into a single SP adjustment.
// NOTE(review): the return statements for each disqualifying condition are
// elided in this excerpt (numbering gaps 419->422, 424->427, 427->430,
// 430->433, 436->end) — confirm against the upstream file.
412 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
413 MachineFunction &MF, unsigned StackBumpBytes) const {
414 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
415 const MachineFrameInfo &MFI = MF.getFrameInfo();
416 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
417 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
419 if (AFI->getLocalStackSize() == 0)
422 // 512 is the maximum immediate for stp/ldp that will be used for
423 // callee-save save/restores
424 if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
427 if (MFI.hasVarSizedObjects())
430 if (RegInfo->needsStackRealignment(MF))
433 // This isn't strictly necessary, but it simplifies things a bit since the
434 // current RedZone handling code assumes the SP is adjusted by the
435 // callee-save save/restore code.
436 if (canUseRedZone(MF))
442 // Given a load or a store instruction, generate an appropriate unwinding SEH
// code (Windows ARM64 unwind opcode) and insert it after the instruction.
// Pre/post-indexed forms (…pre/…post) map to the _X variants; the FP/LR
// pair gets the dedicated SaveFPLR codes.
// NOTE(review): this excerpt elides many lines (the `switch (Opc)` header
// around original line 456-458, additional case labels, the builder
// chains' .addImm/.setMIFlag tails, and closing braces) — confirm against
// the upstream file before modifying.
444 static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
445 const TargetInstrInfo &TII,
446 MachineInstr::MIFlag Flag) {
447 unsigned Opc = MBBI->getOpcode();
448 MachineBasicBlock *MBB = MBBI->getParent();
449 MachineFunction &MF = *MBB->getParent();
450 DebugLoc DL = MBBI->getDebugLoc();
// By convention the immediate offset is the last operand of these
// load/store forms.
451 unsigned ImmIdx = MBBI->getNumOperands() - 1;
452 int Imm = MBBI->getOperand(ImmIdx).getImm();
453 MachineInstrBuilder MIB;
454 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
455 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
459 llvm_unreachable("No SEH Opcode for this instruction");
460 case AArch64::LDPDpost:
463 case AArch64::STPDpre: {
464 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
465 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
466 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
473 case AArch64::LDPXpost:
476 case AArch64::STPXpre: {
477 unsigned Reg0 = MBBI->getOperand(1).getReg();
478 unsigned Reg1 = MBBI->getOperand(2).getReg();
// FP+LR as a pair has its own unwind code (SaveFPLR_X).
479 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
480 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
484 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
485 .addImm(RegInfo->getSEHRegNum(Reg0))
486 .addImm(RegInfo->getSEHRegNum(Reg1))
491 case AArch64::LDRDpost:
494 case AArch64::STRDpre: {
495 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
496 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
502 case AArch64::LDRXpost:
505 case AArch64::STRXpre: {
506 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
507 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
514 case AArch64::LDPDi: {
515 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
516 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
517 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
525 case AArch64::LDPXi: {
526 unsigned Reg0 = MBBI->getOperand(0).getReg();
527 unsigned Reg1 = MBBI->getOperand(1).getReg();
528 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
529 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
533 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
534 .addImm(RegInfo->getSEHRegNum(Reg0))
535 .addImm(RegInfo->getSEHRegNum(Reg1))
540 case AArch64::STRXui:
541 case AArch64::LDRXui: {
542 int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
543 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
549 case AArch64::STRDui:
550 case AArch64::LDRDui: {
551 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
552 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
// Place the SEH pseudo immediately after the load/store it describes.
559 auto I = MBB->insertAfter(MBBI, MIB);
563 // Fix up the SEH opcode associated with the save/restore instruction.
// Adjusts the SEH pseudo's offset operand after the save/restore offsets
// were rebased by a combined SP bump of LocalStackSize bytes.
// NOTE(review): the `default:` label before the llvm_unreachable, the
// `break;` after the assignment, and the closing of the switch are elided
// in this excerpt (numbering gaps 568->570, 576->580) — confirm against
// the upstream file.
564 static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
565 unsigned LocalStackSize) {
566 MachineOperand *ImmOpnd = nullptr;
567 unsigned ImmIdx = MBBI->getNumOperands() - 1;
568 switch (MBBI->getOpcode()) {
570 llvm_unreachable("Fix the offset in the SEH instruction");
571 case AArch64::SEH_SaveFPLR:
572 case AArch64::SEH_SaveRegP:
573 case AArch64::SEH_SaveReg:
574 case AArch64::SEH_SaveFRegP:
575 case AArch64::SEH_SaveFReg:
576 ImmOpnd = &MBBI->getOperand(ImmIdx);
580 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
583 // Convert callee-save register save/restore instruction to do stack pointer
584 // decrement/increment to allocate/deallocate the callee-save stack area by
585 // converting store/load to use pre/post increment version.
// Returns an iterator to the instruction preceding the erased original.
// NOTE(review): this excerpt elides many lines (advance of MBBI in the
// skip-loop, the `unsigned NewOpc`/`int Scale` declarations, most case
// labels in the switch, NeedsWinCFI guards, and closing braces) — confirm
// against the upstream file before modifying.
586 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
587 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
588 const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
589 bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
590 // Ignore instructions that do not operate on SP, i.e. shadow call stack
591 // instructions and associated CFI instruction.
592 while (MBBI->getOpcode() == AArch64::STRXpost ||
593 MBBI->getOpcode() == AArch64::LDRXpre ||
594 MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
595 if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
596 assert(MBBI->getOperand(0).getReg() != AArch64::SP);
// Map the SP-relative offset form to its pre-increment (stores) or
// post-increment (loads) counterpart.
601 switch (MBBI->getOpcode()) {
603 llvm_unreachable("Unexpected callee-save save/restore opcode!");
605 NewOpc = AArch64::STPXpre;
609 NewOpc = AArch64::STPDpre;
613 NewOpc = AArch64::STPQpre;
616 case AArch64::STRXui:
617 NewOpc = AArch64::STRXpre;
619 case AArch64::STRDui:
620 NewOpc = AArch64::STRDpre;
622 case AArch64::STRQui:
623 NewOpc = AArch64::STRQpre;
626 NewOpc = AArch64::LDPXpost;
630 NewOpc = AArch64::LDPDpost;
634 NewOpc = AArch64::LDPQpost;
637 case AArch64::LDRXui:
638 NewOpc = AArch64::LDRXpost;
640 case AArch64::LDRDui:
641 NewOpc = AArch64::LDRDpost;
643 case AArch64::LDRQui:
644 NewOpc = AArch64::LDRQpost;
647 // Get rid of the SEH code associated with the old instruction.
649 auto SEH = std::next(MBBI);
650 if (AArch64InstrInfo::isSEHInstruction(*SEH))
651 SEH->eraseFromParent();
// Build the replacement with SP as the written-back base register.
654 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
655 MIB.addReg(AArch64::SP, RegState::Define);
657 // Copy all operands other than the immediate offset.
658 unsigned OpndIdx = 0;
659 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
661 MIB.add(MBBI->getOperand(OpndIdx));
663 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
664 "Unexpected immediate offset in first/last callee-save save/restore "
666 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
667 "Unexpected base register in callee-save save/restore instruction!");
668 assert(CSStackSizeInc % Scale == 0);
669 MIB.addImm(CSStackSizeInc / Scale);
671 MIB.setMIFlags(MBBI->getFlags());
672 MIB.setMemRefs(MBBI->memoperands());
674 // Generate a new SEH code that corresponds to the new instruction.
677 InsertSEH(*MIB, *TII,
678 InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
681 return std::prev(MBB.erase(MBBI));
684 // Fixup callee-save register save/restore instructions to take into account
685 // combined SP bump by adding the local stack size to the stack offsets.
// NOTE(review): this excerpt elides several lines (the parameter list tail
// after LocalStackSize, early returns, the `unsigned Scale` assignments in
// the switch, and the guard around the SEH fixup) — confirm against the
// upstream file before modifying.
686 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
687 unsigned LocalStackSize,
690 if (AArch64InstrInfo::isSEHInstruction(MI))
693 unsigned Opc = MI.getOpcode();
695 // Ignore instructions that do not operate on SP, i.e. shadow call stack
696 // instructions and associated CFI instruction.
697 if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
698 Opc == AArch64::CFI_INSTRUCTION) {
699 if (Opc != AArch64::CFI_INSTRUCTION)
700 assert(MI.getOperand(0).getReg() != AArch64::SP);
// The switch below classifies the save/restore opcode; the per-case
// scale assignments are elided in this excerpt.
707 case AArch64::STRXui:
709 case AArch64::STRDui:
711 case AArch64::LDRXui:
713 case AArch64::LDRDui:
717 case AArch64::STRQui:
719 case AArch64::LDRQui:
723 llvm_unreachable("Unexpected callee-save save/restore opcode!");
726 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
727 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
728 "Unexpected base register in callee-save save/restore instruction!");
729 // Last operand is immediate offset that needs fixing.
730 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
731 // All generated opcodes have scaled offsets.
732 assert(LocalStackSize % Scale == 0);
733 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
// Keep the paired SEH pseudo (which must immediately follow) in sync
// with the rebased offset.
737 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
738 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
739 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
740 "Expecting a SEH instruction");
741 fixupSEHOpcode(MBBI, LocalStackSize);
// Reorder the epilogue's first CSR pop so the load-store optimizer can
// later merge it with the final SP increment (only under the
// -reverse-csr-restore-seq flag).
745 static void adaptForLdStOpt(MachineBasicBlock &MBB,
746 MachineBasicBlock::iterator FirstSPPopI,
747 MachineBasicBlock::iterator LastPopI) {
748 // Sometimes (when we restore in the same order as we save), we can end up
749 // with code like this:
751 // ldp x26, x25, [sp]
752 // ldp x24, x23, [sp, #16]
753 // ldp x22, x21, [sp, #32]
754 // ldp x20, x19, [sp, #48]
757 // In this case, it is always better to put the first ldp at the end, so
758 // that the load-store optimizer can run and merge the ldp and the add into
760 // If we managed to grab the first pop instruction, move it to the end.
761 if (ReverseCSRRestoreSeq)
762 MBB.splice(FirstSPPopI, &MBB, LastPopI)
763 // We should end up with something like this now:
765 // ldp x24, x23, [sp, #16]
766 // ldp x22, x21, [sp, #32]
767 // ldp x20, x19, [sp, #48]
768 // ldp x26, x25, [sp]
771 // and the load-store optimizer can merge the last two instructions into:
773 // ldp x26, x25, [sp], #64
// Select the pointer-authentication key: true for the A key (the default
// when no "sign-return-address-key" attribute is present), false for B.
// NOTE(review): the `return true;` default path is elided in this excerpt
// (numbering jumps 779->782) — confirm against the upstream file.
777 static bool ShouldSignWithAKey(MachineFunction &MF) {
778 const Function &F = MF.getFunction();
779 if (!F.hasFnAttribute("sign-return-address-key"))
782 const StringRef Key =
783 F.getFnAttribute("sign-return-address-key").getValueAsString();
784 assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
785 return Key.equals_lower("a_key");
// Return true if Windows CFI (SEH unwind info) must be emitted: the
// target's asm info uses Windows CFI and the function needs unwind data.
788 static bool needsWinCFI(const MachineFunction &MF) {
789 const Function &F = MF.getFunction();
790 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
791 F.needsUnwindTableEntry();
794 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
795 MachineBasicBlock &MBB) const {
796 MachineBasicBlock::iterator MBBI = MBB.begin();
797 const MachineFrameInfo &MFI = MF.getFrameInfo();
798 const Function &F = MF.getFunction();
799 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
800 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
801 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
802 MachineModuleInfo &MMI = MF.getMMI();
803 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
804 bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
805 !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
806 bool HasFP = hasFP(MF);
807 bool NeedsWinCFI = needsWinCFI(MF);
808 bool HasWinCFI = false;
809 auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
811 bool IsFunclet = MBB.isEHFuncletEntry();
813 // At this point, we're going to decide whether or not the function uses a
814 // redzone. In most cases, the function doesn't have a redzone so let's
815 // assume that's false and set it to true in the case that there's a redzone.
816 AFI->setHasRedZone(false);
818 // Debug location must be unknown since the first debug location is used
819 // to determine the end of the prologue.
822 if (ShouldSignReturnAddress(MF)) {
823 if (ShouldSignWithAKey(MF))
824 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
825 .setMIFlag(MachineInstr::FrameSetup);
827 BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
828 .setMIFlag(MachineInstr::FrameSetup);
829 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
830 .setMIFlag(MachineInstr::FrameSetup);
834 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
835 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
836 .addCFIIndex(CFIIndex)
837 .setMIFlags(MachineInstr::FrameSetup);
840 // All calls are tail calls in GHC calling conv, and functions have no
841 // prologue/epilogue.
842 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
845 // Set tagged base pointer to the bottom of the stack frame.
846 // Ideally it should match SP value after prologue.
847 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
849 // getStackSize() includes all the locals in its size calculation. We don't
850 // include these locals when computing the stack size of a funclet, as they
851 // are allocated in the parent's stack frame and accessed via the frame
852 // pointer from the funclet. We only save the callee saved registers in the
853 // funclet, which are really the callee saved registers of the parent
854 // function, including the funclet.
855 int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
856 : (int)MFI.getStackSize();
857 if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
858 assert(!HasFP && "unexpected function without stack frame but with FP");
859 // All of the stack allocation is for locals.
860 AFI->setLocalStackSize(NumBytes);
863 // REDZONE: If the stack size is less than 128 bytes, we don't need
864 // to actually allocate.
865 if (canUseRedZone(MF)) {
866 AFI->setHasRedZone(true);
867 ++NumRedZoneFunctions;
869 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
870 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
872 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
873 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
874 // Encode the stack size of the leaf function.
875 unsigned CFIIndex = MF.addFrameInst(
876 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
877 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
878 .addCFIIndex(CFIIndex)
879 .setMIFlags(MachineInstr::FrameSetup);
885 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
886 .setMIFlag(MachineInstr::FrameSetup);
893 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
894 // Var args are accounted for in the containing function, so don't
895 // include them for funclets.
896 unsigned FixedObject = (IsWin64 && !IsFunclet) ?
897 alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
899 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
900 // All of the remaining stack allocations are for locals.
901 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
902 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
904 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
905 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
907 } else if (PrologueSaveSize != 0) {
908 MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
909 MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
910 NumBytes -= PrologueSaveSize;
912 assert(NumBytes >= 0 && "Negative stack allocation size!?");
914 // Move past the saves of the callee-saved registers, fixing up the offsets
915 // and pre-inc if we decided to combine the callee-save and local stack
916 // pointer bump above.
917 MachineBasicBlock::iterator End = MBB.end();
918 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
920 fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
921 NeedsWinCFI, &HasWinCFI);
925 // The code below is not applicable to funclets. We have emitted all the SEH
926 // opcodes that we needed to emit. The FP and BP belong to the containing
931 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
932 .setMIFlag(MachineInstr::FrameSetup);
935 // SEH funclets are passed the frame pointer in X1. If the parent
936 // function uses the base register, then the base register is used
937 // directly, and is not retrieved from X1.
938 if (F.hasPersonalityFn()) {
939 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
940 if (isAsynchronousEHPersonality(Per)) {
941 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
942 .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
943 MBB.addLiveIn(AArch64::X1);
951 // Only set up FP if we actually need to. Frame pointer is fp =
952 // sp - fixedobject - 16.
953 int FPOffset = AFI->getCalleeSavedStackSize() - 16;
955 FPOffset += AFI->getLocalStackSize();
957 // Issue sub fp, sp, FPOffset or
958 // mov fp,sp when FPOffset is zero.
959 // Note: All stores of callee-saved registers are marked as "FrameSetup".
960 // This code marks the instruction(s) that set the FP also.
961 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
962 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
965 if (windowsRequiresStackProbe(MF, NumBytes)) {
966 uint32_t NumWords = NumBytes >> 4;
969 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
970 // exceed this amount. We need to move at most 2^24 - 1 into x15.
971 // This is at most two instructions, MOVZ follwed by MOVK.
972 // TODO: Fix to use multiple stack alloc unwind codes for stacks
973 // exceeding 256MB in size.
974 if (NumBytes >= (1 << 28))
975 report_fatal_error("Stack size cannot exceed 256MB for stack "
976 "unwinding purposes");
978 uint32_t LowNumWords = NumWords & 0xFFFF;
979 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
981 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
982 .setMIFlag(MachineInstr::FrameSetup);
983 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
984 .setMIFlag(MachineInstr::FrameSetup);
985 if ((NumWords & 0xFFFF0000) != 0) {
986 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
987 .addReg(AArch64::X15)
988 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
989 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
990 .setMIFlag(MachineInstr::FrameSetup);
991 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
992 .setMIFlag(MachineInstr::FrameSetup);
995 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
997 .setMIFlags(MachineInstr::FrameSetup);
1000 switch (MF.getTarget().getCodeModel()) {
1001 case CodeModel::Tiny:
1002 case CodeModel::Small:
1003 case CodeModel::Medium:
1004 case CodeModel::Kernel:
1005 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1006 .addExternalSymbol("__chkstk")
1007 .addReg(AArch64::X15, RegState::Implicit)
1008 .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
1009 .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
1010 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
1011 .setMIFlags(MachineInstr::FrameSetup);
1014 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1015 .setMIFlag(MachineInstr::FrameSetup);
1018 case CodeModel::Large:
1019 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1020 .addReg(AArch64::X16, RegState::Define)
1021 .addExternalSymbol("__chkstk")
1022 .addExternalSymbol("__chkstk")
1023 .setMIFlags(MachineInstr::FrameSetup);
1026 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1027 .setMIFlag(MachineInstr::FrameSetup);
1030 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
1031 .addReg(AArch64::X16, RegState::Kill)
1032 .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
1033 .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
1034 .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
1035 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
1036 .setMIFlags(MachineInstr::FrameSetup);
1039 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1040 .setMIFlag(MachineInstr::FrameSetup);
1045 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1046 .addReg(AArch64::SP, RegState::Kill)
1047 .addReg(AArch64::X15, RegState::Kill)
1048 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
1049 .setMIFlags(MachineInstr::FrameSetup);
1052 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1054 .setMIFlag(MachineInstr::FrameSetup);
1059 // Allocate space for the rest of the frame.
1061 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
1062 unsigned scratchSPReg = AArch64::SP;
1064 if (NeedsRealignment) {
1065 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
1066 assert(scratchSPReg != AArch64::NoRegister);
1069 // If we're a leaf function, try using the red zone.
1070 if (!canUseRedZone(MF))
1071 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1072 // the correct value here, as NumBytes also includes padding bytes,
1073 // which shouldn't be counted here.
1074 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
1075 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1077 if (NeedsRealignment) {
1078 const unsigned Alignment = MFI.getMaxAlignment();
1079 const unsigned NrBitsToZero = countTrailingZeros(Alignment);
1080 assert(NrBitsToZero > 1);
1081 assert(scratchSPReg != AArch64::SP);
1083 // SUB X9, SP, NumBytes
1084 // -- X9 is temporary register, so shouldn't contain any live data here,
1085 // -- free to use. This is already produced by emitFrameOffset above.
1086 // AND SP, X9, 0b11111...0000
1087 // The logical immediates have a non-trivial encoding. The following
1088 // formula computes the encoded immediate with all ones but
1089 // NrBitsToZero zero bits as least significant bits.
1090 uint32_t andMaskEncoded = (1 << 12) // = N
1091 | ((64 - NrBitsToZero) << 6) // immr
1092 | ((64 - NrBitsToZero - 1) << 0); // imms
1094 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1095 .addReg(scratchSPReg, RegState::Kill)
1096 .addImm(andMaskEncoded);
1097 AFI->setStackRealigned(true);
1100 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1101 .addImm(NumBytes & andMaskEncoded)
1102 .setMIFlag(MachineInstr::FrameSetup);
1107 // If we need a base pointer, set it up here. It's whatever the value of the
1108 // stack pointer is at this point. Any variable size objects will be allocated
1109 // after this, so we can still use the base pointer to reference locals.
1111 // FIXME: Clarify FrameSetup flags here.
1112 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
1114 if (RegInfo->hasBasePointer(MF)) {
1115 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
1119 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1120 .setMIFlag(MachineInstr::FrameSetup);
1124 // The very last FrameSetup instruction indicates the end of prologue. Emit a
1125 // SEH opcode indicating the prologue end.
1126 if (NeedsWinCFI && HasWinCFI) {
1127 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1128 .setMIFlag(MachineInstr::FrameSetup);
1131 if (needsFrameMoves) {
1132 const DataLayout &TD = MF.getDataLayout();
1133 const int StackGrowth = -TD.getPointerSize(0);
1134 unsigned FramePtr = RegInfo->getFrameRegister(MF);
1135 // An example of the prologue:
1142 // .cfi_personality 155, ___gxx_personality_v0
1144 // .cfi_lsda 16, Lexception33
1146 // stp xa,bx, [sp, -#offset]!
1148 // stp x28, x27, [sp, #offset-32]
1149 // stp fp, lr, [sp, #offset-16]
1150 // add fp, sp, #offset - 16
1151 // sub sp, sp, #1360
1154 // +-------------------------------------------+
1155 // 10000 | ........ | ........ | ........ | ........ |
1156 // 10004 | ........ | ........ | ........ | ........ |
1157 // +-------------------------------------------+
1158 // 10008 | ........ | ........ | ........ | ........ |
1159 // 1000c | ........ | ........ | ........ | ........ |
1160 // +===========================================+
1161 // 10010 | X28 Register |
1162 // 10014 | X28 Register |
1163 // +-------------------------------------------+
1164 // 10018 | X27 Register |
1165 // 1001c | X27 Register |
1166 // +===========================================+
1167 // 10020 | Frame Pointer |
1168 // 10024 | Frame Pointer |
1169 // +-------------------------------------------+
1170 // 10028 | Link Register |
1171 // 1002c | Link Register |
1172 // +===========================================+
1173 // 10030 | ........ | ........ | ........ | ........ |
1174 // 10034 | ........ | ........ | ........ | ........ |
1175 // +-------------------------------------------+
1176 // 10038 | ........ | ........ | ........ | ........ |
1177 // 1003c | ........ | ........ | ........ | ........ |
1178 // +-------------------------------------------+
1180 // [sp] = 10030 :: >>initial value<<
1181 // sp = 10020 :: stp fp, lr, [sp, #-16]!
1182 // fp = sp == 10020 :: mov fp, sp
1183 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
1184 // sp == 10010 :: >>final value<<
1186 // The frame pointer (w29) points to address 10020. If we use an offset of
1187 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
1188 // for w27, and -32 for w28:
1191 // .cfi_def_cfa w29, 16
1193 // .cfi_offset w30, -8
1195 // .cfi_offset w29, -16
1197 // .cfi_offset w27, -24
1199 // .cfi_offset w28, -32
1202 // Define the current CFA rule to use the provided FP.
1203 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
1204 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
1205 nullptr, Reg, 2 * StackGrowth - FixedObject));
1206 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1207 .addCFIIndex(CFIIndex)
1208 .setMIFlags(MachineInstr::FrameSetup);
1210 // Encode the stack size of the leaf function.
1211 unsigned CFIIndex = MF.addFrameInst(
1212 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
1213 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1214 .addCFIIndex(CFIIndex)
1215 .setMIFlags(MachineInstr::FrameSetup);
1218 // Now emit the moves for whatever callee saved regs we have (including FP,
1219 // LR if those are saved).
1220 emitCalleeSavedFrameMoves(MBB, MBBI);
// Emit return-address authentication for the epilogue of \p MBB, pairing the
// PACI{A,B}SP signing done in the prologue. Guarded by ShouldSignReturnAddress;
// key selection (A vs. B) comes from ShouldSignWithAKey. On v8.3a+ targets a
// plain RET_ReallyLR terminator is folded into a combined
// authenticate-and-return (RETAA/RETAB); otherwise a separate AUTIASP/AUTIBSP
// is emitted, which decodes as a hint (NOP) on pre-v8.3a cores and is thus
// always safe to issue.
1224 static void InsertReturnAddressAuth(MachineFunction &MF,
1225 MachineBasicBlock &MBB) {
1226 if (!ShouldSignReturnAddress(MF))
1228 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1229 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// Insert before the first terminator (the return), borrowing its debug
// location when one exists.
1231 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1233 if (MBBI != MBB.end())
1234 DL = MBBI->getDebugLoc();
1236 // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1237 // this instruction can safely be used for any v8a architecture.
1238 // From v8.3a onwards there are optimised authenticate LR and return
1239 // instructions, namely RETA{A,B}, that can be used instead.
1240 if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
1241 MBBI->getOpcode() == AArch64::RET_ReallyLR) {
// Replace the return with the fused authenticate+return form, preserving
// the implicit operands of the original RET.
1242 BuildMI(MBB, MBBI, DL,
1243 TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
1244 .copyImplicitOps(*MBBI);
// Fallback: standalone authenticate instruction, tagged FrameDestroy so it
// is recognized as epilogue code.
1249 TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
1250 .setMIFlag(MachineInstr::FrameDestroy);
// Returns whether \p MI is an instruction that returns from an EH funclet
// (the visible cases are CATCHRET and CLEANUPRET).
1254 static bool isFuncletReturnInstr(const MachineInstr &MI) {
1255 switch (MI.getOpcode()) {
1258 case AArch64::CATCHRET:
1259 case AArch64::CLEANUPRET:
// Generate the function (or funclet) epilogue: undo the SP adjustments made
// by emitPrologue, restore SP from FP when the frame was realigned or holds
// variable-sized objects, convert/position the callee-save restores, emit
// Windows SEH epilogue opcodes when needed, and pop any tail-call argument
// area. Return-address authentication is appended on every exit path via the
// scope_exit Cleanup below.
1264 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
1265 MachineBasicBlock &MBB) const {
1266 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1267 MachineFrameInfo &MFI = MF.getFrameInfo();
1268 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1269 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1271 bool IsTailCallReturn = false;
1272 bool NeedsWinCFI = needsWinCFI(MF);
1273 bool HasWinCFI = false;
1274 bool IsFunclet = false;
// Propagate the HasWinCFI result to the MachineFunction no matter which
// early return below is taken.
1275 auto WinCFI = make_scope_exit([&]() {
1276 if (!MF.hasWinCFI())
1277 MF.setHasWinCFI(HasWinCFI);
// Classify the block terminator: tail-call pseudo vs. funclet return.
1280 if (MBB.end() != MBBI) {
1281 DL = MBBI->getDebugLoc();
1282 unsigned RetOpcode = MBBI->getOpcode();
1283 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
1284 RetOpcode == AArch64::TCRETURNri ||
1285 RetOpcode == AArch64::TCRETURNriBTI;
1286 IsFunclet = isFuncletReturnInstr(*MBBI);
// Funclets have their own (smaller) frame size, distinct from the parent's.
1289 int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
1290 : MFI.getStackSize();
1291 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1293 // All calls are tail calls in GHC calling conv, and functions have no
1294 // prologue/epilogue.
1295 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1298 // Initial and residual are named for consistency with the prologue. Note that
1299 // in the epilogue, the residual adjustment is executed first.
1300 uint64_t ArgumentPopSize = 0;
1301 if (IsTailCallReturn) {
1302 MachineOperand &StackAdjust = MBBI->getOperand(1);
1304 // For a tail-call in a callee-pops-arguments environment, some or all of
1305 // the stack may actually be in use for the call's arguments, this is
1306 // calculated during LowerCall and consumed here...
1307 ArgumentPopSize = StackAdjust.getImm();
1309 // ... otherwise the amount to pop is *all* of the argument space,
1310 // conveniently stored in the MachineFunctionInfo by
1311 // LowerFormalArguments. This will, of course, be zero for the C calling
1313 ArgumentPopSize = AFI->getArgumentStackToRestore();
1316 // The stack frame should be like below,
1318 // ---------------------- ---
1320 // | BytesInStackArgArea| CalleeArgStackSize
1321 // | (NumReusableBytes) | (of tail call)
1324 // ---------------------| --- |
1326 // | CalleeSavedReg | | |
1327 // | (CalleeSavedStackSize)| | |
1329 // ---------------------| | NumBytes
1330 // | | StackSize (StackAdjustUp)
1331 // | LocalStackSize | | |
1332 // | (covering callee | | |
1335 // ---------------------- --- ---
1337 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1338 // = StackSize + ArgumentPopSize
1340 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1341 // it as the 2nd argument of AArch64ISD::TC_RETURN.
// Ensure return-address authentication runs on every path out of here.
1343 auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
1346 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1347 // Var args are accounted for in the containing function, so don't
1348 // include them for funclets.
1349 unsigned FixedObject =
1350 (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1352 uint64_t AfterCSRPopSize = ArgumentPopSize;
1353 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1354 // We cannot rely on the local stack size set in emitPrologue if the function
1355 // has funclets, as funclets have different local stack size requirements, and
1356 // the current value set in emitPrologue may be that of the containing
1358 if (MF.hasEHFunclets())
1359 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1360 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1361 // Assume we can't combine the last pop with the sp restore.
1363 if (!CombineSPBump && PrologueSaveSize != 0) {
// Find the last callee-save pop, skipping over any SEH pseudo-instructions
// that may follow it.
1364 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1365 while (AArch64InstrInfo::isSEHInstruction(*Pop))
1366 Pop = std::prev(Pop);
1367 // Converting the last ldp to a post-index ldp is valid only if the last
1368 // ldp's offset is 0.
1369 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1370 // If the offset is 0, convert it to a post-index ldp.
1371 if (OffsetOp.getImm() == 0)
1372 convertCalleeSaveRestoreToSPPrePostIncDec(
1373 MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
1375 // If not, make sure to emit an add after the last ldp.
1376 // We're doing this by transferring the size to be restored from the
1377 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1379 AfterCSRPopSize += PrologueSaveSize;
1383 // Move past the restores of the callee-saved registers.
1384 // If we plan on combining the sp bump of the local stack size and the callee
1385 // save stack size, we might need to adjust the CSR save and restore offsets.
1386 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
1387 MachineBasicBlock::iterator Begin = MBB.begin();
1388 while (LastPopI != Begin) {
1390 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
1393 } else if (CombineSPBump)
1394 fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
1395 NeedsWinCFI, &HasWinCFI);
// Windows SEH: mark where the epilogue begins.
1400 BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
1401 .setMIFlag(MachineInstr::FrameDestroy);
1404 // If there is a single SP update, insert it before the ret and we're done.
1405 if (CombineSPBump) {
1406 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1407 NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
1408 false, NeedsWinCFI, &HasWinCFI);
1409 if (NeedsWinCFI && HasWinCFI)
1410 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1411 TII->get(AArch64::SEH_EpilogEnd))
1412 .setMIFlag(MachineInstr::FrameDestroy);
// From here on, NumBytes no longer includes the callee-save area.
1416 NumBytes -= PrologueSaveSize;
1417 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1420 bool RedZone = canUseRedZone(MF);
1421 // If this was a redzone leaf function, we don't need to restore the
1422 // stack pointer (but we may need to pop stack args for fastcc).
1423 if (RedZone && AfterCSRPopSize == 0)
1426 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1427 int StackRestoreBytes = RedZone ? 0 : NumBytes;
1428 if (NoCalleeSaveRestore)
1429 StackRestoreBytes += AfterCSRPopSize;
1431 // If we were able to combine the local stack pop with the argument pop,
1433 bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
1435 // If we're done after this, make sure to help the load store optimizer.
1437 adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
1439 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1440 StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
1441 NeedsWinCFI, &HasWinCFI);
1445 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1446 TII->get(AArch64::SEH_EpilogEnd))
1447 .setMIFlag(MachineInstr::FrameDestroy);
1455 // Restore the original stack pointer.
1456 // FIXME: Rather than doing the math here, we should instead just use
1457 // non-post-indexed loads for the restores if we aren't actually going to
1458 // be able to save any instructions.
// With VLAs or a realigned stack, SP is not reliable; recompute it from FP.
1459 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
1460 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
1461 -AFI->getCalleeSavedStackSize() + 16, TII,
1462 MachineInstr::FrameDestroy, false, NeedsWinCFI);
1464 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
1465 MachineInstr::FrameDestroy, false, NeedsWinCFI);
1467 // This must be placed after the callee-save restore code because that code
1468 // assumes the SP is at the same location as it was after the callee-save save
1469 // code in the prologue.
1470 if (AfterCSRPopSize) {
1471 // Find an insertion point for the first ldp so that it goes before the
1472 // shadow call stack epilog instruction. This ensures that the restore of
1473 // lr from x18 is placed after the restore from sp.
1474 auto FirstSPPopI = MBB.getFirstTerminator();
1475 while (FirstSPPopI != Begin) {
1476 auto Prev = std::prev(FirstSPPopI);
1477 if (Prev->getOpcode() != AArch64::LDRXpre ||
1478 Prev->getOperand(0).getReg() == AArch64::SP)
1483 adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
1485 emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
1486 AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
1487 NeedsWinCFI, &HasWinCFI);
1489 if (NeedsWinCFI && HasWinCFI)
1490 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1491 .setMIFlag(MachineInstr::FrameDestroy);
1493 MF.setHasWinCFI(HasWinCFI);
1496 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1497 /// debug info. It's the same as what we use for resolving the code-gen
1498 /// references for now. FIXME: This can go wrong when references are
1499 /// SP-relative and simple call frames aren't used.
1500 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
1502 unsigned &FrameReg) const {
// Delegates to resolveFrameIndexReference; the SanitizeHWAddress attribute
// is passed through as one of its flags (presumably PreferFP — confirm
// against the resolveFrameIndexReference signature).
1503 return resolveFrameIndexReference(
1506 MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
// Frame-index reference for objects accessed from outside this function
// (e.g. by the unwinder); simply uses the SEH offset computation.
1510 int AArch64FrameLowering::getNonLocalFrameIndexReference(
1511 const MachineFunction &MF, int FI) const {
1512 return getSEHFrameIndexOffset(MF, FI);
// Translate an object offset into an offset relative to the frame pointer.
// On Win64 the varargs GPR save area (16-byte aligned) sits between FP and
// the object; the +16 accounts for the frame record (FP, LR pair).
1515 static int getFPOffset(const MachineFunction &MF, int ObjectOffset) {
1516 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1517 const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1519 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1520 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1521 return ObjectOffset + FixedObject + 16;
// Translate an object offset into an offset relative to the (post-prologue)
// stack pointer by adding the full frame size.
1524 static int getStackOffset(const MachineFunction &MF, int ObjectOffset) {
1525 const auto &MFI = MF.getFrameInfo();
1526 return ObjectOffset + MFI.getStackSize();
// Offset of frame index FI for Windows SEH purposes, relative to whichever
// register (FP or SP) the target uses as the local address register.
1529 int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
1531 const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1532 MF.getSubtarget().getRegisterInfo());
1533 int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
1534 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1535 ? getFPOffset(MF, ObjectOffset)
1536 : getStackOffset(MF, ObjectOffset);
// Resolve frame index FI to a base register (written to FrameReg) plus the
// returned offset. Thin wrapper that extracts the object offset and
// fixed-ness from MachineFrameInfo and forwards to
// resolveFrameOffsetReference.
1539 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
1540 int FI, unsigned &FrameReg,
1542 bool ForSimm) const {
1543 const auto &MFI = MF.getFrameInfo();
1544 int ObjectOffset = MFI.getObjectOffset(FI);
1545 bool isFixed = MFI.isFixedObjectIndex(FI);
1546 return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
// Choose the base register (FP, BP, or SP) for addressing an object at
// ObjectOffset and return the offset relative to that base; the chosen
// register is written to FrameReg. PreferFP biases the choice toward FP,
// and ForSimm indicates the offset must fit a signed scaled immediate
// (which has a tighter negative range, see FPOffsetFits below).
1550 int AArch64FrameLowering::resolveFrameOffsetReference(
1551 const MachineFunction &MF, int ObjectOffset, bool isFixed,
1552 unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
1553 const auto &MFI = MF.getFrameInfo();
1554 const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1555 MF.getSubtarget().getRegisterInfo());
1556 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1557 const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
// Candidate offsets relative to FP and to SP, respectively.
1559 int FPOffset = getFPOffset(MF, ObjectOffset);
1560 int Offset = getStackOffset(MF, ObjectOffset);
// A non-fixed object whose offset is within -CalleeSavedStackSize lies in
// the callee-save area (feeds the isCSR checks below).
1562 !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
1564 // Use frame pointer to reference fixed objects. Use it for locals if
1565 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1566 // reliable as a base). Make sure useFPForScavengingIndex() does the
1567 // right thing for the emergency spill slot.
1569 if (AFI->hasStackFrame()) {
1570 // Note: Keeping the following as multiple 'if' statements rather than
1571 // merging to a single expression for readability.
1573 // Argument access should always use the FP.
1576 } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
1577 // References to the CSR area must use FP if we're re-aligning the stack
1578 // since the dynamically-sized alignment padding is between the SP/BP and
1580 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1582 } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
1583 // If the FPOffset is negative and we're producing a signed immediate, we
1584 // have to keep in mind that the available offset range for negative
1585 // offsets is smaller than for positive ones. If an offset is available
1586 // via the FP and the SP, use whichever is closest.
1587 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1588 PreferFP |= Offset > -FPOffset;
1590 if (MFI.hasVarSizedObjects()) {
1591 // If we have variable sized objects, we can use either FP or BP, as the
1592 // SP offset is unknown. We can use the base pointer if we have one and
1593 // FP is not preferred. If not, we're stuck with using FP.
1594 bool CanUseBP = RegInfo->hasBasePointer(MF);
1595 if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
1597 else if (!CanUseBP) // Can't use BP. Forced to use FP.
1599 // else we can use BP and FP, but the offset from FP won't fit.
1600 // That will make us scavenge registers which we can probably avoid by
1601 // using BP. If it won't fit for BP either, we'll scavenge anyway.
1602 } else if (FPOffset >= 0) {
1603 // Use SP or FP, whichever gives us the best chance of the offset
1604 // being in range for direct access. If the FPOffset is positive,
1605 // that'll always be best, as the SP will be even further away.
1607 } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1608 // Funclets access the locals contained in the parent's stack frame
1609 // via the frame pointer, so we have to use the FP in the parent
1613 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
1614 "Funclets should only be present on Win64");
1617 // We have the choice between FP and (SP or BP).
1618 if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
1624 assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
1625 "In the presence of dynamic stack pointer realignment, "
1626 "non-argument/CSR objects cannot be accessed through the frame pointer");
1629 FrameReg = RegInfo->getFrameRegister(MF);
1633 // Use the base pointer if we have one.
1634 if (RegInfo->hasBasePointer(MF))
1635 FrameReg = RegInfo->getBaseRegister();
1637 assert(!MFI.hasVarSizedObjects() &&
1638 "Can't use SP when we have var sized objects.");
1639 FrameReg = AArch64::SP;
1640 // If we're using the red zone for this function, the SP won't actually
1641 // be adjusted, so the offsets will be negative. They're also all
1642 // within range of the signed 9-bit immediate instructions.
1643 if (canUseRedZone(MF))
1644 Offset -= AFI->getLocalStackSize();
// Compute the kill-flag register state to attach to \p Reg when it is
// spilled in the prologue (kill unless the register is a live-in).
1650 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
1651 // Do not set a kill flag on values that are also marked as live-in. This
1652 // happens with the @llvm-returnaddress intrinsic and with arguments passed in
1653 // callee saved registers.
1654 // Omitting the kill flags is conservatively correct even if the live-in
1655 // is not used after all.
1656 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
1657 return getKillRegState(!IsLiveIn);
// Whether this function must lay out callee-saves for MachO's compact
// unwind format. Functions using swifterror are excluded (when the target
// lowering supports it), since their save layout differs.
1660 static bool produceCompactUnwindFrame(MachineFunction &MF) {
1661 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1662 AttributeList Attrs = MF.getFunction().getAttributes();
1663 return Subtarget.isTargetMachO() &&
1664 !(Subtarget.getTargetLowering()->supportSwiftError() &&
1665 Attrs.hasAttrSomewhere(Attribute::SwiftError));
// Decide whether the (Reg1, Reg2) callee-save pair must be broken up for
// Windows CFI: only consecutive registers (Reg2 == Reg1 + 1) can be encoded
// by the SEH pairing unwind opcodes.
1668 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
1670 // If we are generating register pairs for a Windows function that requires
1671 // EH support, then pair consecutive registers only. There are no unwind
1672 // opcodes for saves/restores of non-consecutive register pairs.
1673 // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
1674 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
1676 // TODO: LR can be paired with any register. We don't support this yet in
1677 // the MCLayer. We need to add support for the save_lrpair unwind code.
1680 if (Reg2 == Reg1 + 1)
// Describes one callee-save spill slot: either a single register or a
// register pair that can be stored/loaded with one STP/LDP.
1687 struct RegPairInfo {
// First (and, if unpaired, only) register of the slot.
1688 unsigned Reg1 = AArch64::NoRegister;
// Second register; NoRegister when the slot holds a single register.
1689 unsigned Reg2 = AArch64::NoRegister;
// Register class kind, used to pick the spill opcode and slot scale.
1692 enum RegType { GPR, FPR64, FPR128 } Type;
1694 RegPairInfo() = default;
1696 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
1699 } // end anonymous namespace
// Walk the callee-saved-info list and group registers into RegPairInfo
// entries (pairing adjacent same-class registers where allowed), assigning
// each entry its scaled STP/LDP offset within the callee-save area. Sets
// NeedShadowCallStackProlog when LR is saved and the function uses the
// shadow call stack.
1701 static void computeCalleeSaveRegisterPairs(
1702 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
1703 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
1704 bool &NeedShadowCallStackProlog) {
1709 bool NeedsWinCFI = needsWinCFI(MF);
1710 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1711 MachineFrameInfo &MFI = MF.getFrameInfo();
1712 CallingConv::ID CC = MF.getFunction().getCallingConv();
1713 unsigned Count = CSI.size();
1715 // MachO's compact unwind format relies on all registers being stored in
1717 assert((!produceCompactUnwindFrame(MF) ||
1718 CC == CallingConv::PreserveMost ||
1719 (Count & 1) == 0) &&
1720 "Odd number of callee-saved regs to spill!");
// Offsets are assigned top-down from the total callee-save area size.
1721 int Offset = AFI->getCalleeSavedStackSize();
1722 // On Linux, we will have either one or zero non-paired register. On Windows
1723 // with CFI, we can have multiple unpaired registers in order to utilize the
1724 // available unwind codes. This flag assures that the alignment fixup is done
1725 // only once, as intended.
1726 bool FixupDone = false;
1727 for (unsigned i = 0; i < Count; ++i) {
1729 RPI.Reg1 = CSI[i].getReg();
// Classify the register to select the pairing class and slot scale.
1731 if (AArch64::GPR64RegClass.contains(RPI.Reg1))
1732 RPI.Type = RegPairInfo::GPR;
1733 else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
1734 RPI.Type = RegPairInfo::FPR64;
1735 else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
1736 RPI.Type = RegPairInfo::FPR128;
1738 llvm_unreachable("Unsupported register class.");
1740 // Add the next reg to the pair if it is in the same register class.
1741 if (i + 1 < Count) {
1742 unsigned NextReg = CSI[i + 1].getReg();
1744 case RegPairInfo::GPR:
1745 if (AArch64::GPR64RegClass.contains(NextReg) &&
1746 !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
1749 case RegPairInfo::FPR64:
1750 if (AArch64::FPR64RegClass.contains(NextReg) &&
1751 !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
1754 case RegPairInfo::FPR128:
1755 if (AArch64::FPR128RegClass.contains(NextReg))
1761 // If either of the registers to be saved is the lr register, it means that
1762 // we also need to save lr in the shadow call stack.
1763 if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
1764 MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
1765 if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
1766 report_fatal_error("Must reserve x18 to use shadow call stack");
1767 NeedShadowCallStackProlog = true;
1770 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
1771 // list to come in sorted by frame index so that we can issue the store
1772 // pair instructions directly. Assert if we see anything otherwise.
1774 // The order of the registers in the list is controlled by
1775 // getCalleeSavedRegs(), so they will always be in-order, as well.
1776 assert((!RPI.isPaired() ||
1777 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
1778 "Out of order callee saved regs!");
1780 // MachO's compact unwind format relies on all registers being stored in
1781 // adjacent register pairs.
1782 assert((!produceCompactUnwindFrame(MF) ||
1783 CC == CallingConv::PreserveMost ||
1785 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1786 RPI.Reg1 + 1 == RPI.Reg2))) &&
1787 "Callee-save registers not saved as adjacent register pair!");
1789 RPI.FrameIdx = CSI[i].getFrameIdx();
// FPR128 slots are 16 bytes; GPR64/FPR64 slots are 8.
1791 int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
1792 Offset -= RPI.isPaired() ? 2 * Scale : Scale;
1794 // Round up size of non-pair to pair size if we need to pad the
1795 // callee-save area to ensure 16-byte alignment.
1796 if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
1797 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
1800 assert(Offset % 16 == 0);
1801 assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
1802 MFI.setObjectAlignment(RPI.FrameIdx, 16);
// Store the offset pre-scaled, as expected by the LDP/STP immediate field.
1805 assert(Offset % Scale == 0);
1806 RPI.Offset = Offset / Scale;
1807 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
1808 "Offset out of bounds for LDP/STP immediate");
1810 RegPairs.push_back(RPI);
// Emit the prologue's callee-save spill sequence.
// Registers were grouped into pairs by computeCalleeSaveRegisterPairs();
// each pair (or singleton) is stored with a positive-offset STP/STR from
// SP so that emitPrologue can later convert the first store into a
// pre-decrement if the CS area can't be merged with the local area.
// Also emits the shadow-call-stack prolog and Win64 SEH opcodes when needed.
// NOTE(review): this excerpt elides some original lines (gaps in the
// embedded numbering), e.g. the DebugLoc `DL` definition and parts of the
// opcode-selection switch; the comments below cover only the visible code.
1816 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1817 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1818 const std::vector<CalleeSavedInfo> &CSI,
1819 const TargetRegisterInfo *TRI) const {
1820 MachineFunction &MF = *MBB.getParent();
1821 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1822 bool NeedsWinCFI = needsWinCFI(MF);
1824 SmallVector<RegPairInfo, 8> RegPairs;
1826 bool NeedShadowCallStackProlog = false;
1827 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1828 NeedShadowCallStackProlog);
1829 const MachineRegisterInfo &MRI = MF.getRegInfo();
// Push LR onto the shadow call stack, whose top-of-stack pointer lives
// in x18 (post-increment store advances it by 8).
1831 if (NeedShadowCallStackProlog) {
1832 // Shadow call stack prolog: str x30, [x18], #8
1833 BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
1834 .addReg(AArch64::X18, RegState::Define)
1835 .addReg(AArch64::LR)
1836 .addReg(AArch64::X18)
1838 .setMIFlag(MachineInstr::FrameSetup);
1841 BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
1842 .setMIFlag(MachineInstr::FrameSetup);
1844 if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
1845 // Emit a CFI instruction that causes 8 to be subtracted from the value of
1846 // x18 when unwinding past this frame.
// The escape bytes below encode DW_CFA_val_expression with a
// breg18-based expression; the addend -8 is hand-encoded as sleb128.
1847 static const char CFIInst[] = {
1848 dwarf::DW_CFA_val_expression,
1851 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1852 static_cast<char>(-8) & 0x7f, // addend (sleb128)
1854 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
1855 nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1856 BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
1857 .addCFIIndex(CFIIndex)
1858 .setMIFlag(MachineInstr::FrameSetup);
1861 // This instruction also makes x18 live-in to the entry block.
1862 MBB.addLiveIn(AArch64::X18);
// Walk the pair list in reverse of the order it was computed.
1865 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1867 RegPairInfo RPI = *RPII;
1868 unsigned Reg1 = RPI.Reg1;
1869 unsigned Reg2 = RPI.Reg2;
1872 // Issue sequence of spills for cs regs. The first spill may be converted
1873 // to a pre-decrement store later by emitPrologue if the callee-save stack
1874 // area allocation can't be combined with the local stack area allocation.
1876 // stp x22, x21, [sp, #0] // addImm(+0)
1877 // stp x20, x19, [sp, #16] // addImm(+2)
1878 // stp fp, lr, [sp, #32] // addImm(+4)
1879 // Rationale: This sequence saves uop updates compared to a sequence of
1880 // pre-increment spills like stp xi,xj,[sp,#-16]!
1881 // Note: Similar rationale and sequence for restores in epilog.
1882 unsigned Size, Align;
1884 case RegPairInfo::GPR:
1885 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1889 case RegPairInfo::FPR64:
1890 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
1894 case RegPairInfo::FPR128:
1895 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
1900 LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
1901 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1902 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1903 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1906 assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
1907 "Windows unwinding requires a consecutive (FP,LR) pair");
1908 // Windows unwind codes require consecutive registers if registers are
1909 // paired. Make the switch here, so that the code below will save (x,x+1)
1911 unsigned FrameIdxReg1 = RPI.FrameIdx;
1912 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
1913 if (NeedsWinCFI && RPI.isPaired()) {
1914 std::swap(Reg1, Reg2);
1915 std::swap(FrameIdxReg1, FrameIdxReg2);
1917 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
// Non-reserved CS registers hold values from the caller, so they must
// be marked live-in to the block for the spill to be legal.
1918 if (!MRI.isReserved(Reg1))
1919 MBB.addLiveIn(Reg1);
1920 if (RPI.isPaired()) {
1921 if (!MRI.isReserved(Reg2))
1922 MBB.addLiveIn(Reg2);
1923 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
1924 MIB.addMemOperand(MF.getMachineMemOperand(
1925 MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
1926 MachineMemOperand::MOStore, Size, Align));
1928 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1929 .addReg(AArch64::SP)
1930 .addImm(RPI.Offset) // [sp, #offset*scale],
1931 // where factor*scale is implicit
1932 .setMIFlag(MachineInstr::FrameSetup);
1933 MIB.addMemOperand(MF.getMachineMemOperand(
1934 MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
1935 MachineMemOperand::MOStore, Size, Align));
// Attach the matching Windows SEH unwind opcode when producing Win CFI.
1937 InsertSEH(MIB, TII, MachineInstr::FrameSetup);
// Emit the epilogue's callee-save restore sequence — the mirror image of
// spillCalleeSavedRegisters(): positive-offset LDP/LDR loads from SP, with
// the shadow-call-stack pop (if any) emitted last so LR is restored from
// the shadow stack after the regular reloads.
// NOTE(review): this excerpt elides some original lines (gaps in the
// embedded numbering), e.g. the `DL` declaration, `switch (RPI.Type)`
// header and `break`s; the comments below cover only the visible code.
1943 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
1944 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1945 std::vector<CalleeSavedInfo> &CSI,
1946 const TargetRegisterInfo *TRI) const {
1947 MachineFunction &MF = *MBB.getParent();
1948 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1950 SmallVector<RegPairInfo, 8> RegPairs;
1951 bool NeedsWinCFI = needsWinCFI(MF);
// Take the debug location from the instruction we insert before, if any.
1953 if (MI != MBB.end())
1954 DL = MI->getDebugLoc();
1956 bool NeedShadowCallStackProlog = false;
1957 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1958 NeedShadowCallStackProlog);
// Emits one reload (LDP for a pair, LDR for a singleton) for the given
// pair record; invoked below once per entry, in either list order.
1960 auto EmitMI = [&](const RegPairInfo &RPI) {
1961 unsigned Reg1 = RPI.Reg1;
1962 unsigned Reg2 = RPI.Reg2;
1964 // Issue sequence of restores for cs regs. The last restore may be converted
1965 // to a post-increment load later by emitEpilogue if the callee-save stack
1966 // area allocation can't be combined with the local stack area allocation.
1968 // ldp fp, lr, [sp, #32] // addImm(+4)
1969 // ldp x20, x19, [sp, #16] // addImm(+2)
1970 // ldp x22, x21, [sp, #0] // addImm(+0)
1971 // Note: see comment in spillCalleeSavedRegisters()
1973 unsigned Size, Align;
1975 case RegPairInfo::GPR:
1976 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
1980 case RegPairInfo::FPR64:
1981 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
1985 case RegPairInfo::FPR128:
1986 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
1991 LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
1992 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1993 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1994 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1997 // Windows unwind codes require consecutive registers if registers are
1998 // paired. Make the switch here, so that the code below will save (x,x+1)
2000 unsigned FrameIdxReg1 = RPI.FrameIdx;
2001 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2002 if (NeedsWinCFI && RPI.isPaired()) {
2003 std::swap(Reg1, Reg2);
2004 std::swap(FrameIdxReg1, FrameIdxReg2);
2006 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
2007 if (RPI.isPaired()) {
2008 MIB.addReg(Reg2, getDefRegState(true));
2009 MIB.addMemOperand(MF.getMachineMemOperand(
2010 MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2011 MachineMemOperand::MOLoad, Size, Align));
2013 MIB.addReg(Reg1, getDefRegState(true))
2014 .addReg(AArch64::SP)
2015 .addImm(RPI.Offset) // [sp, #offset*scale]
2016 // where factor*scale is implicit
2017 .setMIFlag(MachineInstr::FrameDestroy);
2018 MIB.addMemOperand(MF.getMachineMemOperand(
2019 MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2020 MachineMemOperand::MOLoad, Size, Align));
2022 InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
// ReverseCSRRestoreSeq (a file-level option not visible in this excerpt)
// selects whether the pairs are restored in reverse or forward order.
2024 if (ReverseCSRRestoreSeq)
2025 for (const RegPairInfo &RPI : reverse(RegPairs))
2028 for (const RegPairInfo &RPI : RegPairs)
// Pop LR from the shadow call stack (pre-decrement of x18 by 8),
// undoing the str x30, [x18], #8 emitted in the prologue.
2031 if (NeedShadowCallStackProlog) {
2032 // Shadow call stack epilog: ldr x30, [x18, #-8]!
2033 BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
2034 .addReg(AArch64::X18, RegState::Define)
2035 .addReg(AArch64::LR, RegState::Define)
2036 .addReg(AArch64::X18)
2038 .setMIFlag(MachineInstr::FrameDestroy);
// Decide which callee-saved registers this function must save/restore,
// record the aligned callee-save area size in AArch64FunctionInfo, and —
// when the estimated frame is large — guarantee a scratch register (or an
// emergency spill slot) for the register scavenger.
// NOTE(review): this excerpt elides some original lines (gaps in the
// embedded numbering), e.g. several `if` conditions, `continue`s and
// closing braces; the comments below cover only the visible code.
2044 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
2045 BitVector &SavedRegs,
2046 RegScavenger *RS) const {
2047 // All calls are tail calls in GHC calling conv, and functions have no
2048 // prologue/epilogue.
2049 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
// Start from the target-independent computation, then refine.
2052 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2053 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
2054 MF.getSubtarget().getRegisterInfo());
2055 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2056 unsigned UnspilledCSGPR = AArch64::NoRegister;
2057 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2059 MachineFrameInfo &MFI = MF.getFrameInfo();
2060 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
2062 unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
2063 ? RegInfo->getBaseRegister()
2064 : (unsigned)AArch64::NoRegister;
2066 unsigned ExtraCSSpill = 0;
2067 // Figure out which callee-saved registers to save/restore.
2068 for (unsigned i = 0; CSRegs[i]; ++i) {
2069 const unsigned Reg = CSRegs[i];
2071 // Add the base pointer register to SavedRegs if it is callee-save.
2072 if (Reg == BasePointerReg)
2075 bool RegUsed = SavedRegs.test(Reg);
// CSRegs is laid out in even/odd pairs; flipping the low bit of the
// index selects the partner register of the pair.
2076 unsigned PairedReg = CSRegs[i ^ 1];
// Remember an unspilled CS GPR (and its partner) as a scavenging
// candidate for the BigStack handling below.
2078 if (AArch64::GPR64RegClass.contains(Reg) &&
2079 !RegInfo->isReservedReg(MF, Reg)) {
2080 UnspilledCSGPR = Reg;
2081 UnspilledCSGPRPaired = PairedReg;
2086 // MachO's compact unwind format relies on all registers being stored in
2088 // FIXME: the usual format is actually better if unwinding isn't needed.
2089 if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
2090 !SavedRegs.test(PairedReg)) {
2091 SavedRegs.set(PairedReg);
2092 if (AArch64::GPR64RegClass.contains(PairedReg) &&
2093 !RegInfo->isReservedReg(MF, PairedReg))
2094 ExtraCSSpill = PairedReg;
2098 // Calculates the callee saved stack size.
2099 unsigned CSStackSize = 0;
2100 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2101 const MachineRegisterInfo &MRI = MF.getRegInfo();
2102 for (unsigned Reg : SavedRegs.set_bits())
2103 CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
2105 // Save number of saved regs, so we can easily update CSStackSize later.
2106 unsigned NumSavedRegs = SavedRegs.count();
2108 // The frame record needs to be created by saving the appropriate registers
2109 unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
// NOTE(review): the first part of this condition is elided in this
// excerpt; the visible branch forces FP/LR saves so a frame record
// can be built (e.g. when a Windows stack probe is required).
2111 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2112 SavedRegs.set(AArch64::FP);
2113 SavedRegs.set(AArch64::LR);
2116 LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
2118 : SavedRegs.set_bits()) dbgs()
2119 << ' ' << printReg(Reg, RegInfo);
2122 // If any callee-saved registers are used, the frame cannot be eliminated.
2123 bool CanEliminateFrame = SavedRegs.count() == 0;
2125 // The CSR spill slots have not been allocated yet, so estimateStackSize
2126 // won't include them.
2127 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
2128 bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
2129 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2130 AFI->setHasStackFrame(true);
2132 // Estimate if we might need to scavenge a register at some point in order
2133 // to materialize a stack offset. If so, either spill one additional
2134 // callee-saved register or reserve a special spill slot to facilitate
2135 // register scavenging. If we already spilled an extra callee-saved register
2136 // above to keep the number of spills even, we don't need to do anything else
2139 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2140 LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
2141 << " to get a scratch register.\n");
2142 SavedRegs.set(UnspilledCSGPR);
2143 // MachO's compact unwind format relies on all registers being stored in
2144 // pairs, so if we need to spill one extra for BigStack, then we need to
2146 if (produceCompactUnwindFrame(MF))
2147 SavedRegs.set(UnspilledCSGPRPaired);
2148 ExtraCSSpill = UnspilledCSGPRPaired;
2151 // If we didn't find an extra callee-saved register to spill, create
2152 // an emergency spill slot.
2153 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
2154 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2155 const TargetRegisterClass &RC = AArch64::GPR64RegClass;
2156 unsigned Size = TRI->getSpillSize(RC);
2157 unsigned Align = TRI->getSpillAlignment(RC);
2158 int FI = MFI.CreateStackObject(Size, Align, false);
2159 RS->addScavengingFrameIndex(FI);
2160 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2161 << " as the emergency spill slot.\n");
2165 // Adding the size of additional 64bit GPR saves.
2166 CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2167 unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
2168 LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2169 << EstimatedStackSize + AlignedCSStackSize
2172 // Round up to register pair alignment to avoid additional SP adjustment
// Any padding introduced by the 16-byte round-up is "free space" that
// enableStackSlotScavenging() can later hand out as a spill slot.
2174 AFI->setCalleeSavedStackSize(AlignedCSStackSize);
2175 AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
// Allow the scavenger to reuse stack slots only when 16-byte rounding of
// the callee-save area left free padding (flag set in determineCalleeSaves
// via setCalleeSaveStackHasFreeSpace when the aligned size differs from
// the raw size).
2178 bool AArch64FrameLowering::enableStackSlotScavenging(
2179 const MachineFunction &MF) const {
2180 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2181 return AFI->hasCalleeSaveStackFreeSpace();
// For functions using Win64-style C++ EH, create the UnwindHelp stack
// object and initialize it to -2 immediately after the prologue, using a
// register scavenged from just past the frame-setup instructions.
// NOTE(review): this excerpt elides some original lines (gaps in the
// embedded numbering), e.g. the early `return`, the `UnwindHelpFI`
// declaration and the `DL` definition; comments cover only visible code.
2184 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
2185 MachineFunction &MF, RegScavenger *RS) const {
2186 // If this function isn't doing Win64-style C++ EH, we don't need to do
2188 if (!MF.hasEHFunclets())
2190 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2191 MachineFrameInfo &MFI = MF.getFrameInfo();
2192 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
// Skip past the prologue: advance to the first non-FrameSetup
// instruction of the entry block so the stores land after frame setup.
2194 MachineBasicBlock &MBB = MF.front();
2195 auto MBBI = MBB.begin();
2196 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
2199 // Create an UnwindHelp object.
2201 MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
2202 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
2203 // We need to store -2 into the UnwindHelp object at the start of the
// Scavenge a GPR64 that is unused right after the prologue to hold the
// -2 constant before storing it into the UnwindHelp slot.
2206 RS->enterBasicBlockEnd(MBB);
2207 RS->backward(std::prev(MBBI));
2208 unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
2209 assert(DstReg && "There must be a free register after frame setup");
2210 BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
2211 BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
2212 .addReg(DstReg, getKillRegState(true))
2213 .addFrameIndex(UnwindHelpFI)
2217 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2218 /// the update. This is easily retrieved as it is exactly the offset that is set
2219 /// in processFunctionBeforeFrameFinalized.
2220 int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2221 const MachineFunction &MF, int FI, unsigned &FrameReg,
2222 bool IgnoreSPUpdates) const {
2223 const MachineFrameInfo &MFI = MF.getFrameInfo();
2224 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
2225 << MFI.getObjectOffset(FI) << "\n");
// Always resolve the frame index against SP and return the object's raw
// frame offset. NOTE(review): IgnoreSPUpdates is not consulted in the
// visible body — the SP-relative offset is returned unconditionally.
2226 FrameReg = AArch64::SP;
2227 return MFI.getObjectOffset(FI);
2230 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
2231 /// the parent's frame pointer
2232 unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
2233 const MachineFunction &MF) const {
// NOTE(review): the return statement is elided in this excerpt; per the
// doc comment above, this offset is unused on AArch64 — confirm the
// elided body returns a trivial constant.
2237 /// Funclets only need to account for space for the callee saved registers,
2238 /// as the locals are accounted for in the parent's stack frame.
2239 unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2240 const MachineFunction &MF) const {
2241 // This is the size of the pushed CSRs.
2243 MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
2244 // This is the amount of stack a funclet needs to allocate.
2245 return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
2246 getStackAlignment());