1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the AArch64 implementation of TargetFrameLowering class.
12 // On AArch64, stack frames are structured as follows:
14 // The stack grows downward.
16 // All of the individual frame areas on the frame below are optional, i.e. it's
17 // possible to create a function so that the particular area isn't present
18 // in the frame.
20 // At function entry, the "frame" looks as follows:
23 // |-----------------------------------|
25 // | arguments passed on the stack |
27 // |-----------------------------------| <- sp
31 // After the prologue has run, the frame has the following general structure.
32 // Note that this doesn't depict the case where a red-zone is used. Also,
33 // technically the last frame area (VLAs) doesn't get created until the
34 // main function body, after the prologue is run. However, it's depicted here
35 // for completeness.
38 // |-----------------------------------|
40 // | arguments passed on the stack |
42 // |-----------------------------------|
44 // | prev_fp, prev_lr |
45 // | (a.k.a. "frame record") |
46 // |-----------------------------------| <- fp(=x29)
48 // | other callee-saved registers |
50 // |-----------------------------------|
51 // |.empty.space.to.make.part.below....|
52 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
53 // |.the.standard.16-byte.alignment....| compile time; if present)
54 // |-----------------------------------|
56 // | local variables of fixed size |
57 // | including spill slots |
58 // |-----------------------------------| <- bp(not defined by ABI,
59 // |.variable-sized.local.variables....| LLVM chooses X19)
60 // |.(VLAs)............................| (size of this area is unknown at
61 // |...................................| compile time)
62 // |-----------------------------------| <- sp
66 // To access the data in a frame, at compile time, a constant offset must be
67 // computable from one of the pointers (fp, bp, sp) to access it. The size
68 // of the areas with a dotted background cannot be computed at compile-time
69 // if they are present, making it required to have all three of fp, bp and
70 // sp to be set up to be able to access all contents in the frame areas,
71 // assuming all of the frame areas are non-empty.
73 // For most functions, some of the frame areas are empty. For those functions,
74 // it may not be necessary to set up fp or bp:
75 // * A base pointer is definitely needed when there are both VLAs and local
76 // variables with more-than-default alignment requirements.
77 // * A frame pointer is definitely needed when there are local variables with
78 // more-than-default alignment requirements.
80 // In some cases when a base pointer is not strictly needed, it is generated
81 // anyway when offsets from the frame pointer to access local variables become
82 // so large that the offset can't be encoded in the immediate fields of loads
83 // or stores.
85 // FIXME: also explain the redzone concept.
86 // FIXME: also explain the concept of reserved call frames.
88 //===----------------------------------------------------------------------===//
90 #include "AArch64FrameLowering.h"
91 #include "AArch64InstrInfo.h"
92 #include "AArch64MachineFunctionInfo.h"
93 #include "AArch64RegisterInfo.h"
94 #include "AArch64Subtarget.h"
95 #include "AArch64TargetMachine.h"
96 #include "llvm/ADT/SmallVector.h"
97 #include "llvm/ADT/Statistic.h"
98 #include "llvm/CodeGen/LivePhysRegs.h"
99 #include "llvm/CodeGen/MachineBasicBlock.h"
100 #include "llvm/CodeGen/MachineFrameInfo.h"
101 #include "llvm/CodeGen/MachineFunction.h"
102 #include "llvm/CodeGen/MachineInstr.h"
103 #include "llvm/CodeGen/MachineInstrBuilder.h"
104 #include "llvm/CodeGen/MachineMemOperand.h"
105 #include "llvm/CodeGen/MachineModuleInfo.h"
106 #include "llvm/CodeGen/MachineOperand.h"
107 #include "llvm/CodeGen/MachineRegisterInfo.h"
108 #include "llvm/CodeGen/RegisterScavenging.h"
109 #include "llvm/IR/Attributes.h"
110 #include "llvm/IR/CallingConv.h"
111 #include "llvm/IR/DataLayout.h"
112 #include "llvm/IR/DebugLoc.h"
113 #include "llvm/IR/Function.h"
114 #include "llvm/MC/MCDwarf.h"
115 #include "llvm/Support/CommandLine.h"
116 #include "llvm/Support/Debug.h"
117 #include "llvm/Support/ErrorHandling.h"
118 #include "llvm/Support/MathExtras.h"
119 #include "llvm/Support/raw_ostream.h"
120 #include "llvm/Target/TargetInstrInfo.h"
121 #include "llvm/Target/TargetMachine.h"
122 #include "llvm/Target/TargetOptions.h"
123 #include "llvm/Target/TargetRegisterInfo.h"
124 #include "llvm/Target/TargetSubtargetInfo.h"
130 using namespace llvm;
132 #define DEBUG_TYPE "frame-info"
// Hidden command-line option "aarch64-redzone"; it is initialised to false.
134 static cl::opt<bool> EnableRedZone("aarch64-redzone",
135 cl::desc("enable use of redzone on AArch64"),
136 cl::init(false), cl::Hidden);
// Statistic counter; emitPrologue increments it when canUseRedZone(MF) holds
// for a function without a stack frame.
138 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
140 /// Look at each instruction that references stack frames and return the stack
141 /// size limit beyond which some of these instructions will require a scratch
142 /// register during their expansion later.
///
/// \param MF  Machine function whose blocks and instructions are scanned.
///
/// NOTE(review): this listing appears to have dropped lines (the statements
/// guarded by the `if`s below, the declaration of `Offset`, and the final
/// return); confirm the exact limits returned against the complete file.
143 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
144 // FIXME: For now, just conservatively guesstimate based on unscaled indexing
145 // range. We'll end up allocating an unnecessary spill slot a lot, but
146 // realistically that's not a big deal at this stage of the game.
147 for (MachineBasicBlock &MBB : MF) {
148 for (MachineInstr &MI : MBB) {
// Debug values, pseudo instructions and ADDXri/ADDSXri are singled out here;
// presumably they are skipped (the guarded statement is not shown).
149 if (MI.isDebugValue() || MI.isPseudo() ||
150 MI.getOpcode() == AArch64::ADDXri ||
151 MI.getOpcode() == AArch64::ADDSXri)
// Walk the operands looking for frame-index operands.
154 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
155 if (!MI.getOperand(i).isFI())
// Detect instructions for which isAArch64FrameOffsetLegal reports
// AArch64FrameOffsetCannotUpdate.
159 if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
160 AArch64FrameOffsetCannotUpdate)
/// Decide whether \p MF may keep its locals in the red zone instead of
/// adjusting SP.
///
/// NOTE(review): the statements guarded by the attribute check (and any check
/// preceding it) are not visible in this listing.
168 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
171 // Don't use the red zone if the function explicitly asks us not to.
172 // This is typically used for kernel code.
173 if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
176 const MachineFrameInfo &MFI = MF.getFrameInfo();
177 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
178 unsigned NumBytes = AFI->getLocalStackSize();
// Eligible only when the function makes no calls, has no frame pointer, and
// its local stack size does not exceed 128 bytes.
180 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
183 /// hasFP - Return true if the specified function should have a dedicated frame
184 /// pointer register.
///
/// A frame pointer is requested when any of the following holds: the function
/// makes calls and frame-pointer elimination is disabled for it; it has
/// variable-sized objects; its frame address is taken; it has a stackmap or a
/// patchpoint; or the register info reports that stack realignment is needed.
185 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
186 const MachineFrameInfo &MFI = MF.getFrameInfo();
187 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
188 // Retain behavior of always omitting the FP for leaf functions when possible.
189 return (MFI.hasCalls() &&
190 MF.getTarget().Options.DisableFramePointerElim(MF)) ||
191 MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
192 MFI.hasStackMap() || MFI.hasPatchPoint() ||
193 RegInfo->needsStackRealignment(MF);
196 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
197 /// not required, we reserve argument space for call sites in the function
198 /// immediately on entry to the current function. This eliminates the need for
199 /// add/sub sp brackets around call sites. Returns true if the call frame is
200 /// included as part of the stack frame.
///
/// The call frame is treated as reserved exactly when the function has no
/// variable-sized stack objects.
/// NOTE(review): the line carrying the return type is not visible in this
/// listing.
202 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
203 return !MF.getFrameInfo().hasVarSizedObjects();
/// Lower the call-frame setup/destroy pseudo instruction at \p I.
///
/// When the call frame is not reserved, the amount in operand 0 is rounded up
/// to the stack alignment and, if the callee pops nothing, applied to SP with
/// emitFrameOffset. When the call frame is reserved and the callee pops a
/// non-zero amount, SP is moved by -CalleePopAmount instead.
///
/// NOTE(review): several lines are missing from this listing (including
/// whatever happens to Amount between the alignment and the assert, the
/// trailing arguments of the second emitFrameOffset call, and the return);
/// verify against the complete file.
206 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
207 MachineFunction &MF, MachineBasicBlock &MBB,
208 MachineBasicBlock::iterator I) const {
209 const AArch64InstrInfo *TII =
210 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
211 DebugLoc DL = I->getDebugLoc();
212 unsigned Opc = I->getOpcode();
213 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
// Only the destroy pseudo carries a callee-pop amount (operand 1).
214 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
216 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
217 if (!TFI->hasReservedCallFrame(MF)) {
218 unsigned Align = getStackAlignment();
220 int64_t Amount = I->getOperand(0).getImm();
221 Amount = alignTo(Amount, Align);
225 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
226 // doesn't have to pop anything), then the first operand will be zero too so
227 // this adjustment is a no-op.
228 if (CalleePopAmount == 0) {
229 // FIXME: in-function stack adjustment for calls is limited to 24-bits
230 // because there's no guaranteed temporary register available.
232 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
233 // 1) For offset <= 12-bit, we use LSL #0
234 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
235 // LSL #0, and the other uses LSL #12.
237 // Most call frames will be allocated at the start of a function so
238 // this is OK, but it is a limitation that needs dealing with.
239 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
240 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
242 } else if (CalleePopAmount != 0) {
243 // If the calling convention demands that the callee pops arguments from the
244 // stack, we want to add it back if we have a reserved call frame.
245 assert(CalleePopAmount < 0xffffff && "call frame too large");
246 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
/// Emit, before \p MBBI, one CFI "offset" instruction flagged FrameSetup for
/// every callee-saved register recorded in the frame info. The offset used is
/// the register's frame-object offset minus getOffsetOfLocalArea().
///
/// NOTE(review): the declaration that receives the offset expression (and any
/// early exit for an empty list) is not visible in this listing.
252 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
253 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
254 MachineFunction &MF = *MBB.getParent();
255 MachineFrameInfo &MFI = MF.getFrameInfo();
256 const TargetSubtargetInfo &STI = MF.getSubtarget();
257 const MCRegisterInfo *MRI = STI.getRegisterInfo();
258 const TargetInstrInfo *TII = STI.getInstrInfo();
259 DebugLoc DL = MBB.findDebugLoc(MBBI);
261 // Add callee saved registers to move list.
262 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
266 for (const auto &Info : CSI) {
267 unsigned Reg = Info.getReg();
269 MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
// Translate the register to its DWARF number and record where it was saved.
270 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
271 unsigned CFIIndex = MF.addFrameInst(
272 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
273 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
274 .addCFIIndex(CFIIndex)
275 .setMIFlags(MachineInstr::FrameSetup);
279 // Find a scratch register that we can use at the start of the prologue to
280 // re-align the stack pointer. We avoid using callee-save registers since they
281 // may appear to be free when this is called from canUseAsPrologue (during
282 // shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
285 // FIXME: This is a bit conservative, since in the above case we could use one
286 // of the callee-save registers as a scratch temp to re-align the stack pointer,
287 // but we would then have to make sure that we were in fact saving at least one
288 // callee-save register in the prologue, which is additional complexity that
289 // doesn't seem worth the benefit.
//
// The search treats MBB's live-ins and every callee-saved register as
// occupied, prefers X9, then tries the registers of GPR64RegClass in order,
// and yields AArch64::NoRegister when nothing is available.
// NOTE(review): the return statements for the entry-block, X9 and loop cases
// are not visible in this listing.
290 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
291 MachineFunction *MF = MBB->getParent();
293 // If MBB is an entry block, use X9 as the scratch register
294 if (&MF->front() == MBB)
297 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
298 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
299 LivePhysRegs LiveRegs(TRI);
300 LiveRegs.addLiveIns(*MBB);
302 // Mark callee saved registers as used so we will not choose them.
303 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
// The callee-saved list is iterated until a zero entry is reached.
304 for (unsigned i = 0; CSRegs[i]; ++i)
305 LiveRegs.addReg(CSRegs[i]);
307 // Prefer X9 since it was historically used for the prologue scratch reg.
308 const MachineRegisterInfo &MRI = MF->getRegInfo();
309 if (LiveRegs.available(MRI, AArch64::X9))
312 for (unsigned Reg : AArch64::GPR64RegClass) {
313 if (LiveRegs.available(MRI, Reg))
316 return AArch64::NoRegister;
/// Report whether \p MBB can host the prologue. When the stack must be
/// re-aligned, this requires findScratchNonCalleeSaveRegister to find a
/// register for the block.
///
/// NOTE(review): the statement guarded by the realignment check is not visible
/// in this listing.
319 bool AArch64FrameLowering::canUseAsPrologue(
320 const MachineBasicBlock &MBB) const {
321 const MachineFunction *MF = MBB.getParent();
// The helper takes a non-const block pointer, hence the const_cast.
322 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
323 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
324 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
326 // Don't need a scratch register if we're not going to re-align the stack.
327 if (!RegInfo->needsStackRealignment(*MF))
329 // Otherwise, we can use any block as long as it has a scratch register
// available.
331 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
/// Decide whether the callee-save area and the local area can be allocated
/// with a single SP adjustment of \p StackBumpBytes.
///
/// The conditions examined are: an empty local stack, a bump of 512 bytes or
/// more, variable-sized objects, required stack realignment, and red-zone
/// eligibility.
/// NOTE(review): the value returned under each condition and the final return
/// are not visible in this listing.
334 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
335 MachineFunction &MF, unsigned StackBumpBytes) const {
336 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
339 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
341 if (AFI->getLocalStackSize() == 0)
344 // 512 is the maximum immediate for stp/ldp that will be used for
345 // callee-save save/restores
346 if (StackBumpBytes >= 512)
349 if (MFI.hasVarSizedObjects())
352 if (RegInfo->needsStackRealignment(MF))
355 // This isn't strictly necessary, but it simplifies things a bit since the
356 // current RedZone handling code assumes the SP is adjusted by the
357 // callee-save save/restore code.
358 if (canUseRedZone(MF))
364 // Convert callee-save register save/restore instruction to do stack pointer
365 // decrement/increment to allocate/deallocate the callee-save stack area by
366 // converting store/load to use pre/post increment version.
//
// A replacement instruction is built in front of MBBI with SP as an extra
// defined operand, all operands except the trailing immediate are copied, the
// immediate is replaced by a value derived from CSStackSizeInc, and flags and
// memory operands are carried over. The original instruction is erased and the
// iterator just before the erase result is returned.
//
// NOTE(review): this listing is missing the declaration of NewOpc, most of the
// `case` labels and `break`s, and the condition guarding the division by 8
// (NewIsUnscaled presumably selects it); verify against the complete file.
367 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
368 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
369 const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
371 bool NewIsUnscaled = false;
372 switch (MBBI->getOpcode()) {
374 llvm_unreachable("Unexpected callee-save save/restore opcode!");
376 NewOpc = AArch64::STPXpre;
379 NewOpc = AArch64::STPDpre;
381 case AArch64::STRXui:
382 NewOpc = AArch64::STRXpre;
383 NewIsUnscaled = true;
385 case AArch64::STRDui:
386 NewOpc = AArch64::STRDpre;
387 NewIsUnscaled = true;
390 NewOpc = AArch64::LDPXpost;
393 NewOpc = AArch64::LDPDpost;
395 case AArch64::LDRXui:
396 NewOpc = AArch64::LDRXpost;
397 NewIsUnscaled = true;
399 case AArch64::LDRDui:
400 NewOpc = AArch64::LDRDpost;
401 NewIsUnscaled = true;
405 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// The new instruction also defines SP.
406 MIB.addReg(AArch64::SP, RegState::Define);
408 // Copy all operands other than the immediate offset.
409 unsigned OpndIdx = 0;
410 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
412 MIB.add(MBBI->getOperand(OpndIdx));
// After the loop OpndIdx names the trailing immediate; the operand before it
// is expected to be the SP base register.
414 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
415 "Unexpected immediate offset in first/last callee-save save/restore "
417 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
418 "Unexpected base register in callee-save save/restore instruction!");
419 // Last operand is immediate offset that needs fixing.
420 assert(CSStackSizeInc % 8 == 0);
421 int64_t CSStackSizeIncImm = CSStackSizeInc;
423 CSStackSizeIncImm /= 8;
424 MIB.addImm(CSStackSizeIncImm);
426 MIB.setMIFlags(MBBI->getFlags());
427 MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
429 return std::prev(MBB.erase(MBBI));
432 // Fixup callee-save register save/restore instructions to take into account
433 // combined SP bump by adding the local stack size to the stack offsets.
//
// MI must be one of the STP/STR/LDP/LDR opcodes listed in the assert and must
// address memory through SP. LocalStackSize is in bytes and must be a multiple
// of 8; it is divided by 8 before being added to the immediate operand.
434 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
435 unsigned LocalStackSize) {
436 unsigned Opc = MI.getOpcode();
438 assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
439 Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
440 Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
441 Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
442 "Unexpected callee-save save/restore opcode!");
// The offset is the last explicit operand; the base register precedes it.
444 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
445 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
446 "Unexpected base register in callee-save save/restore instruction!");
447 // Last operand is immediate offset that needs fixing.
448 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
449 // All generated opcodes have scaled offsets.
450 assert(LocalStackSize % 8 == 0);
451 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
/// Emit the function prologue into \p MBB.
///
/// Steps visible in this listing, in order:
///  - Functions using the GHC calling convention are special-cased up front.
///  - Without a stack frame, the whole stack size is recorded as local stack;
///    unless the red zone applies, SP is lowered by that amount and a
///    def_cfa_offset CFI instruction is emitted.
///  - Otherwise the local stack size is the total size minus the callee-save
///    area, and either one SP adjustment covers everything or the first
///    callee-save store is converted to a pre-indexed form.
///  - Instructions flagged FrameSetup are walked, with their offsets fixed up
///    via fixupCalleeSaveRestoreStackOffset, and FP is computed from SP.
///  - The remaining locals are allocated, re-aligning SP through a scratch
///    register and an AND when realignment is required.
///  - The base pointer is copied from SP when one is needed, and CFI for the
///    CFA and the callee-saved registers is emitted when frame moves are
///    needed.
///
/// NOTE(review): many control-flow lines (returns, `else` branches, several
/// `if` headers, closing braces, the DebugLoc declaration) are absent from this
/// listing; verify against the complete file before changing any code.
454 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
455 MachineBasicBlock &MBB) const {
456 MachineBasicBlock::iterator MBBI = MBB.begin();
457 const MachineFrameInfo &MFI = MF.getFrameInfo();
458 const Function *Fn = MF.getFunction();
459 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
460 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
461 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
462 MachineModuleInfo &MMI = MF.getMMI();
463 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// CFI is only produced when there is debug info or an unwind table entry is
// required.
464 bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
465 bool HasFP = hasFP(MF);
467 // Debug location must be unknown since the first debug location is used
468 // to determine the end of the prologue.
471 // All calls are tail calls in GHC calling conv, and functions have no
472 // prologue/epilogue.
473 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
476 int NumBytes = (int)MFI.getStackSize();
477 if (!AFI->hasStackFrame()) {
478 assert(!HasFP && "unexpected function without stack frame but with FP");
480 // All of the stack allocation is for locals.
481 AFI->setLocalStackSize(NumBytes);
485 // REDZONE: If the stack size is less than 128 bytes, we don't need
486 // to actually allocate.
487 if (canUseRedZone(MF))
488 ++NumRedZoneFunctions;
490 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
491 MachineInstr::FrameSetup);
493 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
494 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
495 // Encode the stack size of the leaf function.
496 unsigned CFIIndex = MF.addFrameInst(
497 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
498 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
499 .addCFIIndex(CFIIndex)
500 .setMIFlags(MachineInstr::FrameSetup);
505 auto CSStackSize = AFI->getCalleeSavedStackSize();
506 // All of the remaining stack allocations are for locals.
507 AFI->setLocalStackSize(NumBytes - CSStackSize);
509 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
511 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
512 MachineInstr::FrameSetup);
514 } else if (CSStackSize != 0) {
515 MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
// From here on NumBytes only covers what remains after the callee-save area.
517 NumBytes -= CSStackSize;
519 assert(NumBytes >= 0 && "Negative stack allocation size!?");
521 // Move past the saves of the callee-saved registers, fixing up the offsets
522 // and pre-inc if we decided to combine the callee-save and local stack
523 // pointer bump above.
524 MachineBasicBlock::iterator End = MBB.end();
525 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
527 fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
531 // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
532 int FPOffset = CSStackSize - 16;
534 FPOffset += AFI->getLocalStackSize();
536 // Issue sub fp, sp, FPOffset or
537 // mov fp,sp when FPOffset is zero.
538 // Note: All stores of callee-saved registers are marked as "FrameSetup".
539 // This code marks the instruction(s) that set the FP also.
540 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
541 MachineInstr::FrameSetup);
544 // Allocate space for the rest of the frame.
546 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
// Without realignment the subtraction targets SP directly; with realignment
// it goes through a scratch register that is masked into SP further below.
547 unsigned scratchSPReg = AArch64::SP;
549 if (NeedsRealignment) {
550 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
551 assert(scratchSPReg != AArch64::NoRegister);
554 // If we're a leaf function, try using the red zone.
555 if (!canUseRedZone(MF))
556 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
557 // the correct value here, as NumBytes also includes padding bytes,
558 // which shouldn't be counted here.
559 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
560 MachineInstr::FrameSetup);
562 if (NeedsRealignment) {
563 const unsigned Alignment = MFI.getMaxAlignment();
// Number of low bits that the AND mask must clear.
564 const unsigned NrBitsToZero = countTrailingZeros(Alignment);
565 assert(NrBitsToZero > 1);
566 assert(scratchSPReg != AArch64::SP);
568 // SUB X9, SP, NumBytes
569 // -- X9 is temporary register, so shouldn't contain any live data here,
570 // -- free to use. This is already produced by emitFrameOffset above.
571 // AND SP, X9, 0b11111...0000
572 // The logical immediates have a non-trivial encoding. The following
573 // formula computes the encoded immediate with all ones but
574 // NrBitsToZero zero bits as least significant bits.
575 uint32_t andMaskEncoded = (1 << 12) // = N
576 | ((64 - NrBitsToZero) << 6) // immr
577 | ((64 - NrBitsToZero - 1) << 0); // imms
579 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
580 .addReg(scratchSPReg, RegState::Kill)
581 .addImm(andMaskEncoded);
// Recorded so that emitEpilogue can check isStackRealigned().
582 AFI->setStackRealigned(true);
586 // If we need a base pointer, set it up here. It's whatever the value of the
587 // stack pointer is at this point. Any variable size objects will be allocated
588 // after this, so we can still use the base pointer to reference locals.
590 // FIXME: Clarify FrameSetup flags here.
591 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
593 if (RegInfo->hasBasePointer(MF)) {
594 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
598 if (needsFrameMoves) {
599 const DataLayout &TD = MF.getDataLayout();
// Negative pointer size for address space 0.
600 const int StackGrowth = -TD.getPointerSize(0);
601 unsigned FramePtr = RegInfo->getFrameRegister(MF);
602 // An example of the prologue:
609 // .cfi_personality 155, ___gxx_personality_v0
611 // .cfi_lsda 16, Lexception33
613 // stp xa,bx, [sp, -#offset]!
615 // stp x28, x27, [sp, #offset-32]
616 // stp fp, lr, [sp, #offset-16]
617 // add fp, sp, #offset - 16
621 // +-------------------------------------------+
622 // 10000 | ........ | ........ | ........ | ........ |
623 // 10004 | ........ | ........ | ........ | ........ |
624 // +-------------------------------------------+
625 // 10008 | ........ | ........ | ........ | ........ |
626 // 1000c | ........ | ........ | ........ | ........ |
627 // +===========================================+
628 // 10010 | X28 Register |
629 // 10014 | X28 Register |
630 // +-------------------------------------------+
631 // 10018 | X27 Register |
632 // 1001c | X27 Register |
633 // +===========================================+
634 // 10020 | Frame Pointer |
635 // 10024 | Frame Pointer |
636 // +-------------------------------------------+
637 // 10028 | Link Register |
638 // 1002c | Link Register |
639 // +===========================================+
640 // 10030 | ........ | ........ | ........ | ........ |
641 // 10034 | ........ | ........ | ........ | ........ |
642 // +-------------------------------------------+
643 // 10038 | ........ | ........ | ........ | ........ |
644 // 1003c | ........ | ........ | ........ | ........ |
645 // +-------------------------------------------+
647 // [sp] = 10030 :: >>initial value<<
648 // sp = 10020 :: stp fp, lr, [sp, #-16]!
649 // fp = sp == 10020 :: mov fp, sp
650 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
651 // sp == 10010 :: >>final value<<
653 // The frame pointer (w29) points to address 10020. If we use an offset of
654 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
655 // for w27, and -32 for w28:
658 // .cfi_def_cfa w29, 16
660 // .cfi_offset w30, -8
662 // .cfi_offset w29, -16
664 // .cfi_offset w27, -24
666 // .cfi_offset w28, -32
669 // Define the current CFA rule to use the provided FP.
670 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
671 unsigned CFIIndex = MF.addFrameInst(
672 MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
673 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
674 .addCFIIndex(CFIIndex)
675 .setMIFlags(MachineInstr::FrameSetup);
677 // Encode the stack size of the leaf function.
678 unsigned CFIIndex = MF.addFrameInst(
679 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
680 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
681 .addCFIIndex(CFIIndex)
682 .setMIFlags(MachineInstr::FrameSetup);
685 // Now emit the moves for whatever callee saved regs we have (including FP,
686 // LR if those are saved).
687 emitCalleeSavedFrameMoves(MBB, MBBI);
/// Emit the function epilogue into \p MBB.
///
/// Steps visible in this listing, in order:
///  - The last non-debug instruction is inspected to detect a tail-call return
///    (TCRETURNdi / TCRETURNri) and to pick up its debug location.
///  - Functions using the GHC calling convention are special-cased.
///  - ArgumentPopSize comes from operand 1 of the tail-call return, or from
///    getArgumentStackToRestore() otherwise.
///  - Unless the SP bump is combined, the last callee-save restore before the
///    first terminator is converted to a post-indexed form.
///  - Restores flagged FrameDestroy are walked backwards from the first
///    terminator, with their offsets fixed up when the bump is combined.
///  - SP is then restored, either in one adjustment or in pieces (locals, FP
///    based recovery for VLAs/realigned stacks, and the argument pop).
///
/// NOTE(review): many control-flow lines (returns, `else` branches, several
/// `if` headers, the loop's iterator updates, the DebugLoc declaration) are
/// absent from this listing; verify against the complete file before changing
/// any code.
691 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
692 MachineBasicBlock &MBB) const {
693 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
694 MachineFrameInfo &MFI = MF.getFrameInfo();
695 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
696 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
698 bool IsTailCallReturn = false;
699 if (MBB.end() != MBBI) {
700 DL = MBBI->getDebugLoc();
701 unsigned RetOpcode = MBBI->getOpcode();
702 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
703 RetOpcode == AArch64::TCRETURNri;
705 int NumBytes = MFI.getStackSize();
706 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
708 // All calls are tail calls in GHC calling conv, and functions have no
709 // prologue/epilogue.
710 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
713 // Initial and residual are named for consistency with the prologue. Note that
714 // in the epilogue, the residual adjustment is executed first.
715 uint64_t ArgumentPopSize = 0;
716 if (IsTailCallReturn) {
717 MachineOperand &StackAdjust = MBBI->getOperand(1);
719 // For a tail-call in a callee-pops-arguments environment, some or all of
720 // the stack may actually be in use for the call's arguments, this is
721 // calculated during LowerCall and consumed here...
722 ArgumentPopSize = StackAdjust.getImm();
724 // ... otherwise the amount to pop is *all* of the argument space,
725 // conveniently stored in the MachineFunctionInfo by
726 // LowerFormalArguments. This will, of course, be zero for the C calling
728 ArgumentPopSize = AFI->getArgumentStackToRestore();
731 // The stack frame should be like below,
733 // ---------------------- ---
735 // | BytesInStackArgArea| CalleeArgStackSize
736 // | (NumReusableBytes) | (of tail call)
739 // ---------------------| --- |
741 // | CalleeSavedReg | | |
742 // | (CalleeSavedStackSize)| | |
744 // ---------------------| | NumBytes
745 // | | StackSize (StackAdjustUp)
746 // | LocalStackSize | | |
747 // | (covering callee | | |
750 // ---------------------- --- ---
752 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
753 // = StackSize + ArgumentPopSize
755 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
756 // it as the 2nd argument of AArch64ISD::TC_RETURN.
758 auto CSStackSize = AFI->getCalleeSavedStackSize();
759 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// With separate bumps, the restore just before the first terminator also pops
// the callee-save area.
761 if (!CombineSPBump && CSStackSize != 0)
762 convertCalleeSaveRestoreToSPPrePostIncDec(
763 MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
765 // Move past the restores of the callee-saved registers.
766 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
767 MachineBasicBlock::iterator Begin = MBB.begin();
768 while (LastPopI != Begin) {
770 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
773 } else if (CombineSPBump)
774 fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
777 // If there is a single SP update, insert it before the ret and we're done.
779 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
780 NumBytes + ArgumentPopSize, TII,
781 MachineInstr::FrameDestroy);
// From here on NumBytes only covers what remains after the callee-save area.
785 NumBytes -= CSStackSize;
786 assert(NumBytes >= 0 && "Negative stack allocation size!?");
789 bool RedZone = canUseRedZone(MF);
790 // If this was a redzone leaf function, we don't need to restore the
791 // stack pointer (but we may need to pop stack args for fastcc).
792 if (RedZone && ArgumentPopSize == 0)
795 bool NoCalleeSaveRestore = CSStackSize == 0;
796 int StackRestoreBytes = RedZone ? 0 : NumBytes;
// With no callee-save restores in between, the argument pop can be folded into
// the same SP adjustment.
797 if (NoCalleeSaveRestore)
798 StackRestoreBytes += ArgumentPopSize;
799 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
800 StackRestoreBytes, TII, MachineInstr::FrameDestroy);
801 // If we were able to combine the local stack pop with the argument pop,
803 if (NoCalleeSaveRestore || ArgumentPopSize == 0)
808 // Restore the original stack pointer.
809 // FIXME: Rather than doing the math here, we should instead just use
810 // non-post-indexed loads for the restores if we aren't actually going to
811 // be able to save any instructions.
812 if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
813 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
814 -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
816 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
817 MachineInstr::FrameDestroy);
819 // This must be placed after the callee-save restore code because that code
820 // assumes the SP is at the same location as it was after the callee-save save
821 // code in the prologue.
823 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
824 ArgumentPopSize, TII, MachineInstr::FrameDestroy);
827 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
828 /// debug info. It's the same as what we use for resolving the code-gen
829 /// references for now. FIXME: This can go wrong when references are
830 /// SP-relative and simple call frames aren't used.
///
/// Forwards directly to resolveFrameIndexReference without passing PreferFP.
/// NOTE(review): the parameter line declaring FI is not visible in this
/// listing.
831 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
833 unsigned &FrameReg) const {
834 return resolveFrameIndexReference(MF, FI, FrameReg);
/// Choose the register (FP, base pointer or SP) through which frame index
/// \p FI is addressed, store it in \p FrameReg, and compute the matching
/// offset.
///
/// Two candidate offsets are computed from the object's frame offset: one with
/// 16 added (FPOffset) and one with the total stack size added (Offset).
/// \p PreferFP biases the choice towards the frame pointer when both are
/// usable.
///
/// NOTE(review): the declaration of UseFP, the statements that set it, and the
/// return statements are not visible in this listing; verify against the
/// complete file.
837 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
838 int FI, unsigned &FrameReg,
839 bool PreferFP) const {
840 const MachineFrameInfo &MFI = MF.getFrameInfo();
841 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
842 MF.getSubtarget().getRegisterInfo());
843 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
844 int FPOffset = MFI.getObjectOffset(FI) + 16;
845 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
846 bool isFixed = MFI.isFixedObjectIndex(FI);
848 // Use frame pointer to reference fixed objects. Use it for locals if
849 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
850 // reliable as a base). Make sure useFPForScavengingIndex() does the
851 // right thing for the emergency spill slot.
853 if (AFI->hasStackFrame()) {
854 // Note: Keeping the following as multiple 'if' statements rather than
855 // merging to a single expression for readability.
857 // Argument access should always use the FP.
860 } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
861 !RegInfo->needsStackRealignment(MF)) {
862 // Use SP or FP, whichever gives us the best chance of the offset
863 // being in range for direct access. If the FPOffset is positive,
864 // that'll always be best, as the SP will be even further away.
865 // If the FPOffset is negative, we have to keep in mind that the
866 // available offset range for negative offsets is smaller than for
867 // positive ones. If we have variable sized objects, we're stuck with
868 // using the FP regardless, though, as the SP offset is unknown
869 // and we don't have a base pointer available. If an offset is
870 // available via the FP and the SP, use whichever is closest.
871 if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
872 (FPOffset >= -256 && Offset > -FPOffset))
877 assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
878 "In the presence of dynamic stack pointer realignment, "
879 "non-argument objects cannot be accessed through the frame pointer");
882 FrameReg = RegInfo->getFrameRegister(MF);
886 // Use the base pointer if we have one.
887 if (RegInfo->hasBasePointer(MF))
888 FrameReg = RegInfo->getBaseRegister();
890 FrameReg = AArch64::SP;
891 // If we're using the red zone for this function, the SP won't actually
892 // be adjusted, so the offsets will be negative. They're also all
893 // within range of the signed 9-bit immediate instructions.
894 if (canUseRedZone(MF))
895 Offset -= AFI->getLocalStackSize();
/// Compute the register-state flag to attach to Reg when it is used by a
/// prologue instruction: the kill state is requested unless Reg is recorded
/// as a live-in of the function.
901 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
902 // Do not set a kill flag on values that are also marked as live-in. This
903 // happens with the @llvm.returnaddress intrinsic and with arguments passed in
904 // callee saved registers.
905 // Omitting the kill flags is conservatively correct even if the live-in
906 // is not used after all.
907 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
908 return getKillRegState(!IsLiveIn);
/// Decide whether the callee-save layout has to follow the compact-unwind
/// pairing rules. True when the subtarget targets MachO, except when the
/// target lowering supports swifterror and the SwiftError attribute appears
/// somewhere in the function's attribute list.
911 static bool produceCompactUnwindFrame(MachineFunction &MF) {
912 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
913 AttributeList Attrs = MF.getFunction()->getAttributes();
914 return Subtarget.isTargetMachO() &&
915 !(Subtarget.getTargetLowering()->supportSwiftError() &&
916 Attrs.hasAttrSomewhere(Attribute::SwiftError));
// First register of the entry; NoRegister until filled in.
922 unsigned Reg1 = AArch64::NoRegister;
// Second register of the entry; stays NoRegister when the entry describes a
// single register rather than a pair.
923 unsigned Reg2 = AArch64::NoRegister;
928 RegPairInfo() = default;
// An entry counts as a pair exactly when Reg2 has been set.
930 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
933 } // end anonymous namespace
/// Walk the callee-saved info list CSI in order and append one RegPairInfo
/// per register (or register pair) to RegPairs. Each entry records its
/// registers, its register class (GPR vs. FPR), its frame index and an
/// offset expressed in 8-byte units that counts down from the size of the
/// callee-saved area.
/// Side effects visible here: a frame object's alignment may be raised to 16
/// and the "callee-save stack has free space" flag may be set on the
/// function info.
935 static void computeCalleeSaveRegisterPairs(
936 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
937 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
942 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
943 MachineFrameInfo &MFI = MF.getFrameInfo();
944 CallingConv::ID CC = MF.getFunction()->getCallingConv();
945 unsigned Count = CSI.size();
947 // MachO's compact unwind format relies on all registers being stored in
949 assert((!produceCompactUnwindFrame(MF) ||
950 CC == CallingConv::PreserveMost ||
952 "Odd number of callee-saved regs to spill!");
// Running byte offset: starts at the total callee-saved area size and is
// decremented as each entry is laid out.
953 unsigned Offset = AFI->getCalleeSavedStackSize();
955 for (unsigned i = 0; i < Count; ++i) {
957 RPI.Reg1 = CSI[i].getReg();
// Only 64-bit GPRs and 64-bit FPRs are expected in the list.
959 assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
960 AArch64::FPR64RegClass.contains(RPI.Reg1));
961 RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
963 // Add the next reg to the pair if it is in the same register class.
965 unsigned NextReg = CSI[i + 1].getReg();
966 if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
967 (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
971 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
972 // list to come in sorted by frame index so that we can issue the store
973 // pair instructions directly. Assert if we see anything otherwise.
975 // The order of the registers in the list is controlled by
976 // getCalleeSavedRegs(), so they will always be in-order, as well.
977 assert((!RPI.isPaired() ||
978 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
979 "Out of order callee saved regs!");
981 // MachO's compact unwind format relies on all registers being stored in
982 // adjacent register pairs.
983 assert((!produceCompactUnwindFrame(MF) ||
984 CC == CallingConv::PreserveMost ||
986 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
987 RPI.Reg1 + 1 == RPI.Reg2))) &&
988 "Callee-save registers not saved as adjacent register pair!");
990 RPI.FrameIdx = CSI[i].getFrameIdx();
// Count * 8 differs from the recorded area size when that size includes
// padding beyond 8 bytes per register; an unpaired entry then has its frame
// object aligned to 16 and the free-space flag is raised.
992 if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
993 // Round up size of non-pair to pair size if we need to pad the
994 // callee-save area to ensure 16-byte alignment.
996 assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
997 MFI.setObjectAlignment(RPI.FrameIdx, 16);
998 AFI->setCalleeSaveStackHasFreeSpace(true);
// A pair consumes 16 bytes, a single register 8; the stored offset is the
// byte offset divided by 8.
1000 Offset -= RPI.isPaired() ? 16 : 8;
1001 assert(Offset % 8 == 0);
1002 RPI.Offset = Offset / 8;
// [-64, 63] is the range of a signed 7-bit value.
1003 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
1004 "Offset out of bounds for LDP/STP immediate");
1006 RegPairs.push_back(RPI);
/// Emit the stores that save callee-saved registers, inserting them into MBB
/// before MI. The register/pair layout comes from
/// computeCalleeSaveRegisterPairs() and is walked in reverse order. Every
/// store addresses SP plus the entry's scaled offset, is flagged FrameSetup
/// and carries one fixed-stack memory operand per stored register.
/// NOTE(review): the boolean result is not determined by the statements
/// shown in this block -- confirm what callers expect.
1012 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1013 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1014 const std::vector<CalleeSavedInfo> &CSI,
1015 const TargetRegisterInfo *TRI) const {
1016 MachineFunction &MF = *MBB.getParent();
1017 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1019 SmallVector<RegPairInfo, 8> RegPairs;
1021 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
1022 const MachineRegisterInfo &MRI = MF.getRegInfo();
1024 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1026 RegPairInfo RPI = *RPII;
1027 unsigned Reg1 = RPI.Reg1;
1028 unsigned Reg2 = RPI.Reg2;
1031 // Issue sequence of spills for cs regs. The first spill may be converted
1032 // to a pre-decrement store later by emitPrologue if the callee-save stack
1033 // area allocation can't be combined with the local stack area allocation.
1035 // stp x22, x21, [sp, #0] // addImm(+0)
1036 // stp x20, x19, [sp, #16] // addImm(+2)
1037 // stp fp, lr, [sp, #32] // addImm(+4)
1038 // Rationale: This sequence saves uop updates compared to a sequence of
1039 // pre-increment spills like stp xi,xj,[sp,#-16]!
1040 // Note: Similar rationale and sequence for restores in epilog.
// Opcode choice: the X-register forms (STPXi/STRXui) or the D-register forms
// (STPDi/STRDui), each in pair or single flavour depending on isPaired().
1042 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1044 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
1045 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
1047 dbgs() << ", " << TRI->getName(Reg2);
1048 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1050 dbgs() << ", " << RPI.FrameIdx+1;
1053 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
// Stored registers become live-ins of the block unless they are reserved.
1054 if (!MRI.isReserved(Reg1))
1055 MBB.addLiveIn(Reg1);
// For a pair, Reg2 is added as a register operand ahead of Reg1, and its
// memory operand refers to the slot at FrameIdx + 1.
1056 if (RPI.isPaired()) {
1057 if (!MRI.isReserved(Reg2))
1058 MBB.addLiveIn(Reg2);
1059 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
1060 MIB.addMemOperand(MF.getMachineMemOperand(
1061 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
1062 MachineMemOperand::MOStore, 8, 8));
1064 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1065 .addReg(AArch64::SP)
1066 .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
1067 .setMIFlag(MachineInstr::FrameSetup);
1068 MIB.addMemOperand(MF.getMachineMemOperand(
1069 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
1070 MachineMemOperand::MOStore, 8, 8));
/// Emit the loads that restore callee-saved registers, inserting them into
/// MBB before MI. The register/pair layout comes from
/// computeCalleeSaveRegisterPairs() and is walked in forward order (the
/// spill code walks it in reverse). Every load addresses SP plus the entry's
/// scaled offset, defines the restored register(s), is flagged FrameDestroy
/// and carries one fixed-stack memory operand per loaded register.
/// NOTE(review): the boolean result is not determined by the statements
/// shown in this block -- confirm what callers expect.
1075 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
1076 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1077 const std::vector<CalleeSavedInfo> &CSI,
1078 const TargetRegisterInfo *TRI) const {
1079 MachineFunction &MF = *MBB.getParent();
1080 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1082 SmallVector<RegPairInfo, 8> RegPairs;
// Take the debug location from the insertion point when it refers to a real
// instruction (MI may be the block's end iterator).
1084 if (MI != MBB.end())
1085 DL = MI->getDebugLoc();
1087 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
1089 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
1091 RegPairInfo RPI = *RPII;
1092 unsigned Reg1 = RPI.Reg1;
1093 unsigned Reg2 = RPI.Reg2;
1095 // Issue sequence of restores for cs regs. The last restore may be converted
1096 // to a post-increment load later by emitEpilogue if the callee-save stack
1097 // area allocation can't be combined with the local stack area allocation.
1099 // ldp fp, lr, [sp, #32] // addImm(+4)
1100 // ldp x20, x19, [sp, #16] // addImm(+2)
1101 // ldp x22, x21, [sp, #0] // addImm(+0)
1102 // Note: see comment in spillCalleeSavedRegisters()
// Opcode choice: the X-register forms (LDPXi/LDRXui) or the D-register forms
// (LDPDi/LDRDui), each in pair or single flavour depending on isPaired().
1105 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
1107 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
1108 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
1110 dbgs() << ", " << TRI->getName(Reg2);
1111 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1113 dbgs() << ", " << RPI.FrameIdx+1;
1116 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
// For a pair, Reg2 is added as a def ahead of Reg1, and its memory operand
// refers to the slot at FrameIdx + 1.
1117 if (RPI.isPaired()) {
1118 MIB.addReg(Reg2, getDefRegState(true));
1119 MIB.addMemOperand(MF.getMachineMemOperand(
1120 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
1121 MachineMemOperand::MOLoad, 8, 8));
1123 MIB.addReg(Reg1, getDefRegState(true))
1124 .addReg(AArch64::SP)
1125 .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
1126 .setMIFlag(MachineInstr::FrameDestroy);
1127 MIB.addMemOperand(MF.getMachineMemOperand(
1128 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
1129 MachineMemOperand::MOLoad, 8, 8));
/// Decide which callee-saved registers must be saved for MF and record the
/// result in SavedRegs. Starting from the generic
/// TargetFrameLowering::determineCalleeSaves() result, this additionally:
///  - marks FP/LR and the base pointer register as saved where applicable,
///  - adds the partner register when the compact-unwind layout demands pairs,
///  - sets the "has stack frame" flag on the function info when needed,
///  - arranges for a scratch register or an emergency spill slot (registered
///    with RS) for register scavenging,
///  - stores the callee-saved area size, rounded up to 16 bytes.
1134 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
1135 BitVector &SavedRegs,
1136 RegScavenger *RS) const {
1137 // All calls are tail calls in GHC calling conv, and functions have no
1138 // prologue/epilogue.
1139 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
1142 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1143 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
1144 MF.getSubtarget().getRegisterInfo());
1145 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// Track a callee-saved GPR that is not being spilled (and its pair partner)
// so it can later be spilled on purpose to obtain a scratch register.
1146 unsigned UnspilledCSGPR = AArch64::NoRegister;
1147 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
1149 // The frame record needs to be created by saving the appropriate registers
1151 SavedRegs.set(AArch64::FP);
1152 SavedRegs.set(AArch64::LR);
1155 unsigned BasePointerReg = AArch64::NoRegister;
1156 if (RegInfo->hasBasePointer(MF))
1157 BasePointerReg = RegInfo->getBaseRegister();
1159 unsigned ExtraCSSpill = 0;
// CSRegs is a zero-terminated list; the loop below stops at the terminator.
1160 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1161 // Figure out which callee-saved registers to save/restore.
1162 for (unsigned i = 0; CSRegs[i]; ++i) {
1163 const unsigned Reg = CSRegs[i];
1165 // Add the base pointer register to SavedRegs if it is callee-save.
1166 if (Reg == BasePointerReg)
1169 bool RegUsed = SavedRegs.test(Reg);
// i ^ 1 flips the low bit of the index, selecting the neighbouring list entry
// (0<->1, 2<->3, ...), which is treated as this register's pair partner.
1170 unsigned PairedReg = CSRegs[i ^ 1];
1172 if (AArch64::GPR64RegClass.contains(Reg) &&
1173 !RegInfo->isReservedReg(MF, Reg)) {
1174 UnspilledCSGPR = Reg;
1175 UnspilledCSGPRPaired = PairedReg;
1180 // MachO's compact unwind format relies on all registers being stored in
1182 // FIXME: the usual format is actually better if unwinding isn't needed.
1183 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
1184 SavedRegs.set(PairedReg);
// A partner saved only for pairing purposes can double as the extra scratch
// register, provided it is a non-reserved GPR.
1185 if (AArch64::GPR64RegClass.contains(PairedReg) &&
1186 !RegInfo->isReservedReg(MF, PairedReg))
1187 ExtraCSSpill = PairedReg;
1191 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
1192 for (unsigned Reg : SavedRegs.set_bits())
1193 dbgs() << ' ' << PrintReg(Reg, RegInfo);
1196 // If any callee-saved registers are used, the frame cannot be eliminated.
1197 unsigned NumRegsSpilled = SavedRegs.count();
1198 bool CanEliminateFrame = NumRegsSpilled == 0;
1200 // The CSR spill slots have not been allocated yet, so estimateStackSize
1201 // won't include them.
1202 MachineFrameInfo &MFI = MF.getFrameInfo();
// Each saved register is accounted for as 8 bytes in the estimate.
1203 unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
1204 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
1205 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
1206 bool BigStack = (CFSize > EstimatedStackSizeLimit);
1207 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
1208 AFI->setHasStackFrame(true);
1210 // Estimate if we might need to scavenge a register at some point in order
1211 // to materialize a stack offset. If so, either spill one additional
1212 // callee-saved register or reserve a special spill slot to facilitate
1213 // register scavenging. If we already spilled an extra callee-saved register
1214 // above to keep the number of spills even, we don't need to do anything else
1217 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
1218 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
1219 << " to get a scratch register.\n");
1220 SavedRegs.set(UnspilledCSGPR);
1221 // MachO's compact unwind format relies on all registers being stored in
1222 // pairs, so if we need to spill one extra for BigStack, then we need to
1224 if (produceCompactUnwindFrame(MF))
1225 SavedRegs.set(UnspilledCSGPRPaired);
1226 ExtraCSSpill = UnspilledCSGPRPaired;
// Recount: the set of saved registers has just grown.
1227 NumRegsSpilled = SavedRegs.count();
1230 // If we didn't find an extra callee-saved register to spill, create
1231 // an emergency spill slot.
// The slot is also created when the designated extra register is already
// used as a physical register in this function.
1232 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
1233 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1234 const TargetRegisterClass &RC = AArch64::GPR64RegClass;
1235 unsigned Size = TRI->getSpillSize(RC);
1236 unsigned Align = TRI->getSpillAlignment(RC);
1237 int FI = MFI.CreateStackObject(Size, Align, false);
1238 RS->addScavengingFrameIndex(FI);
1239 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
1240 << " as the emergency spill slot.\n");
1244 // Round up to register pair alignment to avoid additional SP adjustment
1246 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
/// Report whether stack slot scavenging should be enabled for MF: true when
/// the function info says the callee-save stack area has free space.
/// NOTE(review): presumably this is the flag raised through
/// setCalleeSaveStackHasFreeSpace() while pairing callee-saved registers --
/// confirm in AArch64FunctionInfo.
1249 bool AArch64FrameLowering::enableStackSlotScavenging(
1250 const MachineFunction &MF) const {
1251 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1252 return AFI->hasCalleeSaveStackFreeSpace();