1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the AArch64 implementation of TargetFrameLowering class.
12 // On AArch64, stack frames are structured as follows:
14 // The stack grows downward.
16 // All of the individual frame areas on the frame below are optional, i.e. it's
17 // possible to create a function so that the particular area isn't present
20 // At function entry, the "frame" looks as follows:
23 // |-----------------------------------|
25 // | arguments passed on the stack |
27 // |-----------------------------------| <- sp
31 // After the prologue has run, the frame has the following general structure.
32 // Note that this doesn't depict the case where a red-zone is used. Also,
33 // technically the last frame area (VLAs) doesn't get created until in the
34 // main function body, after the prologue is run. However, it's depicted here
38 // |-----------------------------------|
40 // | arguments passed on the stack |
42 // |-----------------------------------|
44 // | (Win64 only) varargs from reg |
46 // |-----------------------------------|
48 // | prev_fp, prev_lr |
49 // | (a.k.a. "frame record") |
50 // |-----------------------------------| <- fp(=x29)
52 // | other callee-saved registers |
54 // |-----------------------------------|
55 // |.empty.space.to.make.part.below....|
56 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
57 // |.the.standard.16-byte.alignment....| compile time; if present)
58 // |-----------------------------------|
60 // | local variables of fixed size |
61 // | including spill slots |
62 // |-----------------------------------| <- bp(not defined by ABI,
63 // |.variable-sized.local.variables....| LLVM chooses X19)
64 // |.(VLAs)............................| (size of this area is unknown at
65 // |...................................| compile time)
66 // |-----------------------------------| <- sp
70 // To access the data in a frame, at-compile time, a constant offset must be
71 // computable from one of the pointers (fp, bp, sp) to access it. The size
72 // of the areas with a dotted background cannot be computed at compile-time
73 // if they are present, making it required to have all three of fp, bp and
74 // sp to be set up to be able to access all contents in the frame areas,
75 // assuming all of the frame areas are non-empty.
77 // For most functions, some of the frame areas are empty. For those functions,
78 // it may not be necessary to set up fp or bp:
79 // * A base pointer is definitely needed when there are both VLAs and local
80 // variables with more-than-default alignment requirements.
81 // * A frame pointer is definitely needed when there are local variables with
82 // more-than-default alignment requirements.
84 // In some cases when a base pointer is not strictly needed, it is generated
85 // anyway when offsets from the frame pointer to access local variables become
86 // so large that the offset can't be encoded in the immediate fields of loads
89 // FIXME: also explain the redzone concept.
90 // FIXME: also explain the concept of reserved call frames.
92 //===----------------------------------------------------------------------===//
94 #include "AArch64FrameLowering.h"
95 #include "AArch64InstrInfo.h"
96 #include "AArch64MachineFunctionInfo.h"
97 #include "AArch64RegisterInfo.h"
98 #include "AArch64Subtarget.h"
99 #include "AArch64TargetMachine.h"
100 #include "MCTargetDesc/AArch64AddressingModes.h"
101 #include "llvm/ADT/SmallVector.h"
102 #include "llvm/ADT/Statistic.h"
103 #include "llvm/CodeGen/LivePhysRegs.h"
104 #include "llvm/CodeGen/MachineBasicBlock.h"
105 #include "llvm/CodeGen/MachineFrameInfo.h"
106 #include "llvm/CodeGen/MachineFunction.h"
107 #include "llvm/CodeGen/MachineInstr.h"
108 #include "llvm/CodeGen/MachineInstrBuilder.h"
109 #include "llvm/CodeGen/MachineMemOperand.h"
110 #include "llvm/CodeGen/MachineModuleInfo.h"
111 #include "llvm/CodeGen/MachineOperand.h"
112 #include "llvm/CodeGen/MachineRegisterInfo.h"
113 #include "llvm/CodeGen/RegisterScavenging.h"
114 #include "llvm/CodeGen/TargetInstrInfo.h"
115 #include "llvm/CodeGen/TargetRegisterInfo.h"
116 #include "llvm/CodeGen/TargetSubtargetInfo.h"
117 #include "llvm/IR/Attributes.h"
118 #include "llvm/IR/CallingConv.h"
119 #include "llvm/IR/DataLayout.h"
120 #include "llvm/IR/DebugLoc.h"
121 #include "llvm/IR/Function.h"
122 #include "llvm/MC/MCDwarf.h"
123 #include "llvm/Support/CommandLine.h"
124 #include "llvm/Support/Debug.h"
125 #include "llvm/Support/ErrorHandling.h"
126 #include "llvm/Support/MathExtras.h"
127 #include "llvm/Support/raw_ostream.h"
128 #include "llvm/Target/TargetMachine.h"
129 #include "llvm/Target/TargetOptions.h"
135 using namespace llvm;
137 #define DEBUG_TYPE "frame-info"
// Hidden command-line flag ("-aarch64-redzone") that opts in to using the
// red zone for small leaf-function frames; off by default.
139 static cl::opt<bool> EnableRedZone("aarch64-redzone",
140 cl::desc("enable use of redzone on AArch64"),
141 cl::init(false), cl::Hidden);
// Counter reported under -stats (DEBUG_TYPE "frame-info") for functions
// whose stack allocation was elided thanks to the red zone.
143 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
145 /// Look at each instruction that references stack frames and return the stack
146 /// size limit beyond which some of these instructions will require a scratch
147 /// register during their expansion later.
148 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
149 // FIXME: For now, just conservatively guestimate based on unscaled indexing
150 // range. We'll end up allocating an unnecessary spill slot a lot, but
151 // realistically that's not a big deal at this stage of the game.
152 for (MachineBasicBlock &MBB : MF) {
153 for (MachineInstr &MI : MBB) {
// Skip instructions that never need a scratch register here: debug values,
// pseudos, and ADD(S)Xri which can materialize large offsets directly.
154 if (MI.isDebugValue() || MI.isPseudo() ||
155 MI.getOpcode() == AArch64::ADDXri ||
156 MI.getOpcode() == AArch64::ADDSXri)
// NOTE(review): the body of the 'if' above (presumably 'continue;') is
// missing from this copy of the source -- verify against upstream.
159 for (const MachineOperand &MO : MI.operands()) {
// NOTE(review): the declaration of 'Offset' (and the frame-index check
// that initializes it) is missing between these lines in this copy.
// If the offset cannot be folded into the instruction's immediate
// field, the conservative scavenging limit applies.
164 if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
165 AArch64FrameOffsetCannotUpdate)
// NOTE(review): the consequent of this check and the function's return
// statement(s) are missing from this copy of the source.
// Return true when this function may leave its (small) local area below SP
// unallocated and rely on the red zone instead of bumping the stack pointer.
173 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
// NOTE(review): an early guard on the EnableRedZone flag and the body
// ('return false;') of the NoRedZone check below appear to be missing
// from this copy of the source -- verify against upstream.
176 // Don't use the red zone if the function explicitly asks us not to.
177 // This is typically used for kernel code.
178 if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
181 const MachineFrameInfo &MFI = MF.getFrameInfo();
182 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
183 unsigned NumBytes = AFI->getLocalStackSize();
// Only leaf functions (no calls), without a frame pointer, and with at most
// 128 bytes of locals qualify.
185 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
188 /// hasFP - Return true if the specified function should have a dedicated frame
189 /// pointer register.
190 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
191 const MachineFrameInfo &MFI = MF.getFrameInfo();
192 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
193 // Retain behavior of always omitting the FP for leaf functions when possible.
// An FP is forced when: the function makes calls and FP elimination is
// disabled, there are variable-sized objects, the frame address escapes,
// stackmap/patchpoint intrinsics are present, or the stack needs dynamic
// re-alignment (SP alone is then not a reliable base).
194 return (MFI.hasCalls() &&
195 MF.getTarget().Options.DisableFramePointerElim(MF)) ||
196 MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
197 MFI.hasStackMap() || MFI.hasPatchPoint() ||
198 RegInfo->needsStackRealignment(MF);
201 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
202 /// not required, we reserve argument space for call sites in the function
203 /// immediately on entry to the current function. This eliminates the need for
204 /// add/sub sp brackets around call sites. Returns true if the call frame is
205 /// included as part of the stack frame.
// NOTE(review): the 'bool' return-type line of this definition appears to be
// missing from this copy of the source.
207 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// VLAs make the frame size dynamic, so call frames cannot be pre-reserved.
208 return !MF.getFrameInfo().hasVarSizedObjects();
// Replace the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos with real SP
// adjustments (or nothing, when the call frame is reserved up front).
211 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
212 MachineFunction &MF, MachineBasicBlock &MBB,
213 MachineBasicBlock::iterator I) const {
214 const AArch64InstrInfo *TII =
215 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
216 DebugLoc DL = I->getDebugLoc();
217 unsigned Opc = I->getOpcode();
218 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
// For a destroy pseudo, operand 1 carries how many bytes the callee pops.
219 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
221 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
222 if (!TFI->hasReservedCallFrame(MF)) {
223 unsigned Align = getStackAlignment();
// Operand 0 is the raw adjustment size; round it up to stack alignment.
225 int64_t Amount = I->getOperand(0).getImm();
226 Amount = alignTo(Amount, Align);
// NOTE(review): the negation of Amount for the destroy case (and possibly
// other statements) appears to be missing between these lines -- verify
// against upstream.
230 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
231 // doesn't have to pop anything), then the first operand will be zero too so
232 // this adjustment is a no-op.
233 if (CalleePopAmount == 0) {
234 // FIXME: in-function stack adjustment for calls is limited to 24-bits
235 // because there's no guaranteed temporary register available.
237 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
238 // 1) For offset <= 12-bit, we use LSL #0
239 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
240 // LSL #0, and the other uses LSL #12.
242 // Most call frames will be allocated at the start of a function so
243 // this is OK, but it is a limitation that needs dealing with.
244 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
245 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
247 } else if (CalleePopAmount != 0) {
248 // If the calling convention demands that the callee pops arguments from the
249 // stack, we want to add it back if we have a reserved call frame.
250 assert(CalleePopAmount < 0xffffff && "call frame too large");
// NOTE(review): this call's trailing argument(s), the closing braces, and
// the final 'return MBB.erase(I);' are missing from this copy.
251 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
// Emit one CFI .cfi_offset directive per callee-saved register so unwinders
// can locate each saved register relative to the CFA.
257 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
258 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
259 MachineFunction &MF = *MBB.getParent();
260 MachineFrameInfo &MFI = MF.getFrameInfo();
261 const TargetSubtargetInfo &STI = MF.getSubtarget();
262 const MCRegisterInfo *MRI = STI.getRegisterInfo();
263 const TargetInstrInfo *TII = STI.getInstrInfo();
264 DebugLoc DL = MBB.findDebugLoc(MBBI);
266 // Add callee saved registers to move list.
267 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// NOTE(review): an early 'if (CSI.empty()) return;' guard appears to be
// missing from this copy of the source.
271 for (const auto &Info : CSI) {
272 unsigned Reg = Info.getReg();
// NOTE(review): the left-hand side of this expression (presumably
// 'int64_t Offset =') is missing from this copy of the source.
274 MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
275 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
276 unsigned CFIIndex = MF.addFrameInst(
277 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
// The CFI directive is itself part of frame setup.
278 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
279 .addCFIIndex(CFIIndex)
280 .setMIFlags(MachineInstr::FrameSetup);
284 // Find a scratch register that we can use at the start of the prologue to
285 // re-align the stack pointer. We avoid using callee-save registers since they
286 // may appear to be free when this is called from canUseAsPrologue (during
287 // shrink wrapping), but then no longer be free when this is called from
290 // FIXME: This is a bit conservative, since in the above case we could use one
291 // of the callee-save registers as a scratch temp to re-align the stack pointer,
292 // but we would then have to make sure that we were in fact saving at least one
293 // callee-save register in the prologue, which is additional complexity that
294 // doesn't seem worth the benefit.
// Returns AArch64::NoRegister when no free GPR64 exists.
295 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
296 MachineFunction *MF = MBB->getParent();
298 // If MBB is an entry block, use X9 as the scratch register
299 if (&MF->front() == MBB)
// NOTE(review): the body of the 'if' above (presumably
// 'return AArch64::X9;') is missing from this copy of the source.
302 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
303 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
// Start from the registers live into this block...
304 LivePhysRegs LiveRegs(TRI);
305 LiveRegs.addLiveIns(*MBB);
307 // Mark callee saved registers as used so we will not choose them.
308 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
309 for (unsigned i = 0; CSRegs[i]; ++i)
310 LiveRegs.addReg(CSRegs[i]);
312 // Prefer X9 since it was historically used for the prologue scratch reg.
313 const MachineRegisterInfo &MRI = MF->getRegInfo();
314 if (LiveRegs.available(MRI, AArch64::X9))
// NOTE(review): the bodies of the two availability checks above/below
// (presumably 'return AArch64::X9;' / 'return Reg;') are missing from
// this copy of the source -- verify against upstream.
317 for (unsigned Reg : AArch64::GPR64RegClass) {
318 if (LiveRegs.available(MRI, Reg))
321 return AArch64::NoRegister;
// A block can host the prologue (for shrink wrapping) as long as any stack
// re-alignment it would need can find a scratch register.
324 bool AArch64FrameLowering::canUseAsPrologue(
325 const MachineBasicBlock &MBB) const {
326 const MachineFunction *MF = MBB.getParent();
// const_cast needed because findScratchNonCalleeSaveRegister takes a
// non-const MachineBasicBlock*.
327 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
328 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
329 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
331 // Don't need a scratch register if we're not going to re-align the stack.
332 if (!RegInfo->needsStackRealignment(*MF))
// NOTE(review): the body of the 'if' above (presumably 'return true;') is
// missing from this copy of the source.
334 // Otherwise, we can use any block as long as it has a scratch register
336 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
// Return true when a Win64 target must call the stack probe helper
// (__chkstk) before allocating StackSizeInBytes.
339 static bool windowsRequiresStackProbe(MachineFunction &MF,
340 unsigned StackSizeInBytes) {
341 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
342 if (!Subtarget.isTargetWindows())
// NOTE(review): the body of the 'if' above (presumably 'return false;')
// is missing from this copy of the source.
344 const Function &F = MF.getFunction();
345 // TODO: When implementing stack protectors, take that into account
346 // for the probe threshold.
// Default probe threshold is one 4 KiB page; the "stack-probe-size"
// function attribute can override it.
347 unsigned StackProbeSize = 4096;
348 if (F.hasFnAttribute("stack-probe-size"))
// NOTE(review): part of this attribute-parsing expression (e.g. a
// '.getValueAsString()' link) appears to be missing between these lines.
349 F.getFnAttribute("stack-probe-size")
351 .getAsInteger(0, StackProbeSize);
352 return StackSizeInBytes >= StackProbeSize;
// Decide whether the callee-save area and the local area can be allocated
// with a single SP adjustment (folded into the first/last CSR store/load).
355 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
356 MachineFunction &MF, unsigned StackBumpBytes) const {
357 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
358 const MachineFrameInfo &MFI = MF.getFrameInfo();
359 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
360 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// NOTE(review): the return statements for every early-exit condition in
// this function are missing from this copy of the source (upstream these
// are 'return true;' here and 'return false;' for the following three
// checks) -- verify against upstream before relying on this text.
362 if (AFI->getLocalStackSize() == 0)
365 // 512 is the maximum immediate for stp/ldp that will be used for
366 // callee-save save/restores
367 if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
370 if (MFI.hasVarSizedObjects())
373 if (RegInfo->needsStackRealignment(MF))
376 // This isn't strictly necessary, but it simplifies things a bit since the
377 // current RedZone handling code assumes the SP is adjusted by the
378 // callee-save save/restore code.
379 if (canUseRedZone(MF))
385 // Convert callee-save register save/restore instruction to do stack pointer
386 // decrement/increment to allocate/deallocate the callee-save stack area by
387 // converting store/load to use pre/post increment version.
// Returns an iterator to the newly built instruction; the original
// instruction is erased.
388 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
389 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
390 const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
// NOTE(review): the declaration of 'NewOpc' and several 'case' labels and
// 'break;' statements of this switch are missing from this copy of the
// source -- verify against upstream.
392 bool NewIsUnscaled = false;
393 switch (MBBI->getOpcode()) {
395 llvm_unreachable("Unexpected callee-save save/restore opcode!");
397 NewOpc = AArch64::STPXpre;
400 NewOpc = AArch64::STPDpre;
402 case AArch64::STRXui:
403 NewOpc = AArch64::STRXpre;
404 NewIsUnscaled = true;
406 case AArch64::STRDui:
407 NewOpc = AArch64::STRDpre;
408 NewIsUnscaled = true;
411 NewOpc = AArch64::LDPXpost;
414 NewOpc = AArch64::LDPDpost;
416 case AArch64::LDRXui:
417 NewOpc = AArch64::LDRXpost;
418 NewIsUnscaled = true;
420 case AArch64::LDRDui:
421 NewOpc = AArch64::LDRDpost;
422 NewIsUnscaled = true;
// Pre/post-indexed forms additionally define SP (the writeback result).
426 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
427 MIB.addReg(AArch64::SP, RegState::Define);
429 // Copy all operands other than the immediate offset.
430 unsigned OpndIdx = 0;
431 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
433 MIB.add(MBBI->getOperand(OpndIdx));
// The original offset must be zero: this is the first/last callee-save
// access, which is exactly where the SP bump is folded in.
435 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
436 "Unexpected immediate offset in first/last callee-save save/restore "
438 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
439 "Unexpected base register in callee-save save/restore instruction!");
440 // Last operand is immediate offset that needs fixing.
441 assert(CSStackSizeInc % 8 == 0);
442 int64_t CSStackSizeIncImm = CSStackSizeInc;
// NOTE(review): the guard on NewIsUnscaled for this division appears to be
// missing between these lines -- as written the scaling is unconditional.
444 CSStackSizeIncImm /= 8;
445 MIB.addImm(CSStackSizeIncImm);
// Preserve flags (FrameSetup/FrameDestroy) and memory operands of the
// instruction being replaced.
447 MIB.setMIFlags(MBBI->getFlags());
448 MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
450 return std::prev(MBB.erase(MBBI));
453 // Fixup callee-save register save/restore instructions to take into account
454 // combined SP bump by adding the local stack size to the stack offsets.
455 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
456 unsigned LocalStackSize) {
457 unsigned Opc = MI.getOpcode();
// Only the scaled, SP-relative save/restore forms are expected here.
459 assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
460 Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
461 Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
462 Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
463 "Unexpected callee-save save/restore opcode!");
465 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
466 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
467 "Unexpected base register in callee-save save/restore instruction!");
468 // Last operand is immediate offset that needs fixing.
469 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
470 // All generated opcodes have scaled offsets.
// The immediate is in 8-byte units, hence the division below.
471 assert(LocalStackSize % 8 == 0);
472 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
// Emit the function prologue: allocate the stack frame, save callee-saved
// registers, set up FP/BP as needed, and emit CFI for unwinding.
// NOTE(review): many statements of this function (DebugLoc declaration,
// early returns, several 'if' headers and closing braces) are missing from
// this copy of the source -- the review notes below mark the clearest gaps;
// verify all of them against upstream.
475 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
476 MachineBasicBlock &MBB) const {
477 MachineBasicBlock::iterator MBBI = MBB.begin();
478 const MachineFrameInfo &MFI = MF.getFrameInfo();
479 const Function &F = MF.getFunction();
480 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
481 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
482 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
483 MachineModuleInfo &MMI = MF.getMMI();
484 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// CFI / frame moves are needed for debug info or unwind tables.
485 bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
486 bool HasFP = hasFP(MF);
488 // Debug location must be unknown since the first debug location is used
489 // to determine the end of the prologue.
// NOTE(review): the 'DebugLoc DL;' declaration that this comment refers to
// is missing from this copy of the source.
492 // All calls are tail calls in GHC calling conv, and functions have no
493 // prologue/epilogue.
494 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
// NOTE(review): the 'return;' body of the GHC check is missing here.
497 int NumBytes = (int)MFI.getStackSize();
// Fast path: no callee saves and no stack probe required -- the whole
// frame is locals and may fit in the red zone.
498 if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
499 assert(!HasFP && "unexpected function without stack frame but with FP");
501 // All of the stack allocation is for locals.
502 AFI->setLocalStackSize(NumBytes);
// NOTE(review): an early 'return' for NumBytes == 0 and the 'return;'
// after the red-zone bookkeeping appear to be missing in this region.
506 // REDZONE: If the stack size is less than 128 bytes, we don't need
507 // to actually allocate.
508 if (canUseRedZone(MF))
509 ++NumRedZoneFunctions;
511 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
512 MachineInstr::FrameSetup);
514 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
515 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
516 // Encode the stack size of the leaf function.
517 unsigned CFIIndex = MF.addFrameInst(
518 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
519 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
520 .addCFIIndex(CFIIndex)
521 .setMIFlags(MachineInstr::FrameSetup);
// NOTE(review): the 'bool IsWin64 =' left-hand side of this expression is
// missing from this copy of the source.
527 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
// Win64 varargs registers are spilled into a fixed area above the frame
// record; account for it in the prologue save size.
528 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
530 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
531 // All of the remaining stack allocations are for locals.
532 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
534 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// NOTE(review): the 'if (CombineSPBump) {' header guarding this single
// combined SP adjustment is missing from this copy of the source.
536 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
537 MachineInstr::FrameSetup);
539 } else if (PrologueSaveSize != 0) {
// Fold the callee-save-area allocation into the first CSR store.
540 MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
542 NumBytes -= PrologueSaveSize;
544 assert(NumBytes >= 0 && "Negative stack allocation size!?");
546 // Move past the saves of the callee-saved registers, fixing up the offsets
547 // and pre-inc if we decided to combine the callee-save and local stack
548 // pointer bump above.
549 MachineBasicBlock::iterator End = MBB.end();
550 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
// NOTE(review): the 'if (CombineSPBump)' guard and the '++MBBI;' advance
// of this loop are missing from this copy of the source.
552 fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
// NOTE(review): the 'if (HasFP) {' header for the frame-pointer setup
// below is missing from this copy of the source.
556 // Only set up FP if we actually need to. Frame pointer is fp =
557 // sp - fixedobject - 16.
558 int FPOffset = AFI->getCalleeSavedStackSize() - 16;
560 FPOffset += AFI->getLocalStackSize();
562 // Issue sub fp, sp, FPOffset or
563 // mov fp,sp when FPOffset is zero.
564 // Note: All stores of callee-saved registers are marked as "FrameSetup".
565 // This code marks the instruction(s) that set the FP also.
566 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
567 MachineInstr::FrameSetup);
// Win64 stack probe: __chkstk probes NumWords * 16 bytes, taking the count
// in X15 and (per its contract) clobbering/returning through X15.
570 if (windowsRequiresStackProbe(MF, NumBytes)) {
571 uint32_t NumWords = NumBytes >> 4;
// NOTE(review): the '.addImm(NumWords)' operand of this MOVi64imm build
// appears to be missing between these lines.
573 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
575 .setMIFlags(MachineInstr::FrameSetup);
577 switch (MF.getTarget().getCodeModel()) {
578 case CodeModel::Small:
579 case CodeModel::Medium:
580 case CodeModel::Kernel:
// Direct call reaches __chkstk in the small/medium/kernel models.
581 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
582 .addExternalSymbol("__chkstk")
583 .addReg(AArch64::X15, RegState::Implicit)
584 .setMIFlags(MachineInstr::FrameSetup);
// NOTE(review): the 'break;' between these cases is missing from this
// copy of the source.
586 case CodeModel::Large:
// Large model: materialize the address in X16, then BLR.
587 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
588 .addReg(AArch64::X16, RegState::Define)
589 .addExternalSymbol("__chkstk")
590 .addExternalSymbol("__chkstk")
591 .setMIFlags(MachineInstr::FrameSetup);
593 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
594 .addReg(AArch64::X16, RegState::Kill)
595 .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
596 .setMIFlags(MachineInstr::FrameSetup);
// After the probe, SP -= X15 * 16 (UXTX #4 re-scales the word count).
600 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
601 .addReg(AArch64::SP, RegState::Kill)
602 .addReg(AArch64::X15, RegState::Kill)
603 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
604 .setMIFlags(MachineInstr::FrameSetup);
// NOTE(review): the 'NumBytes = 0;' reset after the probe sequence appears
// to be missing from this copy of the source.
608 // Allocate space for the rest of the frame.
// NOTE(review): the 'if (NumBytes) {' header for this region is missing.
610 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
611 unsigned scratchSPReg = AArch64::SP;
613 if (NeedsRealignment) {
// Realignment needs SP computed into a scratch register first, since the
// final SP comes from an AND of that scratch value.
614 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
615 assert(scratchSPReg != AArch64::NoRegister);
618 // If we're a leaf function, try using the red zone.
619 if (!canUseRedZone(MF))
620 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
621 // the correct value here, as NumBytes also includes padding bytes,
622 // which shouldn't be counted here.
623 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
624 MachineInstr::FrameSetup);
626 if (NeedsRealignment) {
627 const unsigned Alignment = MFI.getMaxAlignment();
628 const unsigned NrBitsToZero = countTrailingZeros(Alignment);
629 assert(NrBitsToZero > 1);
630 assert(scratchSPReg != AArch64::SP);
632 // SUB X9, SP, NumBytes
633 // -- X9 is temporary register, so shouldn't contain any live data here,
634 // -- free to use. This is already produced by emitFrameOffset above.
635 // AND SP, X9, 0b11111...0000
636 // The logical immediates have a non-trivial encoding. The following
637 // formula computes the encoded immediate with all ones but
638 // NrBitsToZero zero bits as least significant bits.
639 uint32_t andMaskEncoded = (1 << 12) // = N
640 | ((64 - NrBitsToZero) << 6) // immr
641 | ((64 - NrBitsToZero - 1) << 0); // imms
643 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
644 .addReg(scratchSPReg, RegState::Kill)
645 .addImm(andMaskEncoded);
646 AFI->setStackRealigned(true);
650 // If we need a base pointer, set it up here. It's whatever the value of the
651 // stack pointer is at this point. Any variable size objects will be allocated
652 // after this, so we can still use the base pointer to reference locals.
654 // FIXME: Clarify FrameSetup flags here.
655 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
657 if (RegInfo->hasBasePointer(MF)) {
// NOTE(review): this call's trailing argument(s) (the 'false' KillSrc
// flag) appear to be missing after this line in this copy of the source.
658 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
662 if (needsFrameMoves) {
663 const DataLayout &TD = MF.getDataLayout();
// StackGrowth is negative: the stack grows downward by pointer-size steps.
664 const int StackGrowth = -TD.getPointerSize(0);
665 unsigned FramePtr = RegInfo->getFrameRegister(MF);
666 // An example of the prologue:
673 // .cfi_personality 155, ___gxx_personality_v0
675 // .cfi_lsda 16, Lexception33
677 // stp xa,bx, [sp, -#offset]!
679 // stp x28, x27, [sp, #offset-32]
680 // stp fp, lr, [sp, #offset-16]
681 // add fp, sp, #offset - 16
685 // +-------------------------------------------+
686 // 10000 | ........ | ........ | ........ | ........ |
687 // 10004 | ........ | ........ | ........ | ........ |
688 // +-------------------------------------------+
689 // 10008 | ........ | ........ | ........ | ........ |
690 // 1000c | ........ | ........ | ........ | ........ |
691 // +===========================================+
692 // 10010 | X28 Register |
693 // 10014 | X28 Register |
694 // +-------------------------------------------+
695 // 10018 | X27 Register |
696 // 1001c | X27 Register |
697 // +===========================================+
698 // 10020 | Frame Pointer |
699 // 10024 | Frame Pointer |
700 // +-------------------------------------------+
701 // 10028 | Link Register |
702 // 1002c | Link Register |
703 // +===========================================+
704 // 10030 | ........ | ........ | ........ | ........ |
705 // 10034 | ........ | ........ | ........ | ........ |
706 // +-------------------------------------------+
707 // 10038 | ........ | ........ | ........ | ........ |
708 // 1003c | ........ | ........ | ........ | ........ |
709 // +-------------------------------------------+
711 // [sp] = 10030 :: >>initial value<<
712 // sp = 10020 :: stp fp, lr, [sp, #-16]!
713 // fp = sp == 10020 :: mov fp, sp
714 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
715 // sp == 10010 :: >>final value<<
717 // The frame pointer (w29) points to address 10020. If we use an offset of
718 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
719 // for w27, and -32 for w28:
722 // .cfi_def_cfa w29, 16
724 // .cfi_offset w30, -8
726 // .cfi_offset w29, -16
728 // .cfi_offset w27, -24
730 // .cfi_offset w28, -32
// NOTE(review): the 'if (HasFP) {' header for the CFA-via-FP branch below
// (and the matching '} else {') are missing from this copy of the source.
733 // Define the current CFA rule to use the provided FP.
734 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
735 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
736 nullptr, Reg, 2 * StackGrowth - FixedObject));
737 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
738 .addCFIIndex(CFIIndex)
739 .setMIFlags(MachineInstr::FrameSetup);
741 // Encode the stack size of the leaf function.
742 unsigned CFIIndex = MF.addFrameInst(
743 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
744 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
745 .addCFIIndex(CFIIndex)
746 .setMIFlags(MachineInstr::FrameSetup);
749 // Now emit the moves for whatever callee saved regs we have (including FP,
750 // LR if those are saved).
751 emitCalleeSavedFrameMoves(MBB, MBBI);
// Emit the function epilogue: restore callee-saved registers and deallocate
// the stack frame (including any callee-popped argument space).
// NOTE(review): several statements of this function (DebugLoc declaration,
// early returns, some 'if' headers/braces) are missing from this copy of
// the source -- the review notes below mark the clearest gaps; verify all
// of them against upstream.
755 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
756 MachineBasicBlock &MBB) const {
757 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
758 MachineFrameInfo &MFI = MF.getFrameInfo();
759 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
760 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// NOTE(review): the 'DebugLoc DL;' declaration used below is missing from
// this copy of the source.
762 bool IsTailCallReturn = false;
763 if (MBB.end() != MBBI) {
764 DL = MBBI->getDebugLoc();
765 unsigned RetOpcode = MBBI->getOpcode();
766 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
767 RetOpcode == AArch64::TCRETURNri;
769 int NumBytes = MFI.getStackSize();
770 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
772 // All calls are tail calls in GHC calling conv, and functions have no
773 // prologue/epilogue.
774 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
// NOTE(review): the 'return;' body of the GHC check is missing here.
777 // Initial and residual are named for consistency with the prologue. Note that
778 // in the epilogue, the residual adjustment is executed first.
779 uint64_t ArgumentPopSize = 0;
780 if (IsTailCallReturn) {
781 MachineOperand &StackAdjust = MBBI->getOperand(1);
783 // For a tail-call in a callee-pops-arguments environment, some or all of
784 // the stack may actually be in use for the call's arguments, this is
785 // calculated during LowerCall and consumed here...
786 ArgumentPopSize = StackAdjust.getImm();
// NOTE(review): the '} else {' joining these two branches is missing from
// this copy of the source.
788 // ... otherwise the amount to pop is *all* of the argument space,
789 // conveniently stored in the MachineFunctionInfo by
790 // LowerFormalArguments. This will, of course, be zero for the C calling
792 ArgumentPopSize = AFI->getArgumentStackToRestore();
795 // The stack frame should be like below,
797 // ---------------------- ---
799 // | BytesInStackArgArea| CalleeArgStackSize
800 // | (NumReusableBytes) | (of tail call)
803 // ---------------------| --- |
805 // | CalleeSavedReg | | |
806 // | (CalleeSavedStackSize)| | |
808 // ---------------------| | NumBytes
809 // | | StackSize (StackAdjustUp)
810 // | LocalStackSize | | |
811 // | (covering callee | | |
814 // ---------------------- --- ---
816 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
817 // = StackSize + ArgumentPopSize
819 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
820 // it as the 2nd argument of AArch64ISD::TC_RETURN.
// NOTE(review): the 'bool IsWin64 =' left-hand side of this expression is
// missing from this copy of the source.
823 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
824 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
826 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
827 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// When the bumps are not combined, fold the callee-save deallocation into
// the last CSR reload (post-increment form).
829 if (!CombineSPBump && PrologueSaveSize != 0)
830 convertCalleeSaveRestoreToSPPrePostIncDec(
831 MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);
833 // Move past the restores of the callee-saved registers.
834 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
835 MachineBasicBlock::iterator Begin = MBB.begin();
836 while (LastPopI != Begin) {
// NOTE(review): the '--LastPopI;' decrement and the 'break;'/brace
// statements of this loop body are missing from this copy of the source.
838 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
841 } else if (CombineSPBump)
842 fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
845 // If there is a single SP update, insert it before the ret and we're done.
// NOTE(review): the 'if (CombineSPBump) {' header and subsequent 'return;'
// for this fast path are missing from this copy of the source.
847 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
848 NumBytes + ArgumentPopSize, TII,
849 MachineInstr::FrameDestroy);
853 NumBytes -= PrologueSaveSize;
854 assert(NumBytes >= 0 && "Negative stack allocation size!?");
// NOTE(review): the '!hasFP(MF)' guard around the red-zone/SP-restore path
// below appears to be missing from this copy of the source.
857 bool RedZone = canUseRedZone(MF);
858 // If this was a redzone leaf function, we don't need to restore the
859 // stack pointer (but we may need to pop stack args for fastcc).
860 if (RedZone && ArgumentPopSize == 0)
// NOTE(review): the 'return;' body of the check above is missing here.
863 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
864 int StackRestoreBytes = RedZone ? 0 : NumBytes;
865 if (NoCalleeSaveRestore)
866 StackRestoreBytes += ArgumentPopSize;
867 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
868 StackRestoreBytes, TII, MachineInstr::FrameDestroy);
869 // If we were able to combine the local stack pop with the argument pop,
871 if (NoCalleeSaveRestore || ArgumentPopSize == 0)
// NOTE(review): the 'return;' body of the check above is missing here.
876 // Restore the original stack pointer.
877 // FIXME: Rather than doing the math here, we should instead just use
878 // non-post-indexed loads for the restores if we aren't actually going to
879 // be able to save any instructions.
// With VLAs or realignment, SP is unreliable, so recompute it from FP
// (FP sits 16 bytes below the top of the callee-save area).
880 if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
881 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
882 -AFI->getCalleeSavedStackSize() + 16, TII,
883 MachineInstr::FrameDestroy);
// NOTE(review): the 'else if (NumBytes)' linking this alternative to the
// branch above appears to be missing from this copy of the source.
885 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
886 MachineInstr::FrameDestroy);
888 // This must be placed after the callee-save restore code because that code
889 // assumes the SP is at the same location as it was after the callee-save save
890 // code in the prologue.
// NOTE(review): the 'if (ArgumentPopSize)' guard for this final pop
// appears to be missing from this copy of the source.
892 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
893 ArgumentPopSize, TII, MachineInstr::FrameDestroy);
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
///
/// \param MF       Function whose frame object is being addressed.
/// \param FrameReg [out] Base register selected by resolveFrameIndexReference.
/// \return Byte offset to apply to \p FrameReg.
///
/// NOTE(review): the 'int FI,' parameter line is elided in this listing;
/// the body below references FI.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 unsigned &FrameReg) const {
  // Pure delegation: debug-info references use the same resolution as
  // code-gen references.
  return resolveFrameIndexReference(MF, FI, FrameReg);
/// Compute a base register and byte offset that together address frame
/// index \p FI.
///
/// \param MF       Function whose frame is being addressed.
/// \param FI       Frame index to resolve.
/// \param FrameReg [out] Base register to use (FP, SP, or the base pointer).
/// \param PreferFP When true, bias the FP-vs-SP choice toward the FP.
/// \return Byte offset from \p FrameReg (return statements are elided in
///         this listing).
///
/// NOTE(review): several lines are elided here (e.g. the 'bool IsWin64 ='
/// declaration, the UseFP flag and its uses, and the return paths); the
/// code below is kept exactly as found.
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  // Win64 varargs functions reserve home space for the varargs GPRs; treat
  // that 16-byte-aligned region as an extra fixed object above the frame
  // record.
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
  // FP-relative offset: +16 skips the frame record (prev_fp, prev_lr) that
  // sits at the FP (see the frame layout diagram at the top of the file).
  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
  // SP-relative offset, assuming the entire static frame has been allocated.
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);
  // Objects whose offset is >= -CalleeSavedStackSize live in the
  // callee-save area.
  bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
                               -((int)AFI->getCalleeSavedStackSize());

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
  } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
    // References to the CSR area must use FP if we're re-aligning the stack
    // since the dynamically-sized alignment padding is between the SP/BP and
    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
  } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
             !RegInfo->needsStackRealignment(MF)) {
    // Use SP or FP, whichever gives us the best chance of the offset
    // being in range for direct access. If the FPOffset is positive,
    // that'll always be best, as the SP will be even further away.
    // If the FPOffset is negative, we have to keep in mind that the
    // available offset range for negative offsets is smaller than for
    // positive ones. If we have variable sized objects, we're stuck with
    // using the FP regardless, though, as the SP offset is unknown
    // and we don't have a base pointer available. If an offset is
    // available via the FP and the SP, use whichever is closest.
    if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
        (FPOffset >= -256 && Offset > -FPOffset))

  assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");

  // NOTE(review): the branch structure around the FrameReg assignments below
  // is partially elided; assignments are kept as found.
  FrameReg = RegInfo->getFrameRegister(MF);

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
982 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
983 // Do not set a kill flag on values that are also marked as live-in. This
984 // happens with the @llvm-returnaddress intrinsic and with arguments passed in
985 // callee saved registers.
986 // Omitting the kill flags is conservatively correct even if the live-in
987 // is not used after all.
988 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
989 return getKillRegState(!IsLiveIn);
992 static bool produceCompactUnwindFrame(MachineFunction &MF) {
993 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
994 AttributeList Attrs = MF.getFunction().getAttributes();
995 return Subtarget.isTargetMachO() &&
996 !(Subtarget.getTargetLowering()->supportSwiftError() &&
997 Attrs.hasAttrSomewhere(Attribute::SwiftError));
/// Bookkeeping for one callee-save store/load: a single register, or a
/// same-class register pair handled by one STP/LDP.
///
/// NOTE(review): this listing elides some members (FrameIdx, Offset and
/// IsGPR are used by computeCalleeSaveRegisterPairs below) as well as the
/// struct's closing '};'.
struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister; ///< First (or only) register.
  unsigned Reg2 = AArch64::NoRegister; ///< Second register, if paired.

  RegPairInfo() = default;

  /// A pair was formed iff Reg2 was filled in.
  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

} // end anonymous namespace
/// Partition the callee-saved registers in \p CSI into RegPairInfo records,
/// pairing adjacent same-class (GPR64/FPR64) registers so each pair can be
/// saved/restored by a single STP/LDP, and assign every record its scaled
/// slot offset within the callee-save area.
///
/// \param MF       Function being lowered.
/// \param CSI      Callee-saved register info, sorted by frame index.
/// \param TRI      Target register info (unused in the visible lines).
/// \param RegPairs [out] Resulting records, in save order.
///
/// NOTE(review): this listing elides several lines (the 'RegPairInfo RPI;'
/// declaration, the 'RPI.Reg2 = NextReg;' pairing assignment, and various
/// closing braces); code below is kept exactly as found.
static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  unsigned Count = CSI.size();

  // MachO's compact unwind format relies on all registers being stored in
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  // Slots are assigned top-down: start at the full callee-save area size
  // and walk toward zero.
  int Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RPI.Reg1 = CSI[i].getReg();

    // Only 64-bit GPR/FPR callee saves are expected here.
    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
      AFI->setCalleeSaveStackHasFreeSpace(true);

    // A pair consumes 16 bytes, a lone register 8. RPI.Offset is the
    // STP/LDP scaled immediate, i.e. the byte offset divided by 8.
    Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    // The LDP/STP signed scaled immediate field covers [-64, 63].
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
/// Emit the callee-save spills at function entry. Pairs are stored with
/// STP, singles with STR (opcode selection partially elided below), at
/// SP-relative offsets produced by computeCalleeSaveRegisterPairs. The
/// pairs are walked in reverse so the first emitted store is the one that
/// emitPrologue may later convert into a pre-decrement store.
///
/// NOTE(review): this listing elides the DebugLoc declaration, the
/// 'unsigned StrOpc;' declaration with its IsGPR-based if/else selection,
/// loop-closing braces and the final 'return true;'. Code below is kept
/// exactly as found.
bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  SmallVector<RegPairInfo, 8> RegPairs;

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Reverse order: last-assigned slot (lowest offset) is stored first.
  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of spills for cs regs. The first spill may be converted
    // to a pre-decrement store later by emitPrologue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    stp     x22, x21, [sp, #0]     // addImm(+0)
    //    stp     x20, x19, [sp, #16]    // addImm(+2)
    //    stp     fp, lr, [sp, #32]      // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
          dbgs() << ", " << printReg(Reg2, TRI);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          dbgs() << ", " << RPI.FrameIdx+1;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    // Record each spilled register as an entry-block live-in unless it is
    // reserved.
    if (!MRI.isReserved(Reg1))
      MBB.addLiveIn(Reg1);
    if (RPI.isPaired()) {
      if (!MRI.isReserved(Reg2))
        MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
      // Memory operand for the second slot of the pair (FrameIdx + 1).
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOStore, 8, 8));

    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOStore, 8, 8));
/// Emit the callee-save reloads in the epilogue, mirroring
/// spillCalleeSavedRegisters: pairs via LDP, singles via LDR (opcode
/// selection partially elided below), at the offsets produced by
/// computeCalleeSaveRegisterPairs. Pairs are walked in forward order; the
/// last emitted load is the one emitEpilogue may later convert into a
/// post-increment load.
///
/// NOTE(review): this listing elides the 'DebugLoc DL;' declaration, the
/// 'unsigned LdrOpc;' declaration with its IsGPR-based if/else selection,
/// loop-closing braces and the final 'return true;'. Code below is kept
/// exactly as found.
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  SmallVector<RegPairInfo, 8> RegPairs;

  // Inherit the debug location of the instruction we insert before, if any.
  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be converted
    // to a post-increment load later by emitEpilogue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    ldp     fp, lr, [sp, #32]      // addImm(+4)
    //    ldp     x20, x19, [sp, #16]    // addImm(+2)
    //    ldp     x22, x21, [sp, #0]     // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
          dbgs() << ", " << printReg(Reg2, TRI);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          dbgs() << ", " << RPI.FrameIdx+1;
    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true));
      // Memory operand for the second slot of the pair (FrameIdx + 1).
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOLoad, 8, 8));

    MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOLoad, 8, 8));
/// Decide which callee-saved registers must be spilled for \p MF, marking
/// them in \p SavedRegs. Also forces FP/LR when a frame record is needed,
/// may spill an extra CSR (or create an emergency spill slot via \p RS) so
/// the register scavenger can materialize large stack offsets, and records
/// the rounded callee-save area size in AArch64FunctionInfo.
///
/// NOTE(review): this listing elides several lines (the GHC early 'return;',
/// 'continue;' statements, the 'if (BigStack)' guard around the
/// scratch-register logic, and various closing braces). Code below is kept
/// exactly as found.
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  // Track a spillable-but-unspilled CSR GPR (and its pair) as a scratch
  // candidate for the big-stack case below.
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
                                ? RegInfo->getBaseRegister()
                                : (unsigned)AArch64::NoRegister;

  // First pass: estimate the number of spilled registers to size the stack.
  unsigned SpillEstimate = SavedRegs.count();
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    // i ^ 1 selects the partner register within the CSR list's pairing.
    unsigned PairedReg = CSRegs[i ^ 1];
    if (Reg == BasePointerReg)
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))

  SpillEstimate += 2; // Conservatively include FP+LR in the estimate
  unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;

  // The frame record needs to be created by saving the appropriate registers
  if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);

  unsigned ExtraCSSpill = 0;
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = CSRegs[i ^ 1];
    // Remember an unspilled, non-reserved CSR GPR as a scratch candidate.
    if (AArch64::GPR64RegClass.contains(Reg) &&
        !RegInfo->isReservedReg(MF, Reg)) {
      UnspilledCSGPR = Reg;
      UnspilledCSGPRPaired = PairedReg;

    // MachO's compact unwind format relies on all registers being stored in
    // FIXME: the usual format is actually better if unwinding isn't needed.
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
      // Force the partner register so saves stay in adjacent pairs.
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = PairedReg;

  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
        for (unsigned Reg : SavedRegs.set_bits())
          dbgs() << ' ' << printReg(Reg, RegInfo);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumRegsSpilled = SavedRegs.count();
  bool CanEliminateFrame = NumRegsSpilled == 0;

  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
  bool BigStack = (CFSize > EstimatedStackSizeLimit);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else

    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
      DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      // MachO's compact unwind format relies on all registers being stored in
      // pairs, so if we need to spill one extra for BigStack, then we need to
      if (produceCompactUnwindFrame(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      // NOTE(review): the register spilled above is UnspilledCSGPR, yet
      // ExtraCSSpill records its *pair* — verify this is intentional.
      ExtraCSSpill = UnspilledCSGPRPaired;
      NumRegsSpilled = SavedRegs.count();

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass &RC = AArch64::GPR64RegClass;
      unsigned Size = TRI->getSpillSize(RC);
      unsigned Align = TRI->getSpillAlignment(RC);
      int FI = MFI.CreateStackObject(Size, Align, false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");

  // Round up to register pair alignment to avoid additional SP adjustment
  AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
/// Returns true when the callee-save area contains padding that stack-slot
/// scavenging may reuse. The flag is set by computeCalleeSaveRegisterPairs
/// when an unpaired register's slot is padded out to 16 bytes.
///
/// NOTE(review): the function's closing brace falls outside this listing.
bool AArch64FrameLowering::enableStackSlotScavenging(
    const MachineFunction &MF) const {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  return AFI->hasCalleeSaveStackFreeSpace();