1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the PPC implementation of TargetFrameLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
32 #define DEBUG_TYPE "framelowering"
// Counters declared with LLVM's STATISTIC macro; each pairs a counter name
// with the description string printed for it.
33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
35 STATISTIC(NumPrologProbed, "Number of prologues probed");
// Hidden command-line option "ppc-enable-pe-vector-spills". It is initialised
// to false, so prologue spills to vector registers are off unless requested.
// NOTE(review): the declaration line carrying the option's type is not visible
// in this listing.
38 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
39 cl::desc("Enable spills in prologue to vector registers."),
40 cl::init(false), cl::Hidden);
42 /// VRRegNo - Map from a numbered VR register to its enum value.
/// The table has 32 entries; entry i is PPC::V<i>, so it can be indexed
/// directly with a vector register number in the range 0..31.
44 static const MCPhysReg VRRegNo[] = {
45 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
46 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
47 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
48 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
// Computes the value cached in ReturnSaveOffset by the PPCFrameLowering
// constructor. Two results are visible: 16 (64-bit) / 8 (32-bit) from the
// first return, and 16 (64-bit) / 4 (32-bit) from the second.
// NOTE(review): the condition that selects the first return is not visible in
// this listing - confirm against the full file.
51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
53 return STI.isPPC64() ? 16 : 8;
55 return STI.isPPC64() ? 16 : 4;
// Computes the value cached in TOCSaveOffset by the PPCFrameLowering
// constructor. The first visible return yields 40 (64-bit) or 20 (32-bit);
// the second yields 24 under the ELFv2 ABI and 40 otherwise.
// NOTE(review): the condition guarding the first return is not visible in this
// listing - confirm against the full file.
58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
60 return STI.isPPC64() ? 40 : 20;
61 return STI.isELFv2ABI() ? 24 : 40;
// Computes the value cached in FramePointerSaveOffset by the PPCFrameLowering
// constructor. The result is a negated constant carried in an unsigned return
// type: -8U on 64-bit subtargets and -4U on 32-bit ones.
64 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
65 // First slot in the general register save area.
66 return STI.isPPC64() ? -8U : -4U;
// Computes the value cached in LinkageSize by the PPCFrameLowering
// constructor. For AIX, or for any 64-bit subtarget, the result is a count
// (4 under ELFv2, otherwise 6) multiplied by 8 on 64-bit or 4 on 32-bit.
// NOTE(review): the result for the remaining case (32-bit and not AIX) is not
// visible in this listing - confirm against the full file.
69 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
70 if (STI.isAIXABI() || STI.isPPC64())
71 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
// Computes the value cached in BasePointerSaveOffset by the PPCFrameLowering
// constructor. Position-independent 32-bit ELF code is special-cased; every
// other configuration gets -16U (64-bit) or -8U (32-bit).
77 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
78 // Third slot in the general purpose register save area.
79 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
// NOTE(review): the value returned for the case above is not visible in this
// listing - confirm against the full file.
82 // Second slot in the general purpose register save area.
83 return STI.isPPC64() ? -16U : -8U;
// Computes the value cached in CRSaveOffset by the PPCFrameLowering
// constructor: 4 for 32-bit AIX, 8 for every other subtarget.
86 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
87 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
// Constructs the PPC frame lowering for subtarget STI. The base class is told
// that the stack grows down, is given the platform stack alignment reported by
// the subtarget, and receives 0 as its third argument. Each ABI-dependent
// offset and the linkage size is computed once here from the subtarget and
// cached in the corresponding member.
90 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
91 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
92 STI.getPlatformStackAlignment(), 0),
93 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
94 TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
95 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
96 LinkageSize(computeLinkageSize(Subtarget)),
97 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
98 CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
// Selects the table of fixed callee-saved spill slots for the current ABI and
// stores its length in NumEntries. Four tables are declared below
// (ELFOffsets32, ELFOffsets64, AIXOffsets32, AIXOffsets64); the subtarget
// picks exactly one of them.
// NOTE(review): the macro bodies, the table initialisers and the return
// statements are not visible in this listing - confirm them against the full
// file before relying on this description.
100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
102 unsigned &NumEntries) const {
104 // Floating-point register save area offsets.
105 #define CALLEE_SAVED_FPRS \
125 // 32-bit general purpose register save area offsets shared by ELF and
126 // AIX. AIX has an extra CSR with r13.
127 #define CALLEE_SAVED_GPRS32 \
147 // 64-bit general purpose register save area offsets.
148 #define CALLEE_SAVED_GPRS64 \
168 // Vector register save area offsets.
169 #define CALLEE_SAVED_VRS \
183 // Note that the offsets here overlap, but this is fixed up in
184 // processFunctionBeforeFrameFinalized.
186 static const SpillSlot ELFOffsets32[] = {
190 // CR save area offset. We map each of the nonvolatile CR fields
191 // to the slot for CR2, which is the first of the nonvolatile CR
192 // fields to be assigned, so that we only allocate one save slot.
193 // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
196 // VRSAVE save area offset.
201 // SPE register save area (overlaps Vector save area).
221 static const SpillSlot ELFOffsets64[] = {
225 // VRSAVE save area offset.
230 static const SpillSlot AIXOffsets32[] = {
233 // Add AIX's extra CSR.
235 // TODO: Update when we add vector support for AIX.
238 static const SpillSlot AIXOffsets64[] = {
241 // TODO: Update when we add vector support for AIX.
// 64-bit ELF uses ELFOffsets64.
244 if (Subtarget.is64BitELFABI()) {
245 NumEntries = array_lengthof(ELFOffsets64);
// 32-bit ELF uses ELFOffsets32.
249 if (Subtarget.is32BitELFABI()) {
250 NumEntries = array_lengthof(ELFOffsets32);
// Anything else must be AIX; the table is chosen by pointer width.
254 assert(Subtarget.isAIXABI() && "Unexpected ABI.");
256 if (Subtarget.isPPC64()) {
257 NumEntries = array_lengthof(AIXOffsets64);
261 NumEntries = array_lengthof(AIXOffsets32);
265 /// RemoveVRSaveCode - We have found that this function does not need any code
266 /// to manipulate the VRSAVE register, even though it uses vector registers.
267 /// This can happen when the only registers used are known to be live in or out
268 /// of the function. Remove all of the VRSAVE related code from the function.
269 /// FIXME: The removal of the code results in a compile failure at -O0 when the
270 /// function contains a function call, as the GPR containing original VRSAVE
271 /// contents is spilled and reloaded around the call. Without the prolog code,
272 /// the spill instruction refers to an undefined register. This code needs
273 /// to account for all uses of that GPR.
///
/// \p MI is the UPDATE_VRSAVE instruction. It is erased at the end of this
/// function, so the caller must not use it afterwards.
274 static void RemoveVRSaveCode(MachineInstr &MI) {
275 MachineBasicBlock *Entry = MI.getParent();
276 MachineFunction *MF = Entry->getParent();
278 // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
279 MachineBasicBlock::iterator MBBI = MI;
// NOTE(review): the statement that advances MBBI past MI is not visible in
// this listing; the assertion below expects MBBI to point at the MTVRSAVE.
281 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
282 MBBI->eraseFromParent();
284 bool RemovedAllMTVRSAVEs = true;
285 // See if we can find and remove the MTVRSAVE instruction from all of the
// return blocks of the function.
287 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// Only blocks that end in a return are examined for an MTVRSAVE to erase.
288 // If last instruction is a return instruction, add an epilogue
289 if (I->isReturnBlock()) {
290 bool FoundIt = false;
// Walk the block backwards, starting from its end.
291 for (MBBI = I->end(); MBBI != I->begin(); ) {
293 if (MBBI->getOpcode() == PPC::MTVRSAVE) {
294 MBBI->eraseFromParent(); // remove it.
// A single return block without an MTVRSAVE clears the flag for good.
299 RemovedAllMTVRSAVEs &= FoundIt;
303 // If we found and removed all MTVRSAVE instructions, remove the read of
// VRSAVE (the MFVRSAVE instruction) as well.
305 if (RemovedAllMTVRSAVEs) {
// NOTE(review): the statements that position MBBI relative to MI are not
// visible in this listing; the assertions expect it to reach the MFVRSAVE.
307 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
309 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
310 MBBI->eraseFromParent();
313 // Finally, nuke the UPDATE_VRSAVE.
314 MI.eraseFromParent();
317 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
318 // instruction selector. Based on the vector registers that have been used,
319 // transform this into the appropriate ORI instruction.
//
// A 32-bit mask is built in which bit (31 - i) stands for vector register Vi.
// Live-in registers and registers used by return instructions are cleared from
// it. If the mask ends up empty, all VRSAVE code is removed; otherwise the
// mask is OR-ed into the destination with ORI, ORIS, or both, and MI is erased.
320 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
321 MachineFunction *MF = MI.getParent()->getParent();
322 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
323 DebugLoc dl = MI.getDebugLoc();
325 const MachineRegisterInfo &MRI = MF->getRegInfo();
// Start with every vector register that the function modifies.
326 unsigned UsedRegMask = 0;
327 for (unsigned i = 0; i != 32; ++i)
328 if (MRI.isPhysRegModified(VRRegNo[i]))
329 UsedRegMask |= 1 << (31-i);
331 // Live in and live out values already must be in the mask, so don't bother
// marking them.
333 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
334 unsigned RegNo = TRI->getEncodingValue(LI.first);
335 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg.
336 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
339 // Live out registers appear as use operands on return instructions.
// The scan stops early once the mask is already empty.
340 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
341 UsedRegMask != 0 && BI != BE; ++BI) {
342 const MachineBasicBlock &MBB = *BI;
343 if (!MBB.isReturnBlock())
345 const MachineInstr &Ret = MBB.back();
346 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
347 const MachineOperand &MO = Ret.getOperand(I);
// Only register operands in the VRRC register class are of interest.
348 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
350 unsigned RegNo = TRI->getEncodingValue(MO.getReg());
351 UsedRegMask &= ~(1 << (31-RegNo));
355 // If no registers are used, turn this into a copy.
356 if (UsedRegMask == 0) {
357 // Remove all VRSAVE code.
358 RemoveVRSaveCode(MI);
// Operand 0 of MI is the destination register and operand 1 is the source.
362 Register SrcReg = MI.getOperand(1).getReg();
363 Register DstReg = MI.getOperand(0).getReg();
// Case 1: the mask fits entirely in the low 16 bits, so one ORI is enough.
// NOTE(review): in each case below two BuildMI chains are visible, the first
// guarded by DstReg != SrcReg and the second marking SrcReg as killed; the
// line separating them and the first chain's source operand are not visible.
365 if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
366 if (DstReg != SrcReg)
367 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
369 .addImm(UsedRegMask);
371 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
372 .addReg(SrcReg, RegState::Kill)
373 .addImm(UsedRegMask);
// Case 2: only the high 16 bits are set, so one ORIS is enough.
374 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
375 if (DstReg != SrcReg)
376 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
378 .addImm(UsedRegMask >> 16);
380 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
381 .addReg(SrcReg, RegState::Kill)
382 .addImm(UsedRegMask >> 16);
// Case 3: bits in both halves - ORIS for the high half, then ORI for the low
// half applied to the partially updated destination.
384 if (DstReg != SrcReg)
385 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
387 .addImm(UsedRegMask >> 16);
389 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
390 .addReg(SrcReg, RegState::Kill)
391 .addImm(UsedRegMask >> 16);
393 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
394 .addReg(DstReg, RegState::Kill)
395 .addImm(UsedRegMask & 0xFFFF);
398 // Remove the old UPDATE_VRSAVE instruction.
399 MI.eraseFromParent();
// Returns the isCRSpilled() flag recorded in the function's PPCFunctionInfo.
402 static bool spillsCR(const MachineFunction &MF) {
403 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
404 return FuncInfo->isCRSpilled();
// Returns the isVRSAVESpilled() flag recorded in the function's
// PPCFunctionInfo.
407 static bool spillsVRSAVE(const MachineFunction &MF) {
408 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
409 return FuncInfo->isVRSAVESpilled();
// Returns the hasSpills() flag recorded in the function's PPCFunctionInfo.
412 static bool hasSpills(const MachineFunction &MF) {
413 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
414 return FuncInfo->hasSpills();
// Returns the hasNonRISpills() flag recorded in the function's
// PPCFunctionInfo.
417 static bool hasNonRISpills(const MachineFunction &MF) {
418 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
419 return FuncInfo->hasNonRISpills();
422 /// MustSaveLR - Return true if this function requires that we save the LR
423 /// register onto the stack in the prolog and restore it in the epilog of the
/// function.
///
/// \p LR is the link register to inspect. The result is true when that
/// register has at least one definition in \p MF, or when the function info
/// reports that an LR store is required.
425 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
426 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
428 // We need a save/restore of LR if there is any def of LR (which is
429 // defined by calls, including the PIC setup sequence), or if there is
430 // some use of the LR stack slot (e.g. for builtin_return_address).
431 // (LR comes in 32 and 64 bit versions.)
432 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
433 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
436 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
437 /// call frame size. Update the MachineFunction object with the stack size.
///
/// Delegates the computation to determineFrameLayout, then writes both the
/// frame size and the maximum call frame size back into the MachineFrameInfo
/// of \p MF. \p UseEstimate is forwarded unchanged.
/// NOTE(review): the return type and the return statement are not visible in
/// this listing; callers in this file use the result as the frame size.
439 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
440 bool UseEstimate) const {
441 unsigned NewMaxCallFrameSize = 0;
442 unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
443 &NewMaxCallFrameSize);
444 MF.getFrameInfo().setStackSize(FrameSize);
445 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
449 /// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
///
/// This variant takes a const MachineFunction and does not write to the frame
/// info itself. If \p NewMaxCallFrameSize is non-null it receives the computed
/// maximum call frame size. When \p UseEstimate is set, the starting size
/// comes from MFI.estimateStackSize() instead of MFI.getStackSize().
/// NOTE(review): several lines, including the body of the red-zone branch and
/// the final return, are not visible in this listing.
452 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
454 unsigned *NewMaxCallFrameSize) const {
455 const MachineFrameInfo &MFI = MF.getFrameInfo();
456 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
458 // Get the number of bytes to allocate from the FrameInfo
460 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
462 // Get stack alignments. The frame must be aligned to the greatest of these:
463 Align TargetAlign = getStackAlign(); // alignment required per the ABI
464 Align MaxAlign = MFI.getMaxAlign(); // alignment required by data in frame
465 Align Alignment = std::max(TargetAlign, MaxAlign);
467 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
469 unsigned LR = RegInfo->getRARegister();
// The red zone is usable only when the function is not marked NoRedZone and
// none of the conditions listed below forces a real stack frame.
470 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
471 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
472 !MFI.adjustsStack() && // No calls.
473 !MustSaveLR(MF, LR) && // No need to save LR.
474 !FI->mustSaveTOC() && // No need to save TOC.
475 !RegInfo->hasBasePointer(MF); // No special alignment.
477 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
478 // code if all local vars are reg-allocated.
479 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
481 // Check whether we can skip adjusting the stack pointer (by using red zone)
482 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
487 // Get the maximum call frame size of all the calls.
488 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
490 // Maximum call frame needs to be at least big enough for linkage area.
491 unsigned minCallFrameSize = getLinkageSize();
492 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
494 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
495 // that allocations will be aligned.
496 if (MFI.hasVarSizedObjects())
497 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
499 // Update the new max call frame size if the caller passes in a valid pointer.
500 if (NewMaxCallFrameSize)
501 *NewMaxCallFrameSize = maxCallFrameSize;
503 // Include call frame size in total.
504 FrameSize += maxCallFrameSize;
506 // Make sure the frame is aligned.
507 FrameSize = alignTo(FrameSize, Alignment);
512 // hasFP - Return true if the specified function actually has a dedicated frame
// pointer register. That is the case only when the recorded stack size is
// non-zero and needsFP(MF) holds.
514 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
515 const MachineFrameInfo &MFI = MF.getFrameInfo();
516 // FIXME: This is pretty much broken by design: hasFP() might be called really
517 // early, before the stack layout was calculated and thus hasFP() might return
518 // true or false here depending on the time of call.
519 return (MFI.getStackSize()) && needsFP(MF);
522 // needsFP - Return true if the specified function should have a dedicated frame
523 // pointer register. This is true if the function has variable sized allocas or
524 // if frame pointer elimination is disabled.
// The visible return also yields true when the frame has a stack map or a
// patch point, or when GuaranteedTailCallOpt is enabled and the function info
// reports a fast call.
525 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
526 const MachineFrameInfo &MFI = MF.getFrameInfo();
528 // Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
// NOTE(review): the statement executed for naked functions is not visible in
// this listing.
530 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
533 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
534 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
535 (MF.getTarget().Options.GuaranteedTailCallOpt &&
536 MF.getInfo<PPCFunctionInfo>()->hasFastCall());
// Visits every register operand of every instruction in MF and dispatches on
// the register it names. The replacement candidates are chosen up front:
// R31/X31 when needsFP(MF) holds and R1/X1 otherwise for the frame pointer,
// and the base register (X30 in the 64-bit form) when a base pointer exists,
// falling back to the frame pointer choice when it does not.
// NOTE(review): the switch cases are not visible in this listing; presumably
// they substitute the registers selected here - confirm against the full file.
539 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
540 bool is31 = needsFP(MF);
541 unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
542 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
544 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
545 bool HasBP = RegInfo->hasBasePointer(MF);
546 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
547 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
549 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
// Each block is walked backwards, from its end towards its beginning.
551 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
553 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
554 MachineOperand &MO = MBBI->getOperand(I);
558 switch (MO.getReg()) {
// Finds up to two scratch registers usable in MBB; the contract is spelled out
// in the comment that follows.
// NOTE(review): the end of that comment, part of the parameter list and
// several statements (including every return) are not visible in this listing.
577 /* This function will do the following:
578 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
579 respectively (defaults recommended by the ABI) and return true
580 - If MBB is not an entry block, initialize the register scavenger and look
581 for available registers.
582 - If the defaults (R0/R12) are available, return true
583 - If TwoUniqueRegsRequired is set to true, it looks for two unique
584 registers. Otherwise, look for a single available register.
585 - If the required registers are found, set SR1 and SR2 and return true.
586 - If the required registers are not found, set SR2 or both SR1 and SR2 to
587 PPC::NoRegister and return false.
589 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
590 is not set, this function will attempt to find two different registers, but
591 still return true if only one register is available (and set SR1 == SR2).
594 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
596 bool TwoUniqueRegsRequired,
598 Register *SR2) const {
// The default candidates are the 64-bit or 32-bit forms of r0 and r12.
600 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
601 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
603 // Set the defaults for the two scratch registers.
608 assert (SR1 && "Asking for the second scratch register but not the first?");
612 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
// "Exit" here means a return block when the registers are needed at the end of
// the block; "entry" means the first block of the function otherwise.
613 if ((UseAtEnd && MBB->isReturnBlock()) ||
614 (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
617 RS.enterBasicBlock(*MBB);
619 if (UseAtEnd && !MBB->empty()) {
620 // The scratch register will be used at the end of the block, so must
621 // consider all registers used within the block
623 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
624 // If no terminator, back iterator up to previous instruction.
625 if (MBBI == MBB->end())
626 MBBI = std::prev(MBBI);
628 if (MBBI != MBB->begin())
632 // If the two registers are available, we're all good.
633 // Note that we only return here if both R0 and R12 are available because
634 // although the function may not require two unique registers, it may benefit
635 // from having two so we should try to provide them.
636 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
639 // Get the list of callee-saved registers for the target.
640 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
641 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
643 // Get all the available registers in the block.
644 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
647 // We shouldn't use callee-saved registers as scratch registers as they may be
648 // available when looking for a candidate block for shrink wrapping but not
649 // available when the actual prologue/epilogue is being emitted because they
650 // were added as live-in to the prologue block by PrologueEpilogueInserter.
// The callee-saved list is terminated by a zero entry.
651 for (int i = 0; CSRegs[i]; ++i)
654 // Set the first scratch register to the first available one.
// find_first() yields -1 when no bit is set, which maps to PPC::NoRegister.
656 int FirstScratchReg = BV.find_first();
657 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
660 // If there is another one available, set the second scratch register to that.
661 // Otherwise, set it to either PPC::NoRegister if this function requires two
662 // or to whatever SR1 is set to if this function doesn't require two.
664 int SecondScratchReg = BV.find_next(*SR1);
665 if (SecondScratchReg != -1)
666 *SR2 = SecondScratchReg;
668 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
671 // Now that we've done our best to provide both registers, double check
672 // whether we were unable to provide enough.
673 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
679 // We need a scratch register for spilling LR and for spilling CR. By default,
680 // we use two scratch registers to hide latency. However, if only one scratch
681 // register is available, we can adjust for that by not overlapping the spill
682 // code. However, if we need to realign the stack (i.e. have a base pointer)
683 // and the stack frame is large, we need two scratch registers.
//
// Returns true when the function containing MBB has a base pointer, its
// maximum alignment is greater than 1, and either the frame is large or there
// is no red zone.
685 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
686 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
687 MachineFunction &MF = *(MBB->getParent());
688 bool HasBP = RegInfo->hasBasePointer(MF);
689 unsigned FrameSize = determineFrameLayout(MF);
690 int NegFrameSize = -FrameSize;
// "Large" means the negated frame size does not fit in a signed 16-bit value.
691 bool IsLargeFrame = !isInt<16>(NegFrameSize);
692 MachineFrameInfo &MFI = MF.getFrameInfo();
693 Align MaxAlign = MFI.getMaxAlign();
// A red zone is assumed on 64-bit subtargets and on any non-SVR4 ABI.
694 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
696 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
// Returns whether MBB can host the prologue, i.e. whether findScratchRegister
// can supply the scratch registers needed there. The second argument is false
// and the third states whether two distinct registers are required for this
// block. The const_cast exists only because the helpers take a non-const
// block pointer.
699 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
700 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
702 return findScratchRegister(TmpMBB, false,
703 twoUniqueScratchRegsRequired(TmpMBB));
// Returns whether MBB can host the epilogue, i.e. whether findScratchRegister
// succeeds for it when called with true as its second argument. The const_cast
// exists only because the helper takes a non-const block pointer.
706 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
707 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
709 return findScratchRegister(TmpMBB, true);
// Decides whether the stack pointer update may be moved within the prologue.
// The visible checks are: the subtarget is 64-bit ELFv2; the frame size is
// non-zero and no larger than the red zone; there is no frame pointer or base
// pointer; the function has no fast call and does not use the PIC base; and
// frame index scavenging is not required.
// NOTE(review): the statements executed when a check fails are not visible in
// this listing; presumably each one returns false.
712 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
713 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
714 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
716 // Abort if there is no register info or function info.
720 // Only move the stack update on ELFv2 ABI and PPC64.
721 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
724 // Check the frame size first and return false if it does not fit the
// requirements.
726 // We need a non-zero frame size as well as a frame that will fit in the red
727 // zone. This is because by moving the stack pointer update we are now storing
728 // to the red zone until the stack pointer is updated. If we get an interrupt
729 // inside the prologue but before the stack update we now have a number of
730 // stores to the red zone and those stores must all fit.
731 MachineFrameInfo &MFI = MF.getFrameInfo();
732 unsigned FrameSize = MFI.getStackSize();
733 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
736 // Frame pointers and base pointers complicate matters so don't do anything
737 // if we have them. For example having a frame pointer will sometimes require
738 // a copy of r1 into r31 and that makes keeping track of updates to r1 more
// difficult.
740 if (hasFP(MF) || RegInfo->hasBasePointer(MF))
743 // Calls to fast_cc functions use different rules for passing parameters on
744 // the stack from the ABI and using PIC base in the function imposes
745 // similar restrictions to using the base pointer. It is not generally safe
746 // to move the stack pointer update in these situations.
747 if (FI->hasFastCall() || FI->usesPICBase())
750 // Finally we can move the stack update if we do not require register
751 // scavenging. Register scavenging can introduce more spills and so
752 // may make the frame size larger than we have computed.
753 return !RegInfo->requiresFrameIndexScavenging(MF);
756 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
757 MachineBasicBlock &MBB) const {
758 MachineBasicBlock::iterator MBBI = MBB.begin();
759 MachineFrameInfo &MFI = MF.getFrameInfo();
760 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
761 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
762 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
764 MachineModuleInfo &MMI = MF.getMMI();
765 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
767 // AIX assembler does not support cfi directives.
768 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
770 // Get processor type.
771 bool isPPC64 = Subtarget.isPPC64();
773 bool isSVR4ABI = Subtarget.isSVR4ABI();
774 bool isAIXABI = Subtarget.isAIXABI();
775 bool isELFv2ABI = Subtarget.isELFv2ABI();
776 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
778 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
781 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
782 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
784 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
785 HandleVRSaveUpdate(*MBBI, TII);
790 // Move MBBI back to the beginning of the prologue block.
793 // Work out frame sizes.
794 unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
795 int NegFrameSize = -FrameSize;
796 if (!isInt<32>(NegFrameSize))
797 llvm_unreachable("Unhandled stack size!");
799 if (MFI.isFrameAddressTaken())
800 replaceFPWithRealFP(MF);
802 // Check if the link register (LR) must be saved.
803 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
804 bool MustSaveLR = FI->mustSaveLR();
805 bool MustSaveTOC = FI->mustSaveTOC();
806 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
807 bool MustSaveCR = !MustSaveCRs.empty();
808 // Do we have a frame pointer and/or base pointer for this function?
809 bool HasFP = hasFP(MF);
810 bool HasBP = RegInfo->hasBasePointer(MF);
811 bool HasRedZone = isPPC64 || !isSVR4ABI;
813 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
814 Register BPReg = RegInfo->getBaseRegister(MF);
815 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
816 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
817 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
819 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
820 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
821 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
823 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
825 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
827 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
829 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
831 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
833 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
835 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
837 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
839 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
841 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
843 // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
844 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
845 // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
846 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
847 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
848 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
850 // Using the same bool variable as below to suppress compiler warnings.
851 // Stack probe requires two scratch registers, one for old sp, one for large
852 // frame and large probe size.
853 bool SingleScratchReg = findScratchRegister(
855 twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
856 &ScratchReg, &TempReg);
857 assert(SingleScratchReg &&
858 "Required number of registers not available in this block");
860 SingleScratchReg = ScratchReg == TempReg;
862 int LROffset = getReturnSaveOffset();
867 MachineFrameInfo &MFI = MF.getFrameInfo();
868 int FPIndex = FI->getFramePointerSaveIndex();
869 assert(FPIndex && "No Frame Pointer Save Slot!");
870 FPOffset = MFI.getObjectOffset(FPIndex);
872 FPOffset = getFramePointerSaveOffset();
879 MachineFrameInfo &MFI = MF.getFrameInfo();
880 int BPIndex = FI->getBasePointerSaveIndex();
881 assert(BPIndex && "No Base Pointer Save Slot!");
882 BPOffset = MFI.getObjectOffset(BPIndex);
884 BPOffset = getBasePointerSaveOffset();
889 if (FI->usesPICBase()) {
890 MachineFrameInfo &MFI = MF.getFrameInfo();
891 int PBPIndex = FI->getPICBasePointerSaveIndex();
892 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
893 PBPOffset = MFI.getObjectOffset(PBPIndex);
896 // Get stack alignments.
897 Align MaxAlign = MFI.getMaxAlign();
898 if (HasBP && MaxAlign > 1)
899 assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
901 // Frames of 32KB & larger require special handling because they cannot be
902 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
903 bool isLargeFrame = !isInt<16>(NegFrameSize);
905 // Check if we can move the stack update instruction (stdu) down the prologue
906 // past the callee saves. Hopefully this will avoid the situation where the
907 // saves are waiting for the update on the store with update to complete.
908 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
909 bool MovingStackUpdateDown = false;
911 // Check if we can move the stack update.
912 if (stackUpdateCanBeMoved(MF)) {
913 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
914 for (CalleeSavedInfo CSI : Info) {
915 int FrIdx = CSI.getFrameIdx();
916 // If the frame index is not negative the callee saved info belongs to a
917 // stack object that is not a fixed stack object. We ignore non-fixed
918 // stack objects because we won't move the stack update pointer past them.
922 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
924 MovingStackUpdateDown = true;
926 // We need all of the Frame Indices to meet these conditions.
927 // If they do not, abort the whole operation.
928 StackUpdateLoc = MBBI;
929 MovingStackUpdateDown = false;
934 // If the operation was not aborted then update the object offset.
935 if (MovingStackUpdateDown) {
936 for (CalleeSavedInfo CSI : Info) {
937 int FrIdx = CSI.getFrameIdx();
939 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
944 // Where in the prologue we move the CR fields depends on how many scratch
945 // registers we have, and if we need to save the link register or not. This
946 // lambda is to avoid duplicating the logic in 2 places.
947 auto BuildMoveFromCR = [&]() {
948 if (isELFv2ABI && MustSaveCRs.size() == 1) {
949 // In the ELFv2 ABI, we are not required to save all CR fields.
950 // If only one CR field is clobbered, it is more efficient to use
951 // mfocrf to selectively save just that field, because mfocrf has short
952 // latency compares to mfcr.
953 assert(isPPC64 && "V2 ABI is 64-bit only.");
954 MachineInstrBuilder MIB =
955 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
956 MIB.addReg(MustSaveCRs[0], RegState::Kill);
958 MachineInstrBuilder MIB =
959 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
960 for (unsigned CRfield : MustSaveCRs)
961 MIB.addReg(CRfield, RegState::ImplicitKill);
965 // If we need to spill the CR and the LR but we don't have two separate
966 // registers available, we must spill them one at a time
967 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
969 BuildMI(MBB, MBBI, dl, StoreWordInst)
970 .addReg(TempReg, getKillRegState(true))
971 .addImm(CRSaveOffset)
976 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
978 if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
983 BuildMI(MBB, MBBI, dl, StoreInst)
987 if (FI->usesPICBase())
988 BuildMI(MBB, MBBI, dl, StoreInst)
993 BuildMI(MBB, MBBI, dl, StoreInst)
1000 BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1001 .addReg(ScratchReg, getKillRegState(true))
1006 !(SingleScratchReg && MustSaveLR)) {
1007 assert(HasRedZone && "A red zone is always available on PPC64");
1008 BuildMI(MBB, MBBI, dl, StoreWordInst)
1009 .addReg(TempReg, getKillRegState(true))
1010 .addImm(CRSaveOffset)
1014 // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1018 // Adjust stack pointer: r1 += NegFrameSize.
1019 // If there is a preferred stack alignment, align R1 now
1021 if (HasBP && HasRedZone) {
1022 // Save a copy of r1 as the base pointer.
1023 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1028 // Have we generated a STUX instruction to claim stack frame? If so,
1029 // the negated frame size will be placed in ScratchReg.
1030 bool HasSTUX = false;
1032 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
1033 // pointer is always stored at SP, we will get a free probe due to an essential
1034 // STU(X) instruction.
1035 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
1036 // To be consistent with other targets, a pseudo instruction is emitted and
1037 // will be later expanded in `inlineStackProbe`.
1038 BuildMI(MBB, MBBI, dl,
1039 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
1040 : PPC::PROBED_STACKALLOC_32))
1042 .addDef(TempReg) // TempReg stores the old sp.
1043 .addImm(NegFrameSize);
1044 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
1045 // update the ScratchReg to meet the assumption that ScratchReg contains
1046 // the NegFrameSize. This solution is rather tricky.
1048 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1054 // This condition must be kept in sync with canUseAsPrologue.
1055 if (HasBP && MaxAlign > 1) {
1057 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1060 .addImm(64 - Log2(MaxAlign));
1062 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1065 .addImm(32 - Log2(MaxAlign))
1067 if (!isLargeFrame) {
1068 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1069 .addReg(ScratchReg, RegState::Kill)
1070 .addImm(NegFrameSize);
1072 assert(!SingleScratchReg && "Only a single scratch reg available");
1073 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1074 .addImm(NegFrameSize >> 16);
1075 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1076 .addReg(TempReg, RegState::Kill)
1077 .addImm(NegFrameSize & 0xFFFF);
1078 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1079 .addReg(ScratchReg, RegState::Kill)
1080 .addReg(TempReg, RegState::Kill);
1083 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1084 .addReg(SPReg, RegState::Kill)
1086 .addReg(ScratchReg);
1089 } else if (!isLargeFrame) {
1090 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1092 .addImm(NegFrameSize)
1096 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1097 .addImm(NegFrameSize >> 16);
1098 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1099 .addReg(ScratchReg, RegState::Kill)
1100 .addImm(NegFrameSize & 0xFFFF);
1101 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1102 .addReg(SPReg, RegState::Kill)
1104 .addReg(ScratchReg);
1109 // Save the TOC register after the stack pointer update if a prologue TOC
1110 // save is required for the function.
1112 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1113 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1114 .addReg(TOCReg, getKillRegState(true))
1115 .addImm(TOCSaveOffset)
1120 assert(!isPPC64 && "A red zone is always available on PPC64");
1122 // The negated frame size is in ScratchReg, and the SPReg has been
1123 // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1124 // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1125 // the stack frame (i.e. the old SP), ideally, we would put the old
1126 // SP into a register and use it as the base for the stores. The
1127 // problem is that the only available register may be ScratchReg,
1128 // which could be R0, and R0 cannot be used as a base address.
1130 // First, set ScratchReg to the old SP. This may need to be modified
1132 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1133 .addReg(ScratchReg, RegState::Kill)
1136 if (ScratchReg == PPC::R0) {
1137 // R0 cannot be used as a base register, but it can be used as an
1138 // index in a store-indexed.
1141 // R0 += (FPOffset-LastOffset).
1142 // Need addic, since addi treats R0 as 0.
1143 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1145 .addImm(FPOffset-LastOffset);
1146 LastOffset = FPOffset;
1147 // Store FP into *R0.
1148 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1149 .addReg(FPReg, RegState::Kill) // Save FP.
1151 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1153 if (FI->usesPICBase()) {
1154 // R0 += (PBPOffset-LastOffset).
1155 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1157 .addImm(PBPOffset-LastOffset);
1158 LastOffset = PBPOffset;
1159 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1160 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1162 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1165 // R0 += (BPOffset-LastOffset).
1166 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1168 .addImm(BPOffset-LastOffset);
1169 LastOffset = BPOffset;
1170 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1171 .addReg(BPReg, RegState::Kill) // Save BP.
1173 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1174 // BP = R0-LastOffset
1175 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1176 .addReg(ScratchReg, RegState::Kill)
1177 .addImm(-LastOffset);
1180 // ScratchReg is not R0, so use it as the base register. It is
1181 // already set to the old SP, so we can use the offsets directly.
1183 // Now that the stack frame has been allocated, save all the necessary
1184 // registers using ScratchReg as the base address.
1186 BuildMI(MBB, MBBI, dl, StoreInst)
1189 .addReg(ScratchReg);
1190 if (FI->usesPICBase())
1191 BuildMI(MBB, MBBI, dl, StoreInst)
1194 .addReg(ScratchReg);
1196 BuildMI(MBB, MBBI, dl, StoreInst)
1199 .addReg(ScratchReg);
1200 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1201 .addReg(ScratchReg, RegState::Kill)
1202 .addReg(ScratchReg);
1206 // The frame size is a known 16-bit constant (fitting in the immediate
1207 // field of STWU). To be here we have to be compiling for PPC32.
1208 // Since the SPReg has been decreased by FrameSize, add it back to each
1211 BuildMI(MBB, MBBI, dl, StoreInst)
1213 .addImm(FrameSize + FPOffset)
1215 if (FI->usesPICBase())
1216 BuildMI(MBB, MBBI, dl, StoreInst)
1218 .addImm(FrameSize + PBPOffset)
1221 BuildMI(MBB, MBBI, dl, StoreInst)
1223 .addImm(FrameSize + BPOffset)
1225 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1232 // Add Call Frame Information for the instructions we generated above.
1237 // Define CFA in terms of BP. Do this in preference to using FP/SP,
1238 // because if the stack needed aligning then CFA won't be at a fixed
1239 // offset from FP/SP.
1240 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1241 CFIIndex = MF.addFrameInst(
1242 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1244 // Adjust the definition of CFA to account for the change in SP.
1245 assert(NegFrameSize);
1246 CFIIndex = MF.addFrameInst(
1247 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1249 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1250 .addCFIIndex(CFIIndex);
1253 // Describe where FP was saved, at a fixed offset from CFA.
1254 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1255 CFIIndex = MF.addFrameInst(
1256 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1257 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1258 .addCFIIndex(CFIIndex);
1261 if (FI->usesPICBase()) {
1262 // Describe where the PIC base (R30) was saved, at a fixed offset from CFA.
1263 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1264 CFIIndex = MF.addFrameInst(
1265 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1266 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1267 .addCFIIndex(CFIIndex);
1271 // Describe where BP was saved, at a fixed offset from CFA.
1272 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1273 CFIIndex = MF.addFrameInst(
1274 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1275 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1276 .addCFIIndex(CFIIndex);
1280 // Describe where LR was saved, at a fixed offset from CFA.
1281 unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1282 CFIIndex = MF.addFrameInst(
1283 MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1284 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1285 .addCFIIndex(CFIIndex);
1289 // If there is a frame pointer, copy R1 into R31
1291 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1295 if (!HasBP && needsCFI) {
1296 // Change the definition of CFA from SP+offset to FP+offset, because SP
1297 // will change at every alloca.
1298 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1299 unsigned CFIIndex = MF.addFrameInst(
1300 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1302 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1303 .addCFIIndex(CFIIndex);
1308 // Describe where callee saved registers were saved, at fixed offsets from
1310 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1311 for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1312 unsigned Reg = CSI[I].getReg();
1313 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1315 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1316 // subregisters of CR2. We just need to emit a move of CR2.
1317 if (PPC::CRBITRCRegClass.contains(Reg))
1320 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1323 // For SVR4, don't emit a move for the CR spill slot if we haven't
1325 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1329 // For 64-bit SVR4 when we have spilled CRs, the spill location
1330 // is SP+8, not a frame-relative slot.
1331 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1332 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1333 // the whole CR word. In the ELFv2 ABI, every CR that was
1334 // actually saved gets its own CFI record.
1335 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1336 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1337 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1338 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1339 .addCFIIndex(CFIIndex);
1343 if (CSI[I].isSpilledToReg()) {
1344 unsigned SpilledReg = CSI[I].getDstReg();
1345 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1346 nullptr, MRI->getDwarfRegNum(Reg, true),
1347 MRI->getDwarfRegNum(SpilledReg, true)));
1348 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1349 .addCFIIndex(CFIRegister);
1351 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1352 // We have changed the object offset above but we do not want to change
1353 // the actual offsets in the CFI instruction so we have to undo the
1354 // offset change here.
1355 if (MovingStackUpdateDown)
1356 Offset -= NegFrameSize;
1358 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1359 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1360 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1361 .addCFIIndex(CFIIndex);
/// Expand the PROBED_STACKALLOC_32 / PROBED_STACKALLOC_64 pseudo found in
/// \p PrologMBB into real stack-probing code.
///
/// The pseudo's operands are read as: operand 0 = scratch register,
/// operand 1 = register that receives a copy of the incoming SP, operand 2 =
/// negated frame size. The frame is claimed in probe-size steps with
/// store-with-update instructions, so every step both moves SP and stores to
/// the newly claimed memory. Fewer than three whole blocks are probed with
/// straight-line code; otherwise a CTR-counted loop is synthesized in two new
/// basic blocks inserted right after \p PrologMBB. The pseudo is erased at the
/// end.
///
/// \param MF        Function being lowered; receives the CFI entries and any
///                  newly created basic blocks.
/// \param PrologMBB Block that is searched for the pseudo.
1367 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1368 MachineBasicBlock &PrologMBB) const {
1369 // TODO: Generate CFI instructions.
1370 bool isPPC64 = Subtarget.isPPC64();
1371 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1372 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1373 MachineFrameInfo &MFI = MF.getFrameInfo();
1374 MachineModuleInfo &MMI = MF.getMMI();
1375 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1376 // AIX assembler does not support cfi directives.
1377 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
// Locate the first probed-allocation pseudo (either width) in the block.
1378 auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1379 int Opc = MI.getOpcode();
1380 return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
// Handle the case where the block contains no such pseudo.
1382 if (StackAllocMIPos == PrologMBB.end())
1384 const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1385 DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1386 MachineInstr &MI = *StackAllocMIPos;
1387 int64_t NegFrameSize = MI.getOperand(2).getImm();
1388 int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
1389 assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
// Assuming NegFrameSize is non-positive, as its name says, the quotient is
// the number of whole probe-size blocks and the remainder (<= 0) is the
// leftover part that is smaller than one probe.
1390 int64_t NumBlocks = NegFrameSize / NegProbeSize;
1391 int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1392 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1393 Register ScratchReg = MI.getOperand(0).getReg();
1394 Register FPReg = MI.getOperand(1).getReg();
1395 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1396 bool HasBP = RegInfo->hasBasePointer(MF);
1397 Align MaxAlign = MFI.getMaxAlign();
1398 // Initialize current frame pointer.
// "or FPReg, SP, SP" is a register copy. {MI} is the insertion point, so this
// and every later BuildMI(PrologMBB, {MI}, ...) lands just before the pseudo.
1399 const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1400 BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1401 // Subroutines to generate .cfi_* directives.
// buildDefCFAReg: switch the CFA register to Reg, keeping the current offset.
1402 auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1403 MachineBasicBlock::iterator MBBI, Register Reg) {
1404 unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1405 unsigned CFIIndex = MF.addFrameInst(
1406 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1407 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1408 .addCFIIndex(CFIIndex);
// buildDefCFA: define the CFA as Reg + Offset.
1410 auto buildDefCFA = [&](MachineBasicBlock &MBB,
1411 MachineBasicBlock::iterator MBBI, Register Reg,
1413 unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1414 unsigned CFIIndex = MBB.getParent()->addFrameInst(
1415 MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1416 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1417 .addCFIIndex(CFIIndex);
1419 // Subroutine to determine if we can use the Imm as part of d-form.
// Requires a signed 16-bit value that is a multiple of 4. NOTE(review): the
// multiple-of-4 rule presumably comes from the displacement encoding of the
// 64-bit store-with-update; confirm against the ISA.
1420 auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1421 // Subroutine to materialize the Imm into TempReg.
// Emits LI, or LIS followed by ORI with the low 16 bits of Imm.
1422 auto MaterializeImm = [&](MachineBasicBlock &MBB,
1423 MachineBasicBlock::iterator MBBI, int64_t Imm,
1424 Register &TempReg) {
1425 assert(isInt<32>(Imm) && "Unhandled imm");
1427 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1430 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1432 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1434 .addImm(Imm & 0xFFFF);
1437 // Subroutine to store frame pointer and decrease stack pointer by probe size.
// Two encodings are available: STDU/STWU, or the indexed STDUX/STWUX that
// takes NegSizeReg as an operand. Both define SPReg.
1438 auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1439 MachineBasicBlock::iterator MBBI, int64_t NegSize,
1440 Register NegSizeReg, bool UseDForm) {
1442 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1447 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1450 .addReg(NegSizeReg);
1452 // Use FPReg to calculate CFA.
1454 buildDefCFA(PrologMBB, {MI}, FPReg, 0);
1455 // For case HasBP && MaxAlign > 1, we have to align the SP by performing
1456 // SP = SP - SP % MaxAlign.
1457 if (HasBP && MaxAlign > 1) {
// The rotate-and-mask instruction (64-bit or 32-bit flavour) writes its result
// to ScratchReg; the subtract that follows consumes it.
1459 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1462 .addImm(64 - Log2(MaxAlign));
1464 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1467 .addImm(32 - Log2(MaxAlign))
1469 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
1474 // Probe residual part.
// The leftover is allocated first; it needs ScratchReg only when it cannot be
// encoded as an immediate.
1475 if (NegResidualSize) {
1476 bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1477 if (!ResidualUseDForm)
1478 MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
1479 allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
1482 bool UseDForm = CanUseDForm(NegProbeSize);
1483 // If number of blocks is small, just probe them directly.
1484 if (NumBlocks < 3) {
1486 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1487 for (int i = 0; i < NumBlocks; ++i)
1488 allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
1490 // Restore using SPReg to calculate CFA.
1491 buildDefCFAReg(PrologMBB, {MI}, SPReg);
1494 // Since CTR is a volatile register and current shrinkwrap implementation
1495 // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1496 // CTR loop to probe.
1497 // Calculate the trip count and store it in CTR.
1498 MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
1499 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1500 .addReg(ScratchReg, RegState::Kill);
// ScratchReg was killed by the move to CTR, so it is free to hold the probe
// size for the loop body.
1502 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1503 // Create MBBs of the loop.
// Both new blocks are inserted directly after PrologMBB, LoopMBB first.
1504 MachineFunction::iterator MBBInsertPoint =
1505 std::next(PrologMBB.getIterator());
1506 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1507 MF.insert(MBBInsertPoint, LoopMBB);
1508 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1509 MF.insert(MBBInsertPoint, ExitMBB);
1510 // Synthesize the loop body.
// One probe per iteration followed by the BDNZ branch; the block's successors
// are ExitMBB and itself.
1511 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1513 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1515 LoopMBB->addSuccessor(ExitMBB);
1516 LoopMBB->addSuccessor(LoopMBB);
1517 // Synthesize the exit MBB.
// The instructions starting right after the pseudo are moved into ExitMBB,
// which also takes over PrologMBB's successors; PrologMBB then gets LoopMBB
// as a successor.
1518 ExitMBB->splice(ExitMBB->end(), &PrologMBB,
1519 std::next(MachineBasicBlock::iterator(MI)),
1521 ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
1522 PrologMBB.addSuccessor(LoopMBB);
1524 // Restore using SPReg to calculate CFA.
1525 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
// The two new blocks start with no live-in information; recompute it.
1528 recomputeLiveIns(*LoopMBB);
1529 recomputeLiveIns(*ExitMBB);
// The pseudo has been fully expanded; remove it.
1532 MI.eraseFromParent();
/// Emit the epilogue for \p MBB. Most instructions are inserted before the
/// block's first terminator; a few use StackUpdateLoc instead, which defaults
/// to that same position.
///
/// The code below restores the stack pointer and reloads whichever of FP, BP,
/// the PIC base (R30), LR and the saved CR fields are relevant for this
/// function. The tail of the function handles the fastcc callee-pop stack
/// adjustment (under GuaranteedTailCallOpt) and calls
/// createTailCallBranchInstr.
///
/// \param MF  Function whose frame is being torn down.
/// \param MBB Block that receives the epilogue code.
1535 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1536 MachineBasicBlock &MBB) const {
1537 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
// Borrow the terminator's debug location when the block has a terminator.
1540 if (MBBI != MBB.end())
1541 dl = MBBI->getDebugLoc();
1543 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1544 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1546 // Get alignment info so we know how to restore the SP.
1547 const MachineFrameInfo &MFI = MF.getFrameInfo();
1549 // Get the number of bytes allocated from the FrameInfo.
1550 int FrameSize = MFI.getStackSize();
1552 // Get processor type.
1553 bool isPPC64 = Subtarget.isPPC64();
1555 bool isSVR4ABI = Subtarget.isSVR4ABI();
1557 // Check if the link register (LR) has been saved.
1558 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1559 bool MustSaveLR = FI->mustSaveLR();
1560 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1561 bool MustSaveCR = !MustSaveCRs.empty();
1562 // Do we have a frame pointer and/or base pointer for this function?
1563 bool HasFP = hasFP(MF);
1564 bool HasBP = RegInfo->hasBasePointer(MF);
// Equivalent to: every configuration except 32-bit SVR4 has a red zone.
1565 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1567 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1568 Register BPReg = RegInfo->getBaseRegister(MF);
1569 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1570 Register ScratchReg;
1571 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// Pick the opcode variant for each instruction used below based on isPPC64.
1572 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1574 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1576 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1578 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1580 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1582 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1584 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1586 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1588 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1590 int LROffset = getReturnSaveOffset();
1594 // Using the same bool variable as below to suppress compiler warnings.
1595 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1597 assert(SingleScratchReg &&
1598 "Could not find an available scratch register");
// From here on the flag means "both scratch registers are the same register".
1600 SingleScratchReg = ScratchReg == TempReg;
// The save-slot indices are asserted to be non-zero before their offsets are
// looked up in the frame info.
1604 int FPIndex = FI->getFramePointerSaveIndex();
1605 assert(FPIndex && "No Frame Pointer Save Slot!");
1606 FPOffset = MFI.getObjectOffset(FPIndex);
1608 FPOffset = getFramePointerSaveOffset();
1615 int BPIndex = FI->getBasePointerSaveIndex();
1616 assert(BPIndex && "No Base Pointer Save Slot!");
1617 BPOffset = MFI.getObjectOffset(BPIndex);
1619 BPOffset = getBasePointerSaveOffset();
1624 if (FI->usesPICBase()) {
1625 int PBPIndex = FI->getPICBasePointerSaveIndex();
1626 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1627 PBPOffset = MFI.getObjectOffset(PBPIndex);
// A block counts as a return block when its first terminator is a return.
1630 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1632 if (IsReturnBlock) {
1633 unsigned RetOpcode = MBBI->getOpcode();
// True for any of the six TCRETURN* tail-call pseudos.
1634 bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1635 RetOpcode == PPC::TCRETURNdi ||
1636 RetOpcode == PPC::TCRETURNai ||
1637 RetOpcode == PPC::TCRETURNri8 ||
1638 RetOpcode == PPC::TCRETURNdi8 ||
1639 RetOpcode == PPC::TCRETURNai8;
// Operand 1 of the terminator is read as the stack adjustment and folded into
// FrameSize.
1642 int MaxTCRetDelta = FI->getTailCallSPDelta();
1643 MachineOperand &StackAdjust = MBBI->getOperand(1);
1644 assert(StackAdjust.isImm() && "Expecting immediate value.");
1645 // Adjust stack pointer.
1646 int StackAdj = StackAdjust.getImm();
1647 int Delta = StackAdj - MaxTCRetDelta;
// Zero is accepted here as well, despite the wording of the message.
1648 assert((Delta >= 0) && "Delta must be positive");
1649 if (MaxTCRetDelta>0)
1650 FrameSize += (StackAdj +Delta);
1652 FrameSize += StackAdj;
1656 // Frames of 32KB & larger require special handling because they cannot be
1657 // indexed into with a simple LD/LWZ immediate offset operand.
1658 bool isLargeFrame = !isInt<16>(FrameSize);
1660 // On targets without red zone, the SP needs to be restored last, so that
1661 // all live contents of the stack frame are upwards of the SP. This means
1662 // that we cannot restore SP just now, since there may be more registers
1663 // to restore from the stack frame (e.g. R31). If the frame size is not
1664 // a simple immediate value, we will need a spare register to hold the
1665 // restored SP. If the frame size is known and small, we can simply adjust
1666 // the offsets of the registers to be restored, and still use SP to restore
1667 // them. In such case, the final update of SP will be to add the frame
1669 // To simplify the code, set RBReg to the base register used to restore
1670 // values from the stack, and set SPAdd to the value that needs to be added
1671 // to the SP at the end. The default values are as if red zone was present.
1672 unsigned RBReg = SPReg;
1675 // Check if we can move the stack update instruction up the epilogue
1676 // past the callee saves. This will allow the move to LR instruction
1677 // to be executed before the restores of the callee saves which means
1678 // that the callee saves can hide the latency from the MTLR instruction.
1679 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1680 if (stackUpdateCanBeMoved(MF)) {
1681 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1682 for (CalleeSavedInfo CSI : Info) {
1683 int FrIdx = CSI.getFrameIdx();
1684 // If the frame index is not negative the callee saved info belongs to a
1685 // stack object that is not a fixed stack object. We ignore non-fixed
1686 // stack objects because we won't move the update of the stack pointer
1691 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1694 // Abort the operation as we can't update all CSR restores.
1695 StackUpdateLoc = MBBI;
1702 // In the prologue, the loaded (or persistent) stack pointer value is
1703 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1704 // zone add this offset back now.
1706 // If this function contained a fastcc call and GuaranteedTailCallOpt is
1707 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1708 // call which invalidates the stack pointer value in SP(0). So we use the
1709 // value of R31 in this case.
1710 if (FI->hasFastCall()) {
1711 assert(HasFP && "Expecting a valid frame pointer.");
// Small frames: RBReg = FP + FrameSize in one add-immediate. The LIS/ORI pair
// that follows builds FrameSize in ScratchReg for the add-register form.
1714 if (!isLargeFrame) {
1715 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1716 .addReg(FPReg).addImm(FrameSize);
1718 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1719 .addImm(FrameSize >> 16);
1720 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1721 .addReg(ScratchReg, RegState::Kill)
1722 .addImm(FrameSize & 0xFFFF);
1723 BuildMI(MBB, MBBI, dl, AddInst)
1726 .addReg(ScratchReg);
// Fixed-size small frame with no base pointer and no variable-sized objects.
1728 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1730 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1734 // Make sure that adding FrameSize will not overflow the max offset
1736 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1737 "Local offsets should be negative");
// Rebase the save-slot offsets by FrameSize.
1739 FPOffset += FrameSize;
1740 BPOffset += FrameSize;
1741 PBPOffset += FrameSize;
1744 // We don't want to use ScratchReg as a base register, because it
1745 // could happen to be R0. Use FP instead, but make sure to preserve it.
1747 // If FP is not saved, copy it to ScratchReg.
1749 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1754 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1759 assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1760 // If there is no red zone, ScratchReg may be needed for holding a useful
1761 // value (although not the base register). Make sure it is not overwritten
1764 // If we need to restore both the LR and the CR and we only have one
1765 // available scratch register, we must do them one at a time.
1766 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1767 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1769 assert(HasRedZone && "Expecting red zone");
1770 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1771 .addImm(CRSaveOffset)
// One move-to-CR per saved field; only the last one marks TempReg as killed.
1773 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1774 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1775 .addReg(TempReg, getKillRegState(i == e-1));
1778 // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1779 // LR is stored in the caller's stack frame. ScratchReg will be needed
1780 // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1781 // a base register anyway, because it may happen to be R0.
1782 bool LoadedLR = false;
// Early reload is only taken when SP is still the base register and the
// adjusted LR offset fits a signed 16-bit displacement.
1783 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1784 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1785 .addImm(LROffset+SPAdd)
// Two distinct scratch registers (or no LR to restore): the CR word can be
// loaded into TempReg without clobbering the LR value in ScratchReg.
1790 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1791 assert(RBReg == SPReg && "Should be using SP as a base register");
1792 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1793 .addImm(CRSaveOffset)
1798 // If there is red zone, restore FP directly, since SP has already been
1799 // restored. Otherwise, restore the value of FP into ScratchReg.
1800 if (HasRedZone || RBReg == SPReg)
1801 BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1805 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1810 if (FI->usesPICBase())
1811 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1816 BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1820 // There is nothing more to be loaded from the stack, so now we can
1821 // restore SP: SP = RBReg + SPAdd.
1822 if (RBReg != SPReg || SPAdd != 0) {
1823 assert(!HasRedZone && "This should not happen with red zone");
1824 // If SPAdd is 0, generate a copy.
1826 BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1830 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1834 assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1836 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1838 .addReg(ScratchReg);
1840 // Now load the LR from the caller's stack frame.
1841 if (MustSaveLR && !LoadedLR)
1842 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1848 !(SingleScratchReg && MustSaveLR))
// Same per-field restore as above, for the case where the CR word was loaded
// into a TempReg distinct from the LR scratch register.
1849 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1850 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1851 .addReg(TempReg, getKillRegState(i == e-1));
// Move the reloaded return address from ScratchReg into LR.
1854 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1856 // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1857 // call optimization
1858 if (IsReturnBlock) {
1859 unsigned RetOpcode = MBBI->getOpcode();
// Only a plain BLR/BLR8 return from a fastcc function qualifies.
1860 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1861 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1862 MF.getFunction().getCallingConv() == CallingConv::Fast) {
// Note: this FI shadows the function-scope FI declared earlier; both are
// obtained from the same MF.getInfo<PPCFunctionInfo>() call.
1863 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1864 unsigned CallerAllocatedAmt = FI->getMinReservedArea();
// A non-zero amount that fits 16 bits is popped with a single add-immediate.
// The LIS/ORI/ADD sequence that follows builds the amount in ScratchReg.
1866 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1867 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1868 .addReg(SPReg).addImm(CallerAllocatedAmt);
1870 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1871 .addImm(CallerAllocatedAmt >> 16);
1872 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1873 .addReg(ScratchReg, RegState::Kill)
1874 .addImm(CallerAllocatedAmt & 0xFFFF);
1875 BuildMI(MBB, MBBI, dl, AddInst)
1878 .addReg(ScratchReg);
1881 createTailCallBranchInstr(MBB);
/// Build the actual tail-call branch for a TCRETURN* pseudo that is the first
/// terminator of \p MBB.
///
/// Opcode mapping implemented below:
///   TCRETURNdi  -> TAILB      TCRETURNdi8 -> TAILB8     (direct)
///   TCRETURNri  -> TAILBCTR   TCRETURNri8 -> TAILBCTR8  (register)
///   TCRETURNai  -> TAILBA     TCRETURNai8 -> TAILBA8    (absolute)
/// In every case the new branch is inserted in front of the block's last
/// non-debug instruction and takes its target from that instruction's
/// operand 0 (the register variants only assert on it).
///
/// \param MBB Block whose first terminator is inspected; it must have one.
1886 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1887 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1889 // If we got this far a first terminator should exist.
1890 assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1892 DebugLoc dl = MBBI->getDebugLoc();
1893 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1895 // Create branch instruction for pseudo tail call return instruction.
1896 // The TCRETURNdi variants are direct calls. Valid targets for those are
1897 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1898 // since we can tail call external functions with PC-Rel (i.e. we don't need
1899 // to worry about different TOC pointers). Some of the external functions will
1900 // be MO_GlobalAddress while others like memcpy for example, are going to
1901 // be MO_ExternalSymbol.
1902 unsigned RetOpcode = MBBI->getOpcode();
1903 if (RetOpcode == PPC::TCRETURNdi) {
// Re-point MBBI at the last non-debug instruction; the branch goes before it.
1904 MBBI = MBB.getLastNonDebugInstr();
1905 MachineOperand &JumpTarget = MBBI->getOperand(0);
// A global keeps its offset; an external symbol is forwarded by name.
1906 if (JumpTarget.isGlobal())
1907 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1908 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1909 else if (JumpTarget.isSymbol())
1910 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1911 addExternalSymbol(JumpTarget.getSymbolName());
1913 llvm_unreachable("Expecting Global or External Symbol");
1914 } else if (RetOpcode == PPC::TCRETURNri) {
1915 MBBI = MBB.getLastNonDebugInstr();
// The register operand is only checked, not forwarded: TAILBCTR is built
// without operands. NOTE(review): presumably the target is already in CTR by
// this point; confirm against the call lowering.
1916 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1917 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1918 } else if (RetOpcode == PPC::TCRETURNai) {
1919 MBBI = MBB.getLastNonDebugInstr();
// Absolute target: forward the immediate unchanged.
1920 MachineOperand &JumpTarget = MBBI->getOperand(0);
1921 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
// The remaining cases mirror the three above using the '8'-suffixed opcodes.
1922 } else if (RetOpcode == PPC::TCRETURNdi8) {
1923 MBBI = MBB.getLastNonDebugInstr();
1924 MachineOperand &JumpTarget = MBBI->getOperand(0);
1925 if (JumpTarget.isGlobal())
1926 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1927 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1928 else if (JumpTarget.isSymbol())
1929 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1930 addExternalSymbol(JumpTarget.getSymbolName());
1932 llvm_unreachable("Expecting Global or External Symbol");
1933 } else if (RetOpcode == PPC::TCRETURNri8) {
1934 MBBI = MBB.getLastNonDebugInstr();
1935 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1936 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1937 } else if (RetOpcode == PPC::TCRETURNai8) {
1938 MBBI = MBB.getLastNonDebugInstr();
1939 MachineOperand &JumpTarget = MBBI->getOperand(0);
1940 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
/// Refine the callee-saved register set for PPC. Starts from the set computed
/// by TargetFrameLowering::determineCalleeSaves, records whether LR must be
/// saved, creates the fixed stack objects for the frame pointer, base pointer,
/// PIC base, tail-call linkage area and CR save slot where they are needed,
/// and clears from \p SavedRegs the registers whose save slots are handled by
/// the prologue/epilogue code.
1944 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1945 BitVector &SavedRegs,
1946 RegScavenger *RS) const {
1947 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1949 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1951 // Save and clear the LR state.
1952 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1953 unsigned LR = RegInfo->getRARegister();
1954 FI->setMustSaveLR(MustSaveLR(MF, LR));
1955 SavedRegs.reset(LR);
1957 // Save R31 if necessary
1958 int FPSI = FI->getFramePointerSaveIndex();
1959 const bool isPPC64 = Subtarget.isPPC64();
1960 MachineFrameInfo &MFI = MF.getFrameInfo();
1962 // If the frame pointer save index hasn't been defined yet.
// (An index of 0 is treated as "not created yet".)
1963 if (!FPSI && needsFP(MF)) {
1964 // Find out the fixed offset of the frame pointer save area.
1965 int FPOffset = getFramePointerSaveOffset();
1966 // Allocate the frame index for frame pointer save area.
// The slot is pointer sized: 8 bytes on PPC64, 4 bytes otherwise.
1967 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1969 FI->setFramePointerSaveIndex(FPSI);
// Same scheme for the base pointer save slot.
1972 int BPSI = FI->getBasePointerSaveIndex();
1973 if (!BPSI && RegInfo->hasBasePointer(MF)) {
1974 int BPOffset = getBasePointerSaveOffset();
1975 // Allocate the frame index for the base pointer save area.
1976 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1978 FI->setBasePointerSaveIndex(BPSI);
1981 // Reserve stack space for the PIC Base register (R30).
1982 // Only used in SVR4 32-bit.
1983 if (FI->usesPICBase()) {
1984 int PBPSI = MFI.CreateFixedObject(4, -8, true);
1985 FI->setPICBasePointerSaveIndex(PBPSI);
1988 // Make sure we don't explicitly spill r31, because, for example, we have
1989 // some inline asm which explicitly clobbers it, when we otherwise have a
1990 // frame pointer and are using r31's spill slot for the prologue/epilogue
1991 // code. Same goes for the base pointer and the PIC base register.
1993 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1994 if (RegInfo->hasBasePointer(MF))
1995 SavedRegs.reset(RegInfo->getBaseRegister(MF));
1996 if (FI->usesPICBase())
1997 SavedRegs.reset(PPC::R30);
1999 // Reserve stack space to move the linkage area to in case of a tail call.
// Only done under GuaranteedTailCallOpt when the tail-call SP delta is
// negative; the object has size -TCSPDelta and lives at offset TCSPDelta.
2001 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2002 (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2003 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2006 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2007 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2008 // object at the offset of the CR-save slot in the linkage area. The actual
2009 // save and restore of the condition register will be created as part of the
2010 // prologue and epilogue insertion, but the FixedStack object is needed to
2011 // keep the CalleeSavedInfo valid.
2012 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2013 SavedRegs.test(PPC::CR4))) {
2014 const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
// Slot offset: 8 on PPC64, 4 on (32-bit) AIX, -4 otherwise.
2015 const int64_t SpillOffset =
2016 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2018 MFI.CreateFixedObject(SpillSize, SpillOffset,
2019 /* IsImmutable */ true, /* IsAliased */ false);
2020 FI->setCRSpillFrameIndex(FrameIdx);
/// Finalize the placement of the callee-saved register save areas.
///
/// The spill slots are laid out by shifting each slot's object offset by a
/// running LowerBound, in this order: floating-point save area, general
/// register save area (GPR/G8R, including the frame pointer, PIC base and
/// base pointer slots), CR save area (32-bit ELF only), VRSAVE save area and
/// finally the 16-byte aligned vector/SPE save area. Scavenging spill slots
/// are added at the end. For shrink-wrapped functions with tail calls, the
/// tail-call branch is also created here for return blocks other than the
/// chosen restore block.
2024 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2025 RegScavenger *RS) const {
2026 // Get callee saved register information.
2027 MachineFrameInfo &MFI = MF.getFrameInfo();
2028 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2030 // If the function is shrink-wrapped, and if the function has a tail call, the
2031 // tail call might not be in the new RestoreBlock, so real branch instruction
2032 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2033 // RestoreBlock. So we handle this case here.
2034 if (MFI.getSavePoint() && MFI.hasTailCall()) {
2035 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2036 for (MachineBasicBlock &MBB : MF) {
2037 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2038 createTailCallBranchInstr(MBB);
2042 // Early exit if no callee saved registers are modified!
2043 if (CSI.empty() && !needsFP(MF)) {
2044 addScavengingSpillSlot(MF, RS);
// Lowest register seen per class, starting from register 31 of each class.
// The save-area sizes below are computed as (31 - Min + 1) registers.
2048 unsigned MinGPR = PPC::R31;
2049 unsigned MinG8R = PPC::X31;
2050 unsigned MinFPR = PPC::F31;
2051 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2053 bool HasGPSaveArea = false;
2054 bool HasG8SaveArea = false;
2055 bool HasFPSaveArea = false;
2056 bool HasVRSAVESaveArea = false;
2057 bool HasVRSaveArea = false;
2059 SmallVector<CalleeSavedInfo, 18> GPRegs;
2060 SmallVector<CalleeSavedInfo, 18> G8Regs;
2061 SmallVector<CalleeSavedInfo, 18> FPRegs;
2062 SmallVector<CalleeSavedInfo, 18> VRegs;
// Bucket every callee-saved register by register class.
2064 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2065 unsigned Reg = CSI[i].getReg();
2066 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2067 (Reg != PPC::X2 && Reg != PPC::R2)) &&
2068 "Not expecting to try to spill R2 in a function that must save TOC");
2069 if (PPC::GPRCRegClass.contains(Reg)) {
2070 HasGPSaveArea = true;
2072 GPRegs.push_back(CSI[i]);
2077 } else if (PPC::G8RCRegClass.contains(Reg)) {
2078 HasG8SaveArea = true;
2080 G8Regs.push_back(CSI[i]);
2085 } else if (PPC::F8RCRegClass.contains(Reg)) {
2086 HasFPSaveArea = true;
2088 FPRegs.push_back(CSI[i]);
2093 } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2094 PPC::CRRCRegClass.contains(Reg)) {
2095 ; // do nothing, as we already know whether CRs are spilled
2096 } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
2097 HasVRSAVESaveArea = true;
2098 } else if (PPC::VRRCRegClass.contains(Reg) ||
2099 PPC::SPERCRegClass.contains(Reg)) {
2100 // Altivec and SPE are mutually exclusive, but have the same stack
2101 // alignment requirements, so overload the save area for both cases.
2102 HasVRSaveArea = true;
2104 VRegs.push_back(CSI[i]);
2110 llvm_unreachable("Unknown RegisterClass!");
2114 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2115 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// LowerBound is the non-positive offset of the bottom of the save areas laid
// out so far. Each area's slots are shifted by it, after which it is lowered
// by the size of that area.
2117 int64_t LowerBound = 0;
2119 // Take into account stack space reserved for tail calls.
2121 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2122 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2123 LowerBound = TCSPDelta;
2126 // The Floating-point register save area is right below the back chain word
2127 // of the previous stack frame.
2128 if (HasFPSaveArea) {
2129 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2130 int FI = FPRegs[i].getFrameIdx();
2132 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
// One 8-byte slot for every register from MinFPR through F31.
2135 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2138 // Check whether the frame pointer register is allocated. If so, make sure it
2139 // is spilled to the correct offset.
2141 int FI = PFI->getFramePointerSaveIndex();
2142 assert(FI && "No Frame Pointer Save Slot!");
2143 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2144 // FP is R31/X31, so no need to update MinGPR/MinG8R.
2145 HasGPSaveArea = true;
// The PIC base register (R30) save slot lives in the general register area.
2148 if (PFI->usesPICBase()) {
2149 int FI = PFI->getPICBasePointerSaveIndex();
2150 assert(FI && "No PIC Base Pointer Save Slot!");
2151 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2153 MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2154 HasGPSaveArea = true;
// Likewise for the base pointer, which may be a 32-bit or a 64-bit register.
2157 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2158 if (RegInfo->hasBasePointer(MF)) {
2159 int FI = PFI->getBasePointerSaveIndex();
2160 assert(FI && "No Base Pointer Save Slot!");
2161 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2163 Register BP = RegInfo->getBaseRegister(MF);
2164 if (PPC::G8RCRegClass.contains(BP)) {
2165 MinG8R = std::min<unsigned>(MinG8R, BP);
2166 HasG8SaveArea = true;
2167 } else if (PPC::GPRCRegClass.contains(BP)) {
2168 MinGPR = std::min<unsigned>(MinGPR, BP);
2169 HasGPSaveArea = true;
2173 // General register save area starts right below the Floating-point
2174 // register save area.
2175 if (HasGPSaveArea || HasG8SaveArea) {
2176 // Move general register save area spill slots down, taking into account
2177 // the size of the Floating-point register save area.
// Registers that were spilled to another register have no stack slot to move.
2178 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2179 if (!GPRegs[i].isSpilledToReg()) {
2180 int FI = GPRegs[i].getFrameIdx();
2181 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2185 // Move general register save area spill slots down, taking into account
2186 // the size of the Floating-point register save area.
2187 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2188 if (!G8Regs[i].isSpilledToReg()) {
2189 int FI = G8Regs[i].getFrameIdx();
2190 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
// The area spans from the lowest encoded GPR/G8R through register 31, with
// one pointer-sized slot (8 bytes on PPC64, 4 bytes otherwise) per register.
2195 std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2196 TRI->getEncodingValue(MinG8R));
2198 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2199 LowerBound -= (31 - MinReg + 1) * GPRegSize;
2202 // For 32-bit only, the CR save area is below the general register
2203 // save area. For 64-bit SVR4, the CR save area is addressed relative
2204 // to the stack pointer and hence does not need an adjustment here.
2205 // Only CR2 (the first nonvolatile spilled) has an associated frame
2206 // index so that we have a single uniform save area.
2207 if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2208 // Adjust the frame index of the CR spill slot.
2209 for (const auto &CSInfo : CSI) {
2210 if (CSInfo.getReg() == PPC::CR2) {
2211 int FI = CSInfo.getFrameIdx();
2212 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2217 LowerBound -= 4; // The CR save area is always 4 bytes long.
2220 if (HasVRSAVESaveArea) {
2221 // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2222 // which have the VRSAVE register class?
2223 // Adjust the frame index of the VRSAVE spill slot.
2224 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2225 unsigned Reg = CSI[i].getReg();
2227 if (PPC::VRSAVERCRegClass.contains(Reg)) {
2228 int FI = CSI[i].getFrameIdx();
2230 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2234 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2237 // Both Altivec and SPE have the same alignment and padding requirements
2238 // within the stack frame.
2239 if (HasVRSaveArea) {
2240 // Insert alignment padding, we need 16-byte alignment. Note: for positive
2241 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2242 // we are using negative number here (the stack grows downward). We should
2243 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2244 // is the alignment size ( n = 16 here) and y is the size after aligning.
2245 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2246 LowerBound &= ~(15);
2248 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2249 int FI = VRegs[i].getFrameIdx();
2251 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2255 addScavengingSpillSlot(MF, RS);
/// Register emergency spill slot(s) with the register scavenger \p RS.
///
/// A slot is created when the function has variable-sized objects, spills CR
/// or VRSAVE, hasNonRISpills(MF) holds, or it has spills and the estimated
/// frame size does not fit a signed 16-bit immediate. The slot uses the spill
/// size and alignment of G8RC on PPC64 and of GPRC otherwise. A second slot
/// is added when CR or VRSAVE is spilled or HasAlVars is set.
2259 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2260 RegScavenger *RS) const {
2261 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2262 // a large stack, which will require scavenging a register to materialize a large offset.
2265 // We need to have a scavenger spill slot for spills if the frame size is
2266 // large. In case there is no free register for large-offset addressing,
2267 // this slot is used for the necessary emergency spill. Also, we need the
2268 // slot for dynamic stack allocations.
2270 // The scavenger might be invoked if the frame offset does not fit into
2271 // the 16-bit immediate. We don't know the complete frame size here
2272 // because we've not yet computed callee-saved register spills or the
2273 // needed alignment padding.
2274 unsigned StackSize = determineFrameLayout(MF, true);
2275 MachineFrameInfo &MFI = MF.getFrameInfo();
2276 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2277 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
// The slot must hold one general purpose register of the subtarget's width.
2278 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2279 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2280 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2281 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2282 unsigned Size = TRI.getSpillSize(RC);
2283 Align Alignment = TRI.getSpillAlign(RC);
2284 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2286 // Might we have over-aligned allocas?
// True when there are variable-sized objects and the maximum alignment of the
// frame exceeds the target stack alignment.
2288 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2290 // These kinds of spills might need two registers.
2291 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2292 RS->addScavengingFrameIndex(
2293 MFI.CreateStackObject(Size, Alignment, false));
2297 // This function checks if a callee saved gpr can be spilled to a volatile
2298 // vector register. This occurs for leaf functions when the option
2299 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2300 // which were not spilled to vectors, return false so the target independent
2301 // code can handle them by assigning a FrameIdx to a stack slot.
//
// For every GPR/G8R entry of CSI that gets a vector register, the destination
// register is recorded on the entry with setDstReg(). The result is
// AllSpilledToReg, i.e. true only if no entry was left for the generic code.
2302 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2303 MachineFunction &MF, const TargetRegisterInfo *TRI,
2304 std::vector<CalleeSavedInfo> &CSI) const {
2307 return true; // Early exit if no callee saved registers are modified!
2309 // Early exit if cannot spill gprs to volatile vector registers.
// Requires the command line option, a function without calls, and P9 vector
// support.
2310 MachineFrameInfo &MFI = MF.getFrameInfo();
2311 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2314 // Build a BitVector of VSRs that can be used for spilling GPRs.
2315 BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2316 BitVector BVCalleeSaved(TRI->getNumRegs());
2317 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2318 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// The callee-saved register list is terminated by a zero entry.
2319 for (unsigned i = 0; CSRegs[i]; ++i)
2320 BVCalleeSaved.set(CSRegs[i]);
// Narrow the allocatable set down to the usable spill destinations.
2322 for (unsigned Reg : BVAllocatable.set_bits()) {
2323 // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2324 // used in the function.
2325 if (BVCalleeSaved[Reg] ||
2326 (!PPC::F8RCRegClass.contains(Reg) &&
2327 !PPC::VFRCRegClass.contains(Reg)) ||
2328 (MF.getRegInfo().isPhysRegUsed(Reg)))
2329 BVAllocatable.reset(Reg);
2332 bool AllSpilledToReg = true;
2333 for (auto &CS : CSI) {
2334 if (BVAllocatable.none())
2337 unsigned Reg = CS.getReg();
// Only general purpose registers are candidates for spilling to a vector
// register; anything else is left to the generic code.
2338 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2339 AllSpilledToReg = false;
// Take the first remaining candidate and remove it from the pool.
2343 unsigned VolatileVFReg = BVAllocatable.find_first();
2344 if (VolatileVFReg < BVAllocatable.size()) {
2345 CS.setDstReg(VolatileVFReg);
2346 BVAllocatable.reset(VolatileVFReg);
2348 AllSpilledToReg = false;
2351 return AllSpilledToReg;
/// Emit the spill code for the callee-saved registers in \p CSI, inserting
/// before \p MI in \p MBB.
///
/// Special cases visible below: VRSAVE and (when the TOC must be saved) R2/X2
/// are tested for explicitly; nonvolatile CR fields are either recorded for
/// the prologue (non 32-bit-ELF ABIs) or saved through R12 with MFCR + STW
/// (32-bit ELF); registers assigned a destination register are moved with
/// MTVSRD; everything else is stored to its stack slot.
2354 bool PPCFrameLowering::spillCalleeSavedRegisters(
2355 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2356 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2358 MachineFunction *MF = MBB.getParent();
2359 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2360 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2361 bool MustSaveTOC = FI->mustSaveTOC();
// CRMIB holds the MFCR built for the first spilled CR field so that further
// CR fields can be attached to it as implicit operands.
2363 bool CRSpilled = false;
2364 MachineInstrBuilder CRMIB;
2366 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2367 unsigned Reg = CSI[i].getReg();
2368 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2369 if (Reg == PPC::VRSAVE)
2372 // CR2 through CR4 are the nonvolatile CR fields.
2373 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2375 // Add the callee-saved register as live-in; it's killed at the spill.
2376 // Do not do this for callee-saved registers that are live-in to the
2377 // function because they will already be marked live-in and this will be
2378 // adding it for a second time. It is an error to add the same register
2379 // to the set more than once.
2380 const MachineRegisterInfo &MRI = MF->getRegInfo();
2381 bool IsLiveIn = MRI.isLiveIn(Reg);
// A CR spill was already emitted: only mark this additional CR field as
// implicitly killed by that MFCR.
2385 if (CRSpilled && IsCRField) {
2386 CRMIB.addReg(Reg, RegState::ImplicitKill);
2390 // The actual spill will happen in the prologue.
2391 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2394 // Insert the spill to the stack frame.
2396 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2397 if (!Subtarget.is32BitELFABI()) {
2398 // The actual spill will happen at the start of the prologue.
2399 FuncInfo->addMustSaveCR(Reg);
2402 FuncInfo->setSpillsCR();
2404 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
2405 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
// Copy the condition register into R12, then store R12 to the CR slot.
2406 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2407 .addReg(Reg, RegState::ImplicitKill);
2409 MBB.insert(MI, CRMIB);
2410 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2412 getKillRegState(true)),
2413 CSI[i].getFrameIdx()));
// Registers with an assigned destination register are moved there instead
// of being stored to the stack.
2416 if (CSI[i].isSpilledToReg()) {
2418 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2419 .addReg(Reg, getKillRegState(true));
2421 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2422 // Use !IsLiveIn for the kill flag.
2423 // We do not want to kill registers that are live in this function
2424 // before their use because they will become undefined registers.
2425 // Functions without NoUnwind need to preserve the order of elements in
2426 // saved vector registers.
2427 if (Subtarget.needsSwapsForVSXMemOps() &&
2428 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2429 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2430 CSI[i].getFrameIdx(), RC, TRI);
2432 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
/// Reload the saved condition register word from the frame slot of
/// CSI[CSIIndex] into R12 and move it back into the nonvolatile CR fields
/// (CR2, CR3, CR4) with MTOCRF, inserting the instructions before \p MI.
/// The CR*Spilled flags determine on which move R12 is marked killed, so that
/// the kill flag lands on the last instruction reading it.
2440 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2441 bool CR4Spilled, MachineBasicBlock &MBB,
2442 MachineBasicBlock::iterator MI,
2443 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2445 MachineFunction *MF = MBB.getParent();
2446 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
// Scratch register that carries the loaded CR word.
2448 unsigned MoveReg = PPC::R12;
2450 // 32-bit: FP-relative
2452 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2453 CSI[CSIIndex].getFrameIdx()));
2455 unsigned RestoreOp = PPC::MTOCRF;
// CR2: R12 dies here only if neither CR3 nor CR4 still needs it.
2457 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2458 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
// CR3: R12 dies here unless CR4 still needs it.
2461 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2462 .addReg(MoveReg, getKillRegState(!CR4Spilled)));
// CR4 is the last possible user of R12.
2465 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2466 .addReg(MoveReg, getKillRegState(true)));
/// Remove the call frame pseudo instruction at \p I and return the iterator
/// produced by erasing it.
///
/// With GuaranteedTailCallOpt, an ADJCALLSTACKUP whose second operand (the
/// amount popped by the callee) is non-zero first gets explicit stack pointer
/// adjustment code: a single ADDI when the amount fits a signed 16-bit
/// immediate, otherwise the amount is built in R0/X0 with LIS + ORI and
/// combined with the stack pointer by an ADD.
2469 MachineBasicBlock::iterator PPCFrameLowering::
2470 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2471 MachineBasicBlock::iterator I) const {
2472 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2473 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2474 I->getOpcode() == PPC::ADJCALLSTACKUP) {
2475 // Add (actually subtract) back the amount the callee popped on return.
2476 if (int CalleeAmt = I->getOperand(1).getImm()) {
2477 bool is64Bit = Subtarget.isPPC64();
// Pick the 32-bit or 64-bit flavour of the registers and opcodes.
2479 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2480 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2481 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2482 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2483 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2484 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2485 const DebugLoc &dl = I->getDebugLoc();
// Small amounts fit the immediate field of a single add-immediate.
2487 if (isInt<16>(CalleeAmt)) {
2488 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2489 .addReg(StackReg, RegState::Kill)
// Otherwise materialize the amount in TmpReg: high 16 bits with LIS, low
// 16 bits with ORI, then add it to the stack pointer.
2492 MachineBasicBlock::iterator MBBI = I;
2493 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2494 .addImm(CalleeAmt >> 16);
2495 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2496 .addReg(TmpReg, RegState::Kill)
2497 .addImm(CalleeAmt & 0xFFFF);
2498 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2499 .addReg(StackReg, RegState::Kill)
2504 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2505 return MBB.erase(I);
/// Return true if \p Reg is one of the condition register fields CR2, CR3 or
/// CR4.
2508 static bool isCalleeSavedCR(unsigned Reg) {
2509 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
/// Emit the reload code for the callee-saved registers in \p CSI at \p MI in
/// \p MBB.
///
/// Special cases visible below: VRSAVE and (when the TOC must be saved) R2/X2
/// are tested for explicitly; CR fields are left to the epilogue on ABIs other
/// than 32-bit ELF, and on 32-bit ELF they are tracked with the CR*Spilled
/// flags and restored together by restoreCRs(); registers that were spilled to
/// a register are moved back with MFVSRD; everything else is loaded from its
/// stack slot.
2512 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2513 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2514 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2515 MachineFunction *MF = MBB.getParent();
2516 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2517 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2518 bool MustSaveTOC = FI->mustSaveTOC();
// Which nonvolatile CR fields have been seen but not yet restored.
2519 bool CR2Spilled = false;
2520 bool CR3Spilled = false;
2521 bool CR4Spilled = false;
// Index into CSI that is handed to restoreCRs() to locate the CR spill slot.
2522 unsigned CSIIndex = 0;
2524 // Initialize insertion-point logic; we will be restoring in reverse order.
2526 MachineBasicBlock::iterator I = MI, BeforeI = I;
2527 bool AtStart = I == MBB.begin();
2532 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2533 unsigned Reg = CSI[i].getReg();
2535 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2536 if (Reg == PPC::VRSAVE)
2539 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2542 // Restore of callee saved condition register field is handled during
2543 // epilogue insertion.
2544 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2547 if (Reg == PPC::CR2) {
2549 // The spill slot is associated only with CR2, which is the
2550 // first nonvolatile spilled. Save it here.
2553 } else if (Reg == PPC::CR3) {
2556 } else if (Reg == PPC::CR4) {
2560 // On 32-bit ELF when we first encounter a non-CR register after seeing at
2561 // least one CR register, restore all spilled CRs together.
2562 if (CR2Spilled || CR3Spilled || CR4Spilled) {
2563 bool is31 = needsFP(*MF);
2564 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2566 CR2Spilled = CR3Spilled = CR4Spilled = false;
// Registers that were spilled to a register are moved back from it.
2569 if (CSI[i].isSpilledToReg()) {
2572 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2573 .addReg(CSI[i].getDstReg(), getKillRegState(true));
2575 // Default behavior for non-CR saves.
2576 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2578 // Functions without NoUnwind need to preserve the order of elements in
2579 // saved vector registers.
2580 if (Subtarget.needsSwapsForVSXMemOps() &&
2581 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2582 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2585 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2587 assert(I != MBB.begin() &&
2588 "loadRegFromStackSlot didn't insert any code!");
2592 // Insert in reverse order.
2601 // If we haven't yet restored the CRs, do so now.
2602 if (CR2Spilled || CR3Spilled || CR4Spilled) {
2603 assert(Subtarget.is32BitELFABI() &&
2604 "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2605 bool is31 = needsFP(*MF);
2606 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
/// Return the stored TOCSaveOffset value.
2612 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2613 return TOCSaveOffset;
/// Return the stored FramePointerSaveOffset value; determineCalleeSaves uses
/// it as the offset of the fixed frame pointer save object.
2616 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2617 return FramePointerSaveOffset;
/// Return the stored BasePointerSaveOffset value; determineCalleeSaves uses
/// it as the offset of the fixed base pointer save object.
2620 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2621 return BasePointerSaveOffset;
/// Decide whether shrink wrapping may be used for \p MF. The per-function
/// shrinkWrapDisabled() flag is checked first; otherwise the answer is true
/// only when the subtarget is both SVR4 ABI and PPC64.
2624 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2625 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2627 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2628 MF.getSubtarget<PPCSubtarget>().isPPC64());