contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

   1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file implements the lowering of LLVM calls to machine code calls for
  11 /// GlobalISel.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AArch64CallLowering.h"
  16 #include "AArch64ISelLowering.h"
  17 #include "AArch64MachineFunctionInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "llvm/ADT/ArrayRef.h"
  20 #include "llvm/ADT/SmallVector.h"
  21 #include "llvm/CodeGen/Analysis.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  24 #include "llvm/CodeGen/GlobalISel/Utils.h"
  25 #include "llvm/CodeGen/LowLevelType.h"
  26 #include "llvm/CodeGen/MachineBasicBlock.h"
  27 #include "llvm/CodeGen/MachineFrameInfo.h"
  28 #include "llvm/CodeGen/MachineFunction.h"
  29 #include "llvm/CodeGen/MachineInstrBuilder.h"
  30 #include "llvm/CodeGen/MachineMemOperand.h"
  31 #include "llvm/CodeGen/MachineOperand.h"
  32 #include "llvm/CodeGen/MachineRegisterInfo.h"
  33 #include "llvm/CodeGen/TargetRegisterInfo.h"
  34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  35 #include "llvm/CodeGen/ValueTypes.h"
  36 #include "llvm/IR/Argument.h"
  37 #include "llvm/IR/Attributes.h"
  38 #include "llvm/IR/Function.h"
  39 #include "llvm/IR/Type.h"
  40 #include "llvm/IR/Value.h"
  41 #include "llvm/Support/MachineValueType.h"
  42 #include <algorithm>
  43 #include <cassert>
  44 #include <cstdint>
  45 #include <iterator>
  46
  47 #define DEBUG_TYPE "aarch64-call-lowering"
  48
  49 using namespace llvm;
  50
     /// Construct the AArch64 GlobalISel call lowering. The address of \p TLI is
     /// handed straight to the generic CallLowering base class.
  51 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
  52   : CallLowering(&TLI) {}
  53
  54 namespace {
  55 struct IncomingArgHandler : public CallLowering::ValueHandler {
  56   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
  57                      CCAssignFn *AssignFn)
  58       : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
  59
  60   Register getStackAddress(uint64_t Size, int64_t Offset,
  61                            MachinePointerInfo &MPO) override {
  62     auto &MFI = MIRBuilder.getMF().getFrameInfo();
  63     int FI = MFI.CreateFixedObject(Size, Offset, true);
  64     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
  65     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
  66     StackUsed = std::max(StackUsed, Size + Offset);
  67     return AddrReg.getReg(0);
  68   }
  69
  70   void assignValueToReg(Register ValVReg, Register PhysReg,
  71                         CCValAssign &VA) override {
  72     markPhysRegUsed(PhysReg);
  73     switch (VA.getLocInfo()) {
  74     default:
  75       MIRBuilder.buildCopy(ValVReg, PhysReg);
  76       break;
  77     case CCValAssign::LocInfo::SExt:
  78     case CCValAssign::LocInfo::ZExt:
  79     case CCValAssign::LocInfo::AExt: {
  80       auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
  81       MIRBuilder.buildTrunc(ValVReg, Copy);
  82       break;
  83     }
  84     }
  85   }
  86
  87   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
  88                             MachinePointerInfo &MPO, CCValAssign &VA) override {
  89     MachineFunction &MF = MIRBuilder.getMF();
  90
  91     // The reported memory location may be wider than the value.
  92     const LLT RegTy = MRI.getType(ValVReg);
  93     MemSize = std::min(static_cast<uint64_t>(RegTy.getSizeInBytes()), MemSize);
  94
  95     auto MMO = MF.getMachineMemOperand(
  96         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemSize,
  97         inferAlignFromPtrInfo(MF, MPO));
  98     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  99   }
 100
 101   /// How the physical register gets marked varies between formal
 102   /// parameters (it's a basic-block live-in), and a call instruction
 103   /// (it's an implicit-def of the BL).
 104   virtual void markPhysRegUsed(unsigned PhysReg) = 0;
 105
 106   bool isIncomingArgumentHandler() const override { return true; }
 107
 108   uint64_t StackUsed;
 109 };
 110
     /// Incoming-value handler used for the formal parameters of a function.
 111 struct FormalArgHandler : public IncomingArgHandler {
 112   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 113                    CCAssignFn *AssignFn)
 114       : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}

       /// Add \p PhysReg as a live-in, first on the builder's register info and
       /// then on the builder's current basic block.
 115   void markPhysRegUsed(unsigned PhysReg) override {
 116     auto *FnMRI = MIRBuilder.getMRI();
 117     FnMRI->addLiveIn(PhysReg);
 118     MIRBuilder.getMBB().addLiveIn(PhysReg);
 119   }
 120 };
 121
     /// Incoming-value handler used for the values a call instruction returns.
 122 struct CallReturnHandler : public IncomingArgHandler {
 123   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 124                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
 125       : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

       /// Attach \p PhysReg to the instruction behind MIB as an implicit def.
 126   void markPhysRegUsed(unsigned PhysReg) override {
 127     const unsigned DefFlags = RegState::Implicit;
 128     MIB.addDef(PhysReg, DefFlags);
 129   }
 130
       /// Builder for the instruction that receives the implicit defs.
 131   MachineInstrBuilder MIB;
 132 };
 133
 134 struct OutgoingArgHandler : public CallLowering::ValueHandler {
 135   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 136                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
 137                      CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
 138                      int FPDiff = 0)
 139       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
 140         AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
 141         StackSize(0), SPReg(0) {}
 142
 143   bool isIncomingArgumentHandler() const override { return false; }
 144
 145   Register getStackAddress(uint64_t Size, int64_t Offset,
 146                            MachinePointerInfo &MPO) override {
 147     MachineFunction &MF = MIRBuilder.getMF();
 148     LLT p0 = LLT::pointer(0, 64);
 149     LLT s64 = LLT::scalar(64);
 150
 151     if (IsTailCall) {
 152       Offset += FPDiff;
 153       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
 154       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
 155       MPO = MachinePointerInfo::getFixedStack(MF, FI);
 156       return FIReg.getReg(0);
 157     }
 158
 159     if (!SPReg)
 160       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
 161
 162     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
 163
 164     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
 165
 166     MPO = MachinePointerInfo::getStack(MF, Offset);
 167     return AddrReg.getReg(0);
 168   }
 169
 170   void assignValueToReg(Register ValVReg, Register PhysReg,
 171                         CCValAssign &VA) override {
 172     MIB.addUse(PhysReg, RegState::Implicit);
 173     Register ExtReg = extendRegister(ValVReg, VA);
 174     MIRBuilder.buildCopy(PhysReg, ExtReg);
 175   }
 176
 177   void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
 178                             MachinePointerInfo &MPO, CCValAssign &VA) override {
 179     MachineFunction &MF = MIRBuilder.getMF();
 180     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size,
 181                                        inferAlignFromPtrInfo(MF, MPO));
 182     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
 183   }
 184
 185   void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
 186                             uint64_t Size, MachinePointerInfo &MPO,
 187                             CCValAssign &VA) override {
 188     unsigned MaxSize = Size * 8;
 189     // For varargs, we always want to extend them to 8 bytes, in which case
 190     // we disable setting a max.
 191     if (!Arg.IsFixed)
 192       MaxSize = 0;
 193
 194     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
 195                            ? extendRegister(Arg.Regs[0], VA, MaxSize)
 196                            : Arg.Regs[0];
 197
 198     // If we extended we might need to adjust the MMO's Size.
 199     const LLT RegTy = MRI.getType(ValVReg);
 200     if (RegTy.getSizeInBytes() > Size)
 201       Size = RegTy.getSizeInBytes();
 202
 203     assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
 204   }
 205
 206   bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
 207                  CCValAssign::LocInfo LocInfo,
 208                  const CallLowering::ArgInfo &Info,
 209                  ISD::ArgFlagsTy Flags,
 210                  CCState &State) override {
 211     bool Res;
 212     if (Info.IsFixed)
 213       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
 214     else
 215       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
 216
 217     StackSize = State.getNextStackOffset();
 218     return Res;
 219   }
 220
 221   MachineInstrBuilder MIB;
 222   CCAssignFn *AssignFnVarArg;
 223   bool IsTailCall;
 224
 225   /// For tail calls, the byte offset of the call's argument area from the
 226   /// callee's. Unused elsewhere.
 227   int FPDiff;
 228   uint64_t StackSize;
 229
 230   // Cache the SP register vreg if we need it more than once in this call site.
 231   Register SPReg;
 232 };
 233 } // namespace
 234
     /// Return true when \p CallConv is fastcc and \p TailCallOpt is set, i.e.
     /// the one combination in which the callee restores the stack.
 235 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
 236   return TailCallOpt && CallConv == CallingConv::Fast;
 237 }
 238
 239 void AArch64CallLowering::splitToValueTypes(
 240     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
 241     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
 242   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 243   LLVMContext &Ctx = OrigArg.Ty->getContext();
 244
 245   SmallVector<EVT, 4> SplitVTs;
 246   SmallVector<uint64_t, 4> Offsets;
 247   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
 248
 249   if (SplitVTs.size() == 0)
 250     return;
 251
 252   if (SplitVTs.size() == 1) {
 253     // No splitting to do, but we want to replace the original type (e.g. [1 x
 254     // double] -> double).
 255     SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
 256                            OrigArg.Flags[0], OrigArg.IsFixed);
 257     return;
 258   }
 259
 260   // Create one ArgInfo for each virtual register in the original ArgInfo.
 261   assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
 262
 263   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
 264       OrigArg.Ty, CallConv, false);
 265   for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
 266     Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
 267     SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
 268                            OrigArg.IsFixed);
 269     if (NeedsRegBlock)
 270       SplitArgs.back().Flags[0].setInConsecutiveRegs();
 271   }
 272
 273   SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
 274 }
 275
 276 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
 277                                       const Value *Val,
 278                                       ArrayRef<Register> VRegs,
 279                                       Register SwiftErrorVReg) const {
 280   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
 281   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
 282          "Return value without a vreg");
 283
 284   bool Success = true;
 285   if (!VRegs.empty()) {
 286     MachineFunction &MF = MIRBuilder.getMF();
 287     const Function &F = MF.getFunction();
 288
 289     MachineRegisterInfo &MRI = MF.getRegInfo();
 290     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 291     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
 292     auto &DL = F.getParent()->getDataLayout();
 293     LLVMContext &Ctx = Val->getType()->getContext();
 294
 295     SmallVector<EVT, 4> SplitEVTs;
 296     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
 297     assert(VRegs.size() == SplitEVTs.size() &&
 298            "For each split Type there should be exactly one VReg.");
 299
 300     SmallVector<ArgInfo, 8> SplitArgs;
 301     CallingConv::ID CC = F.getCallingConv();
 302
 303     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
 304       if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
 305         LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
 306         return false;
 307       }
 308
 309       Register CurVReg = VRegs[i];
 310       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
 311       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 312
 313       // i1 is a special case because SDAG i1 true is naturally zero extended
 314       // when widened using ANYEXT. We need to do it explicitly here.
 315       if (MRI.getType(CurVReg).getSizeInBits() == 1) {
 316         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
 317       } else {
 318         // Some types will need extending as specified by the CC.
 319         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
 320         if (EVT(NewVT) != SplitEVTs[i]) {
 321           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
 322           if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
 323                                              Attribute::SExt))
 324             ExtendOp = TargetOpcode::G_SEXT;
 325           else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
 326                                                   Attribute::ZExt))
 327             ExtendOp = TargetOpcode::G_ZEXT;
 328
 329           LLT NewLLT(NewVT);
 330           LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
 331           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
 332           // Instead of an extend, we might have a vector type which needs
 333           // padding with more elements, e.g. <2 x half> -> <4 x half>.
 334           if (NewVT.isVector()) {
 335             if (OldLLT.isVector()) {
 336               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
 337                 // We don't handle VA types which are not exactly twice the
 338                 // size, but can easily be done in future.
 339                 if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
 340                   LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
 341                   return false;
 342                 }
 343                 auto Undef = MIRBuilder.buildUndef({OldLLT});
 344                 CurVReg =
 345                     MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
 346               } else {
 347                 // Just do a vector extend.
 348                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
 349                               .getReg(0);
 350               }
 351             } else if (NewLLT.getNumElements() == 2) {
 352               // We need to pad a <1 x S> type to <2 x S>. Since we don't have
 353               // <1 x S> vector types in GISel we use a build_vector instead
 354               // of a vector merge/concat.
 355               auto Undef = MIRBuilder.buildUndef({OldLLT});
 356               CurVReg =
 357                   MIRBuilder
 358                       .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
 359                       .getReg(0);
 360             } else {
 361               LLVM_DEBUG(dbgs() << "Could not handle ret ty");
 362               return false;
 363             }
 364           } else {
 365             // A scalar extend.
 366             CurVReg =
 367                 MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
 368           }
 369         }
 370       }
 371       if (CurVReg != CurArgInfo.Regs[0]) {
 372         CurArgInfo.Regs[0] = CurVReg;
 373         // Reset the arg flags after modifying CurVReg.
 374         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 375       }
 376      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
 377     }
 378
 379     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
 380     Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
 381   }
 382
 383   if (SwiftErrorVReg) {
 384     MIB.addUse(AArch64::X21, RegState::Implicit);
 385     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
 386   }
 387
 388   MIRBuilder.insertInstr(MIB);
 389   return Success;
 390 }
 391
 392 /// Helper function to compute forwarded registers for musttail calls. Computes
 393 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
 394 /// can be used to save + restore registers later.
 395 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
 396                                              CCAssignFn *AssignFn) {
 397   MachineBasicBlock &MBB = MIRBuilder.getMBB();
 398   MachineFunction &MF = MIRBuilder.getMF();
 399   MachineFrameInfo &MFI = MF.getFrameInfo();
 400
 401   if (!MFI.hasMustTailInVarArgFunc())
 402     return;
 403
 404   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 405   const Function &F = MF.getFunction();
 406   assert(F.isVarArg() && "Expected F to be vararg?");
 407
 408   // Compute the set of forwarded registers. The rest are scratch.
 409   SmallVector<CCValAssign, 16> ArgLocs;
 410   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
 411                  F.getContext());
 412   SmallVector<MVT, 2> RegParmTypes;
 413   RegParmTypes.push_back(MVT::i64);
 414   RegParmTypes.push_back(MVT::f128);
 415
 416   // Later on, we can use this vector to restore the registers if necessary.
 417   SmallVectorImpl<ForwardedRegister> &Forwards =
 418       FuncInfo->getForwardedMustTailRegParms();
 419   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
 420
 421   // Conservatively forward X8, since it might be used for an aggregate
 422   // return.
 423   if (!CCInfo.isAllocated(AArch64::X8)) {
 424     unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
 425     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
 426   }
 427
 428   // Add the forwards to the MachineBasicBlock and MachineFunction.
 429   for (const auto &F : Forwards) {
 430     MBB.addLiveIn(F.PReg);
 431     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
 432   }
 433 }
 434
     /// Return true when \p F returns, or takes as an argument, a scalable
     /// vector type.
 435 bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const {
 436   auto IsScalable = [](const Type *Ty) { return isa<ScalableVectorType>(Ty); };
 437   return IsScalable(F.getReturnType()) ||
 438          llvm::any_of(F.args(), [&](const Argument &A) {
 439            return IsScalable(A.getType());
 440          });
 441 }
 442
 443 bool AArch64CallLowering::lowerFormalArguments(
 444     MachineIRBuilder &MIRBuilder, const Function &F,
 445     ArrayRef<ArrayRef<Register>> VRegs) const {
 446   MachineFunction &MF = MIRBuilder.getMF();
 447   MachineBasicBlock &MBB = MIRBuilder.getMBB();
 448   MachineRegisterInfo &MRI = MF.getRegInfo();
 449   auto &DL = F.getParent()->getDataLayout();
 450
 451   SmallVector<ArgInfo, 8> SplitArgs;
 452   unsigned i = 0;
 453   for (auto &Arg : F.args()) {
 454     if (DL.getTypeStoreSize(Arg.getType()).isZero())
 455       continue;
 456
 457     ArgInfo OrigArg{VRegs[i], Arg.getType()};
 458     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
 459
 460     splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
 461     ++i;
 462   }
 463
 464   if (!MBB.empty())
 465     MIRBuilder.setInstr(*MBB.begin());
 466
 467   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 468   CCAssignFn *AssignFn =
 469       TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
 470
 471   FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
 472   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
 473     return false;
 474
 475   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 476   uint64_t StackOffset = Handler.StackUsed;
 477   if (F.isVarArg()) {
 478     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 479     if (!Subtarget.isTargetDarwin()) {
 480         // FIXME: we need to reimplement saveVarArgsRegisters from
 481       // AArch64ISelLowering.
 482       return false;
 483     }
 484
 485     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
 486     StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
 487
 488     auto &MFI = MIRBuilder.getMF().getFrameInfo();
 489     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
 490   }
 491
 492   if (doesCalleeRestoreStack(F.getCallingConv(),
 493                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
 494     // We have a non-standard ABI, so why not make full use of the stack that
 495     // we're going to pop? It must be aligned to 16 B in any case.
 496     StackOffset = alignTo(StackOffset, 16);
 497
 498     // If we're expected to restore the stack (e.g. fastcc), then we'll be
 499     // adding a multiple of 16.
 500     FuncInfo->setArgumentStackToRestore(StackOffset);
 501
 502     // Our own callers will guarantee that the space is free by giving an
 503     // aligned value to CALLSEQ_START.
 504   }
 505
 506   // When we tail call, we need to check if the callee's arguments
 507   // will fit on the caller's stack. So, whenever we lower formal arguments,
 508   // we should keep track of this information, since we might lower a tail call
 509   // in this function later.
 510   FuncInfo->setBytesInStackArgArea(StackOffset);
 511
 512   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 513   if (Subtarget.hasCustomCallingConv())
 514     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
 515
 516   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
 517
 518   // Move back to the end of the basic block.
 519   MIRBuilder.setMBB(MBB);
 520
 521   return true;
 522 }
 523
 524 /// Return true if TCO can be guaranteed for \p CC, which holds for fastcc only.
 525 static bool canGuaranteeTCO(CallingConv::ID CC) {
 526   return CallingConv::Fast == CC;
 527 }
 528
 529 /// Return true if calls using \p CC might ever be tail-call optimised: the C,
     /// PreserveMost and Swift conventions, plus every convention for which TCO
     /// can be guaranteed.
 530 static bool mayTailCallThisCC(CallingConv::ID CC) {
 531   const bool IsListedCC = CC == CallingConv::C ||
 532                           CC == CallingConv::PreserveMost ||
 533                           CC == CallingConv::Swift;
 534   return IsListedCC || canGuaranteeTCO(CC);
 535 }
 540
 541 /// \returns the {fixed-argument, variadic-argument} CCAssignFn pair for \p CC.
 542 static std::pair<CCAssignFn *, CCAssignFn *>
 543 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
 544   return std::make_pair(TLI.CCAssignFnForCall(CC, /*IsVarArg=*/false),
 545                         TLI.CCAssignFnForCall(CC, /*IsVarArg=*/true));
 546 }
 547
 548 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
 549     CallLoweringInfo &Info, MachineFunction &MF,
 550     SmallVectorImpl<ArgInfo> &InArgs) const {
 551   const Function &CallerF = MF.getFunction();
 552   CallingConv::ID CalleeCC = Info.CallConv;
 553   CallingConv::ID CallerCC = CallerF.getCallingConv();
 554
 555   // If the calling conventions match, then everything must be the same.
 556   if (CalleeCC == CallerCC)
 557     return true;
 558
 559   // Check if the caller and callee will handle arguments in the same way.
 560   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 561   CCAssignFn *CalleeAssignFnFixed;
 562   CCAssignFn *CalleeAssignFnVarArg;
 563   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
 564       getAssignFnsForCC(CalleeCC, TLI);
 565
 566   CCAssignFn *CallerAssignFnFixed;
 567   CCAssignFn *CallerAssignFnVarArg;
 568   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
 569       getAssignFnsForCC(CallerCC, TLI);
 570
 571   if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
 572                          *CalleeAssignFnVarArg, *CallerAssignFnFixed,
 573                          *CallerAssignFnVarArg))
 574     return false;
 575
 576   // Make sure that the caller and callee preserve all of the same registers.
 577   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 578   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
 579   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
 580   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
 581     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
 582     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
 583   }
 584
 585   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
 586 }
 587
 588 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
 589     CallLoweringInfo &Info, MachineFunction &MF,
 590     SmallVectorImpl<ArgInfo> &OutArgs) const {
 591   // If there are no outgoing arguments, then we are done.
 592   if (OutArgs.empty())
 593     return true;
 594
 595   const Function &CallerF = MF.getFunction();
 596   CallingConv::ID CalleeCC = Info.CallConv;
 597   CallingConv::ID CallerCC = CallerF.getCallingConv();
 598   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 599
 600   CCAssignFn *AssignFnFixed;
 601   CCAssignFn *AssignFnVarArg;
 602   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
 603
 604   // We have outgoing arguments. Make sure that we can tail call with them.
 605   SmallVector<CCValAssign, 16> OutLocs;
 606   CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
 607
 608   if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
 609     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
 610     return false;
 611   }
 612
 613   // Make sure that they can fit on the caller's stack.
 614   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 615   if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
 616     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
 617     return false;
 618   }
 619
 620   // Verify that the parameters in callee-saved registers match.
 621   // TODO: Port this over to CallLowering as general code once swiftself is
 622   // supported.
 623   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 624   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
 625   MachineRegisterInfo &MRI = MF.getRegInfo();
 626
 627   for (unsigned i = 0; i < OutLocs.size(); ++i) {
 628     auto &ArgLoc = OutLocs[i];
 629     // If it's not a register, it's fine.
 630     if (!ArgLoc.isRegLoc()) {
 631       if (Info.IsVarArg) {
 632         // Be conservative and disallow variadic memory operands to match SDAG's
 633         // behaviour.
 634         // FIXME: If the caller's calling convention is C, then we can
 635         // potentially use its argument area. However, for cases like fastcc,
 636         // we can't do anything.
 637         LLVM_DEBUG(
 638             dbgs()
 639             << "... Cannot tail call vararg function with stack arguments\n");
 640         return false;
 641       }
 642       continue;
 643     }
 644
 645     Register Reg = ArgLoc.getLocReg();
 646
 647     // Only look at callee-saved registers.
 648     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
 649       continue;
 650
 651     LLVM_DEBUG(
 652         dbgs()
 653         << "... Call has an argument passed in a callee-saved register.\n");
 654
 655     // Check if it was copied from.
 656     ArgInfo &OutInfo = OutArgs[i];
 657
 658     if (OutInfo.Regs.size() > 1) {
 659       LLVM_DEBUG(
 660           dbgs() << "... Cannot handle arguments in multiple registers.\n");
 661       return false;
 662     }
 663
 664     // Check if we copy the register, walking through copies from virtual
 665     // registers. Note that getDefIgnoringCopies does not ignore copies from
 666     // physical registers.
 667     MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
 668     if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
 669       LLVM_DEBUG(
 670           dbgs()
 671           << "... Parameter was not copied into a VReg, cannot tail call.\n");
 672       return false;
 673     }
 674
 675     // Got a copy. Verify that it's the same as the register we want.
 676     Register CopyRHS = RegDef->getOperand(1).getReg();
 677     if (CopyRHS != Reg) {
 678       LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
 679                            "VReg, cannot tail call.\n");
 680       return false;
 681     }
 682   }
 683
 684   return true;
 685 }
 686
 687 bool AArch64CallLowering::isEligibleForTailCallOptimization(
 688     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
 689     SmallVectorImpl<ArgInfo> &InArgs,
 690     SmallVectorImpl<ArgInfo> &OutArgs) const {
 691
 692   // Must pass all target-independent checks in order to tail call optimize.
 693   if (!Info.IsTailCall)
 694     return false;
 695
 696   CallingConv::ID CalleeCC = Info.CallConv;
 697   MachineFunction &MF = MIRBuilder.getMF();
 698   const Function &CallerF = MF.getFunction();
 699
 700   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
 701
 702   if (Info.SwiftErrorVReg) {
 703     // TODO: We should handle this.
 704     // Note that this is also handled by the check for no outgoing arguments.
 705     // Proactively disabling this though, because the swifterror handling in
 706     // lowerCall inserts a COPY *after* the location of the call.
 707     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
 708     return false;
 709   }
 710
 711   if (!mayTailCallThisCC(CalleeCC)) {
 712     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
 713     return false;
 714   }
 715
 716   // Byval parameters hand the function a pointer directly into the stack area
 717   // we want to reuse during a tail call. Working around this *is* possible (see
 718   // X86).
 719   //
 720   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
 721   // it?
 722   //
 723   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
 724   // In this case, it is necessary to save/restore X0 in the callee. Tail
 725   // call opt interferes with this. So we disable tail call opt when the
 726   // caller has an argument with "inreg" attribute.
 727   //
 728   // FIXME: Check whether the callee also has an "inreg" argument.
 729   //
 730   // When the caller has a swifterror argument, we don't want to tail call
 731   // because would have to move into the swifterror register before the
 732   // tail call.
 733   if (any_of(CallerF.args(), [](const Argument &A) {
 734         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
 735       })) {
 736     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
 737                          "inreg, or swifterror arguments\n");
 738     return false;
 739   }
 740
 741   // Externally-defined functions with weak linkage should not be
 742   // tail-called on AArch64 when the OS does not support dynamic
 743   // pre-emption of symbols, as the AAELF spec requires normal calls
 744   // to undefined weak functions to be replaced with a NOP or jump to the
 745   // next instruction. The behaviour of branch instructions in this
 746   // situation (as used for tail calls) is implementation-defined, so we
 747   // cannot rely on the linker replacing the tail call with a return.
 748   if (Info.Callee.isGlobal()) {
 749     const GlobalValue *GV = Info.Callee.getGlobal();
 750     const Triple &TT = MF.getTarget().getTargetTriple();
 751     if (GV->hasExternalWeakLinkage() &&
 752         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
 753          TT.isOSBinFormatMachO())) {
 754       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
 755                            "with weak linkage for this OS.\n");
 756       return false;
 757     }
 758   }
 759
 760   // If we have -tailcallopt, then we're done.
 761   if (MF.getTarget().Options.GuaranteedTailCallOpt)
 762     return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
 763
 764   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
 765   // Try to find cases where we can do that.
 766
 767   // I want anyone implementing a new calling convention to think long and hard
 768   // about this assert.
 769   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
 770          "Unexpected variadic calling convention");
 771
 772   // Verify that the incoming and outgoing arguments from the callee are
 773   // safe to tail call.
 774   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
 775     LLVM_DEBUG(
 776         dbgs()
 777         << "... Caller and callee have incompatible calling conventions.\n");
 778     return false;
 779   }
 780
 781   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
 782     return false;
 783
 784   LLVM_DEBUG(
 785       dbgs() << "... Call is eligible for tail call optimization.\n");
 786   return true;
 787 }
 788
 789 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
 790                               bool IsTailCall) {
 791   if (!IsTailCall)
 792     return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
 793
 794   if (!IsIndirect)
 795     return AArch64::TCRETURNdi;
 796
 797   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
 798   // x16 or x17.
 799   if (CallerF.getFunction().hasFnAttribute("branch-target-enforcement"))
 800     return AArch64::TCRETURNriBTI;
 801
 802   return AArch64::TCRETURNri;
 803 }
 804
 805 bool AArch64CallLowering::lowerTailCall(
 806     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
 807     SmallVectorImpl<ArgInfo> &OutArgs) const {
 808   MachineFunction &MF = MIRBuilder.getMF();
 809   const Function &F = MF.getFunction();
 810   MachineRegisterInfo &MRI = MF.getRegInfo();
 811   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 812   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 813
 814   // True when we're tail calling, but without -tailcallopt.
 815   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
 816
 817   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
 818   // register class. Until we can do that, we should fall back here.
 819   if (F.hasFnAttribute("branch-target-enforcement")) {
 820     LLVM_DEBUG(
 821         dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
 822     return false;
 823   }
 824
 825   // Find out which ABI gets to decide where things go.
 826   CallingConv::ID CalleeCC = Info.CallConv;
 827   CCAssignFn *AssignFnFixed;
 828   CCAssignFn *AssignFnVarArg;
 829   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
 830
 831   MachineInstrBuilder CallSeqStart;
 832   if (!IsSibCall)
 833     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 834
 835   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
 836   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
 837   MIB.add(Info.Callee);
 838
 839   // Byte offset for the tail call. When we are sibcalling, this will always
 840   // be 0.
 841   MIB.addImm(0);
 842
 843   // Tell the call which registers are clobbered.
 844   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 845   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
 846   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
 847     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
 848   MIB.addRegMask(Mask);
 849
 850   if (TRI->isAnyArgRegReserved(MF))
 851     TRI->emitReservedArgRegCallError(MF);
 852
 853   // FPDiff is the byte offset of the call's argument area from the callee's.
 854   // Stores to callee stack arguments will be placed in FixedStackSlots offset
 855   // by this amount for a tail call. In a sibling call it must be 0 because the
 856   // caller will deallocate the entire stack and the callee still expects its
 857   // arguments to begin at SP+0.
 858   int FPDiff = 0;
 859
 860   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
 861   // by -tailcallopt. For sibcalls, the memory operands for the call are
 862   // already available in the caller's incoming argument space.
 863   unsigned NumBytes = 0;
 864   if (!IsSibCall) {
 865     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
 866     // before handling assignments, because FPDiff must be known for memory
 867     // arguments.
 868     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
 869     SmallVector<CCValAssign, 16> OutLocs;
 870     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
 871     analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);
 872
 873     // The callee will pop the argument stack as a tail call. Thus, we must
 874     // keep it 16-byte aligned.
 875     NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
 876
 877     // FPDiff will be negative if this tail call requires more space than we
 878     // would automatically have in our incoming argument space. Positive if we
 879     // actually shrink the stack.
 880     FPDiff = NumReusableBytes - NumBytes;
 881
 882     // The stack pointer must be 16-byte aligned at all times it's used for a
 883     // memory operation, which in practice means at *all* times and in
 884     // particular across call boundaries. Therefore our own arguments started at
 885     // a 16-byte aligned SP and the delta applied for the tail call should
 886     // satisfy the same constraint.
 887     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
 888   }
 889
 890   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
 891
 892   // Do the actual argument marshalling.
 893   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
 894                              AssignFnVarArg, true, FPDiff);
 895   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
 896     return false;
 897
 898   if (Info.IsVarArg && Info.IsMustTailCall) {
 899     // Now we know what's being passed to the function. Add uses to the call for
 900     // the forwarded registers that we *aren't* passing as parameters. This will
 901     // preserve the copies we build earlier.
 902     for (const auto &F : Forwards) {
 903       Register ForwardedReg = F.PReg;
 904       // If the register is already passed, or aliases a register which is
 905       // already being passed, then skip it.
 906       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
 907             if (!Use.isReg())
 908               return false;
 909             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
 910           }))
 911         continue;
 912
 913       // We aren't passing it already, so we should add it to the call.
 914       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
 915       MIB.addReg(ForwardedReg, RegState::Implicit);
 916     }
 917   }
 918
 919   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
 920   // sequence start and end here.
 921   if (!IsSibCall) {
 922     MIB->getOperand(1).setImm(FPDiff);
 923     CallSeqStart.addImm(NumBytes).addImm(0);
 924     // End the call sequence *before* emitting the call. Normally, we would
 925     // tidy the frame up after the call. However, here, we've laid out the
 926     // parameters so that when SP is reset, they will be in the correct
 927     // location.
 928     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
 929   }
 930
 931   // Now we can add the actual call instruction to the correct basic block.
 932   MIRBuilder.insertInstr(MIB);
 933
 934   // If Callee is a reg, since it is used by a target specific instruction,
 935   // it must have a register class matching the constraint of that instruction.
 936   if (Info.Callee.isReg())
 937     MIB->getOperand(0).setReg(constrainOperandRegClass(
 938         MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
 939         *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
 940         0));
 941
 942   MF.getFrameInfo().setHasTailCall();
 943   Info.LoweredTailCall = true;
 944   return true;
 945 }
 946
 947 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 948                                     CallLoweringInfo &Info) const {
 949   MachineFunction &MF = MIRBuilder.getMF();
 950   const Function &F = MF.getFunction();
 951   MachineRegisterInfo &MRI = MF.getRegInfo();
 952   auto &DL = F.getParent()->getDataLayout();
 953   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 954
 955   SmallVector<ArgInfo, 8> OutArgs;
 956   for (auto &OrigArg : Info.OrigArgs) {
 957     splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
 958     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
 959     if (OrigArg.Ty->isIntegerTy(1))
 960       OutArgs.back().Flags[0].setZExt();
 961   }
 962
 963   SmallVector<ArgInfo, 8> InArgs;
 964   if (!Info.OrigRet.Ty->isVoidTy())
 965     splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
 966
 967   // If we can lower as a tail call, do that instead.
 968   bool CanTailCallOpt =
 969       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
 970
 971   // We must emit a tail call if we have musttail.
 972   if (Info.IsMustTailCall && !CanTailCallOpt) {
 973     // There are types of incoming/outgoing arguments we can't handle yet, so
 974     // it doesn't make sense to actually die here like in ISelLowering. Instead,
 975     // fall back to SelectionDAG and let it try to handle this.
 976     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
 977     return false;
 978   }
 979
 980   if (CanTailCallOpt)
 981     return lowerTailCall(MIRBuilder, Info, OutArgs);
 982
 983   // Find out which ABI gets to decide where things go.
 984   CCAssignFn *AssignFnFixed;
 985   CCAssignFn *AssignFnVarArg;
 986   std::tie(AssignFnFixed, AssignFnVarArg) =
 987       getAssignFnsForCC(Info.CallConv, TLI);
 988
 989   MachineInstrBuilder CallSeqStart;
 990   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 991
 992   // Create a temporarily-floating call instruction so we can add the implicit
 993   // uses of arg registers.
 994   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
 995
 996   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
 997   MIB.add(Info.Callee);
 998
 999   // Tell the call which registers are clobbered.
1000   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
1001   const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
1002   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1003     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1004   MIB.addRegMask(Mask);
1005
1006   if (TRI->isAnyArgRegReserved(MF))
1007     TRI->emitReservedArgRegCallError(MF);
1008
1009   // Do the actual argument marshalling.
1010   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
1011                              AssignFnVarArg, false);
1012   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
1013     return false;
1014
1015   // Now we can add the actual call instruction to the correct basic block.
1016   MIRBuilder.insertInstr(MIB);
1017
1018   // If Callee is a reg, since it is used by a target specific
1019   // instruction, it must have a register class matching the
1020   // constraint of that instruction.
1021   if (Info.Callee.isReg())
1022     MIB->getOperand(0).setReg(constrainOperandRegClass(
1023         MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1024         *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee,
1025         0));
1026
1027   // Finally we can copy the returned value back into its virtual-register. In
1028   // symmetry with the arguments, the physical register must be an
1029   // implicit-define of the call instruction.
1030   if (!Info.OrigRet.Ty->isVoidTy()) {
1031     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
1032     CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
1033     if (!handleAssignments(MIRBuilder, InArgs, Handler))
1034       return false;
1035   }
1036
1037   if (Info.SwiftErrorVReg) {
1038     MIB.addDef(AArch64::X21, RegState::Implicit);
1039     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
1040   }
1041
1042   uint64_t CalleePopBytes =
1043       doesCalleeRestoreStack(Info.CallConv,
1044                              MF.getTarget().Options.GuaranteedTailCallOpt)
1045           ? alignTo(Handler.StackSize, 16)
1046           : 0;
1047
1048   CallSeqStart.addImm(Handler.StackSize).addImm(0);
1049   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1050       .addImm(Handler.StackSize)
1051       .addImm(CalleePopBytes);
1052
1053   return true;
1054 }