contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

   1 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file implements the lowering of LLVM calls to machine code calls for
  11 /// GlobalISel.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AArch64CallLowering.h"
  16 #include "AArch64ISelLowering.h"
  17 #include "AArch64MachineFunctionInfo.h"
  18 #include "AArch64Subtarget.h"
  19 #include "llvm/ADT/ArrayRef.h"
  20 #include "llvm/ADT/SmallVector.h"
  21 #include "llvm/CodeGen/Analysis.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  24 #include "llvm/CodeGen/GlobalISel/Utils.h"
  25 #include "llvm/CodeGen/LowLevelType.h"
  26 #include "llvm/CodeGen/MachineBasicBlock.h"
  27 #include "llvm/CodeGen/MachineFrameInfo.h"
  28 #include "llvm/CodeGen/MachineFunction.h"
  29 #include "llvm/CodeGen/MachineInstrBuilder.h"
  30 #include "llvm/CodeGen/MachineMemOperand.h"
  31 #include "llvm/CodeGen/MachineOperand.h"
  32 #include "llvm/CodeGen/MachineRegisterInfo.h"
  33 #include "llvm/CodeGen/TargetRegisterInfo.h"
  34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  35 #include "llvm/CodeGen/ValueTypes.h"
  36 #include "llvm/IR/Argument.h"
  37 #include "llvm/IR/Attributes.h"
  38 #include "llvm/IR/Function.h"
  39 #include "llvm/IR/Type.h"
  40 #include "llvm/IR/Value.h"
  41 #include "llvm/Support/MachineValueType.h"
  42 #include <algorithm>
  43 #include <cassert>
  44 #include <cstdint>
  45 #include <iterator>
  46
  47 #define DEBUG_TYPE "aarch64-call-lowering"
  48
  49 using namespace llvm;
  50
  51 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
  52   : CallLowering(&TLI) {}
  53
  54 static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
  55                                              MVT &LocVT) {
  56   // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  57   // hack because the DAG calls the assignment function with pre-legalized
  58   // register typed values, not the raw type.
  59   //
  60   // This hack is not applied to return values which are not passed on the
  61   // stack.
  62   if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
  63     ValVT = LocVT = MVT::i8;
  64   else if (OrigVT == MVT::i16)
  65     ValVT = LocVT = MVT::i16;
  66 }
  67
  68 // Account for i1/i8/i16 stack passed value hack
  69 static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
  70   const MVT ValVT = VA.getValVT();
  71   return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
  72                                                  : LLT(VA.getLocVT());
  73 }
  74
  75 namespace {
  76
  77 struct AArch64IncomingValueAssigner
  78     : public CallLowering::IncomingValueAssigner {
  79   AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
  80                                CCAssignFn *AssignFnVarArg_)
  81       : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
  82
  83   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
  84                  CCValAssign::LocInfo LocInfo,
  85                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
  86                  CCState &State) override {
  87     applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
  88     return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
  89                                             LocInfo, Info, Flags, State);
  90   }
  91 };
  92
  93 struct AArch64OutgoingValueAssigner
  94     : public CallLowering::OutgoingValueAssigner {
  95   const AArch64Subtarget &Subtarget;
  96
  97   /// Track if this is used for a return instead of function argument
  98   /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  99   /// stack passed returns for them and cannot apply the type adjustment.
 100   bool IsReturn;
 101
 102   AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
 103                                CCAssignFn *AssignFnVarArg_,
 104                                const AArch64Subtarget &Subtarget_,
 105                                bool IsReturn)
 106       : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
 107         Subtarget(Subtarget_), IsReturn(IsReturn) {}
 108
 109   bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
 110                  CCValAssign::LocInfo LocInfo,
 111                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
 112                  CCState &State) override {
 113     bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
 114     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
 115
 116     if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
 117       applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
 118
 119     bool Res;
 120     if (Info.IsFixed && !UseVarArgsCCForFixed)
 121       Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
 122     else
 123       Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
 124
 125     StackOffset = State.getNextStackOffset();
 126     return Res;
 127   }
 128 };
 129
 130 struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
 131   IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
 132       : IncomingValueHandler(MIRBuilder, MRI) {}
 133
 134   Register getStackAddress(uint64_t Size, int64_t Offset,
 135                            MachinePointerInfo &MPO,
 136                            ISD::ArgFlagsTy Flags) override {
 137     auto &MFI = MIRBuilder.getMF().getFrameInfo();
 138
 139     // Byval is assumed to be writable memory, but other stack passed arguments
 140     // are not.
 141     const bool IsImmutable = !Flags.isByVal();
 142
 143     int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
 144     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
 145     auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
 146     return AddrReg.getReg(0);
 147   }
 148
 149   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
 150                              ISD::ArgFlagsTy Flags) const override {
 151     // For pointers, we just need to fixup the integer types reported in the
 152     // CCValAssign.
 153     if (Flags.isPointer())
 154       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
 155     return getStackValueStoreTypeHack(VA);
 156   }
 157
 158   void assignValueToReg(Register ValVReg, Register PhysReg,
 159                         CCValAssign VA) override {
 160     markPhysRegUsed(PhysReg);
 161     IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
 162   }
 163
 164   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
 165                             MachinePointerInfo &MPO, CCValAssign &VA) override {
 166     MachineFunction &MF = MIRBuilder.getMF();
 167
 168     LLT ValTy(VA.getValVT());
 169     LLT LocTy(VA.getLocVT());
 170
 171     // Fixup the types for the DAG compatibility hack.
 172     if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
 173       std::swap(ValTy, LocTy);
 174     else {
 175       // The calling code knows if this is a pointer or not, we're only touching
 176       // the LocTy for the i8/i16 hack.
 177       assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
 178       LocTy = MemTy;
 179     }
 180
 181     auto MMO = MF.getMachineMemOperand(
 182         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
 183         inferAlignFromPtrInfo(MF, MPO));
 184
 185     switch (VA.getLocInfo()) {
 186     case CCValAssign::LocInfo::ZExt:
 187       MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
 188       return;
 189     case CCValAssign::LocInfo::SExt:
 190       MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
 191       return;
 192     default:
 193       MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
 194       return;
 195     }
 196   }
 197
 198   /// How the physical register gets marked varies between formal
 199   /// parameters (it's a basic-block live-in), and a call instruction
 200   /// (it's an implicit-def of the BL).
 201   virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
 202 };
 203
 204 struct FormalArgHandler : public IncomingArgHandler {
 205   FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
 206       : IncomingArgHandler(MIRBuilder, MRI) {}
 207
 208   void markPhysRegUsed(MCRegister PhysReg) override {
 209     MIRBuilder.getMRI()->addLiveIn(PhysReg);
 210     MIRBuilder.getMBB().addLiveIn(PhysReg);
 211   }
 212 };
 213
 214 struct CallReturnHandler : public IncomingArgHandler {
 215   CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 216                     MachineInstrBuilder MIB)
 217       : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
 218
 219   void markPhysRegUsed(MCRegister PhysReg) override {
 220     MIB.addDef(PhysReg, RegState::Implicit);
 221   }
 222
 223   MachineInstrBuilder MIB;
 224 };
 225
 226 /// A special return arg handler for "returned" attribute arg calls.
 227 struct ReturnedArgCallReturnHandler : public CallReturnHandler {
 228   ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
 229                                MachineRegisterInfo &MRI,
 230                                MachineInstrBuilder MIB)
 231       : CallReturnHandler(MIRBuilder, MRI, MIB) {}
 232
 233   void markPhysRegUsed(MCRegister PhysReg) override {}
 234 };
 235
 236 struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
 237   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 238                      MachineInstrBuilder MIB, bool IsTailCall = false,
 239                      int FPDiff = 0)
 240       : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
 241         FPDiff(FPDiff),
 242         Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
 243
 244   Register getStackAddress(uint64_t Size, int64_t Offset,
 245                            MachinePointerInfo &MPO,
 246                            ISD::ArgFlagsTy Flags) override {
 247     MachineFunction &MF = MIRBuilder.getMF();
 248     LLT p0 = LLT::pointer(0, 64);
 249     LLT s64 = LLT::scalar(64);
 250
 251     if (IsTailCall) {
 252       assert(!Flags.isByVal() && "byval unhandled with tail calls");
 253
 254       Offset += FPDiff;
 255       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
 256       auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
 257       MPO = MachinePointerInfo::getFixedStack(MF, FI);
 258       return FIReg.getReg(0);
 259     }
 260
 261     if (!SPReg)
 262       SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
 263
 264     auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
 265
 266     auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
 267
 268     MPO = MachinePointerInfo::getStack(MF, Offset);
 269     return AddrReg.getReg(0);
 270   }
 271
 272   /// We need to fixup the reported store size for certain value types because
 273   /// we invert the interpretation of ValVT and LocVT in certain cases. This is
 274   /// for compatability with the DAG call lowering implementation, which we're
 275   /// currently building on top of.
 276   LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
 277                              ISD::ArgFlagsTy Flags) const override {
 278     if (Flags.isPointer())
 279       return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
 280     return getStackValueStoreTypeHack(VA);
 281   }
 282
 283   void assignValueToReg(Register ValVReg, Register PhysReg,
 284                         CCValAssign VA) override {
 285     MIB.addUse(PhysReg, RegState::Implicit);
 286     Register ExtReg = extendRegister(ValVReg, VA);
 287     MIRBuilder.buildCopy(PhysReg, ExtReg);
 288   }
 289
 290   void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
 291                             MachinePointerInfo &MPO, CCValAssign &VA) override {
 292     MachineFunction &MF = MIRBuilder.getMF();
 293     auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
 294                                        inferAlignFromPtrInfo(MF, MPO));
 295     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
 296   }
 297
 298   void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
 299                             Register Addr, LLT MemTy, MachinePointerInfo &MPO,
 300                             CCValAssign &VA) override {
 301     unsigned MaxSize = MemTy.getSizeInBytes() * 8;
 302     // For varargs, we always want to extend them to 8 bytes, in which case
 303     // we disable setting a max.
 304     if (!Arg.IsFixed)
 305       MaxSize = 0;
 306
 307     Register ValVReg = Arg.Regs[RegIndex];
 308     if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
 309       MVT LocVT = VA.getLocVT();
 310       MVT ValVT = VA.getValVT();
 311
 312       if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
 313         std::swap(ValVT, LocVT);
 314         MemTy = LLT(VA.getValVT());
 315       }
 316
 317       ValVReg = extendRegister(ValVReg, VA, MaxSize);
 318     } else {
 319       // The store does not cover the full allocated stack slot.
 320       MemTy = LLT(VA.getValVT());
 321     }
 322
 323     assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
 324   }
 325
 326   MachineInstrBuilder MIB;
 327
 328   bool IsTailCall;
 329
 330   /// For tail calls, the byte offset of the call's argument area from the
 331   /// callee's. Unused elsewhere.
 332   int FPDiff;
 333
 334   // Cache the SP register vreg if we need it more than once in this call site.
 335   Register SPReg;
 336
 337   const AArch64Subtarget &Subtarget;
 338 };
 339 } // namespace
 340
 341 static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
 342   return (CallConv == CallingConv::Fast && TailCallOpt) ||
 343          CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
 344 }
 345
 346 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
 347                                       const Value *Val,
 348                                       ArrayRef<Register> VRegs,
 349                                       FunctionLoweringInfo &FLI,
 350                                       Register SwiftErrorVReg) const {
 351   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
 352   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
 353          "Return value without a vreg");
 354
 355   bool Success = true;
 356   if (!VRegs.empty()) {
 357     MachineFunction &MF = MIRBuilder.getMF();
 358     const Function &F = MF.getFunction();
 359     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 360
 361     MachineRegisterInfo &MRI = MF.getRegInfo();
 362     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 363     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
 364     auto &DL = F.getParent()->getDataLayout();
 365     LLVMContext &Ctx = Val->getType()->getContext();
 366
 367     SmallVector<EVT, 4> SplitEVTs;
 368     ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
 369     assert(VRegs.size() == SplitEVTs.size() &&
 370            "For each split Type there should be exactly one VReg.");
 371
 372     SmallVector<ArgInfo, 8> SplitArgs;
 373     CallingConv::ID CC = F.getCallingConv();
 374
 375     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
 376       Register CurVReg = VRegs[i];
 377       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
 378       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 379
 380       // i1 is a special case because SDAG i1 true is naturally zero extended
 381       // when widened using ANYEXT. We need to do it explicitly here.
 382       if (MRI.getType(CurVReg).getSizeInBits() == 1) {
 383         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
 384       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
 385                  1) {
 386         // Some types will need extending as specified by the CC.
 387         MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
 388         if (EVT(NewVT) != SplitEVTs[i]) {
 389           unsigned ExtendOp = TargetOpcode::G_ANYEXT;
 390           if (F.getAttributes().hasRetAttr(Attribute::SExt))
 391             ExtendOp = TargetOpcode::G_SEXT;
 392           else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
 393             ExtendOp = TargetOpcode::G_ZEXT;
 394
 395           LLT NewLLT(NewVT);
 396           LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
 397           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
 398           // Instead of an extend, we might have a vector type which needs
 399           // padding with more elements, e.g. <2 x half> -> <4 x half>.
 400           if (NewVT.isVector()) {
 401             if (OldLLT.isVector()) {
 402               if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
 403                 // We don't handle VA types which are not exactly twice the
 404                 // size, but can easily be done in future.
 405                 if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
 406                   LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
 407                   return false;
 408                 }
 409                 auto Undef = MIRBuilder.buildUndef({OldLLT});
 410                 CurVReg =
 411                     MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
 412               } else {
 413                 // Just do a vector extend.
 414                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
 415                               .getReg(0);
 416               }
 417             } else if (NewLLT.getNumElements() == 2) {
 418               // We need to pad a <1 x S> type to <2 x S>. Since we don't have
 419               // <1 x S> vector types in GISel we use a build_vector instead
 420               // of a vector merge/concat.
 421               auto Undef = MIRBuilder.buildUndef({OldLLT});
 422               CurVReg =
 423                   MIRBuilder
 424                       .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
 425                       .getReg(0);
 426             } else {
 427               LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
 428               return false;
 429             }
 430           } else {
 431             // If the split EVT was a <1 x T> vector, and NewVT is T, then we
 432             // don't have to do anything since we don't distinguish between the
 433             // two.
 434             if (NewLLT != MRI.getType(CurVReg)) {
 435               // A scalar extend.
 436               CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
 437                             .getReg(0);
 438             }
 439           }
 440         }
 441       }
 442       if (CurVReg != CurArgInfo.Regs[0]) {
 443         CurArgInfo.Regs[0] = CurVReg;
 444         // Reset the arg flags after modifying CurVReg.
 445         setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 446       }
 447       splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
 448     }
 449
 450     AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
 451                                           /*IsReturn*/ true);
 452     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
 453     Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
 454                                             MIRBuilder, CC, F.isVarArg());
 455   }
 456
 457   if (SwiftErrorVReg) {
 458     MIB.addUse(AArch64::X21, RegState::Implicit);
 459     MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
 460   }
 461
 462   MIRBuilder.insertInstr(MIB);
 463   return Success;
 464 }
 465
 466 /// Helper function to compute forwarded registers for musttail calls. Computes
 467 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
 468 /// can be used to save + restore registers later.
 469 static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
 470                                              CCAssignFn *AssignFn) {
 471   MachineBasicBlock &MBB = MIRBuilder.getMBB();
 472   MachineFunction &MF = MIRBuilder.getMF();
 473   MachineFrameInfo &MFI = MF.getFrameInfo();
 474
 475   if (!MFI.hasMustTailInVarArgFunc())
 476     return;
 477
 478   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 479   const Function &F = MF.getFunction();
 480   assert(F.isVarArg() && "Expected F to be vararg?");
 481
 482   // Compute the set of forwarded registers. The rest are scratch.
 483   SmallVector<CCValAssign, 16> ArgLocs;
 484   CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
 485                  F.getContext());
 486   SmallVector<MVT, 2> RegParmTypes;
 487   RegParmTypes.push_back(MVT::i64);
 488   RegParmTypes.push_back(MVT::f128);
 489
 490   // Later on, we can use this vector to restore the registers if necessary.
 491   SmallVectorImpl<ForwardedRegister> &Forwards =
 492       FuncInfo->getForwardedMustTailRegParms();
 493   CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
 494
 495   // Conservatively forward X8, since it might be used for an aggregate
 496   // return.
 497   if (!CCInfo.isAllocated(AArch64::X8)) {
 498     Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
 499     Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
 500   }
 501
 502   // Add the forwards to the MachineBasicBlock and MachineFunction.
 503   for (const auto &F : Forwards) {
 504     MBB.addLiveIn(F.PReg);
 505     MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
 506   }
 507 }
 508
 509 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
 510   auto &F = MF.getFunction();
 511   if (isa<ScalableVectorType>(F.getReturnType()))
 512     return true;
 513   if (llvm::any_of(F.args(), [](const Argument &A) {
 514         return isa<ScalableVectorType>(A.getType());
 515       }))
 516     return true;
 517   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
 518   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
 519     LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
 520     return true;
 521   }
 522   return false;
 523 }
 524
 525 bool AArch64CallLowering::lowerFormalArguments(
 526     MachineIRBuilder &MIRBuilder, const Function &F,
 527     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
 528   MachineFunction &MF = MIRBuilder.getMF();
 529   MachineBasicBlock &MBB = MIRBuilder.getMBB();
 530   MachineRegisterInfo &MRI = MF.getRegInfo();
 531   auto &DL = F.getParent()->getDataLayout();
 532
 533   SmallVector<ArgInfo, 8> SplitArgs;
 534   SmallVector<std::pair<Register, Register>> BoolArgs;
 535   unsigned i = 0;
 536   for (auto &Arg : F.args()) {
 537     if (DL.getTypeStoreSize(Arg.getType()).isZero())
 538       continue;
 539
 540     ArgInfo OrigArg{VRegs[i], Arg, i};
 541     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
 542
 543     // i1 arguments are zero-extended to i8 by the caller. Emit a
 544     // hint to reflect this.
 545     if (OrigArg.Ty->isIntegerTy(1)) {
 546       assert(OrigArg.Regs.size() == 1 &&
 547              MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
 548              "Unexpected registers used for i1 arg");
 549
 550       if (!OrigArg.Flags[0].isZExt()) {
 551         // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
 552         Register OrigReg = OrigArg.Regs[0];
 553         Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
 554         OrigArg.Regs[0] = WideReg;
 555         BoolArgs.push_back({OrigReg, WideReg});
 556       }
 557     }
 558
 559     if (Arg.hasAttribute(Attribute::SwiftAsync))
 560       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
 561
 562     splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
 563     ++i;
 564   }
 565
 566   if (!MBB.empty())
 567     MIRBuilder.setInstr(*MBB.begin());
 568
 569   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 570   CCAssignFn *AssignFn =
 571       TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
 572
 573   AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
 574   FormalArgHandler Handler(MIRBuilder, MRI);
 575   if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
 576                                      F.getCallingConv(), F.isVarArg()))
 577     return false;
 578
 579   if (!BoolArgs.empty()) {
 580     for (auto &KV : BoolArgs) {
 581       Register OrigReg = KV.first;
 582       Register WideReg = KV.second;
 583       LLT WideTy = MRI.getType(WideReg);
 584       assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
 585              "Unexpected bit size of a bool arg");
 586       MIRBuilder.buildTrunc(
 587           OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
 588     }
 589   }
 590
 591   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 592   uint64_t StackOffset = Assigner.StackOffset;
 593   if (F.isVarArg()) {
 594     auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 595     if (!Subtarget.isTargetDarwin()) {
 596         // FIXME: we need to reimplement saveVarArgsRegisters from
 597       // AArch64ISelLowering.
 598       return false;
 599     }
 600
 601     // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
 602     StackOffset =
 603         alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);
 604
 605     auto &MFI = MIRBuilder.getMF().getFrameInfo();
 606     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
 607   }
 608
 609   if (doesCalleeRestoreStack(F.getCallingConv(),
 610                              MF.getTarget().Options.GuaranteedTailCallOpt)) {
 611     // We have a non-standard ABI, so why not make full use of the stack that
 612     // we're going to pop? It must be aligned to 16 B in any case.
 613     StackOffset = alignTo(StackOffset, 16);
 614
 615     // If we're expected to restore the stack (e.g. fastcc), then we'll be
 616     // adding a multiple of 16.
 617     FuncInfo->setArgumentStackToRestore(StackOffset);
 618
 619     // Our own callers will guarantee that the space is free by giving an
 620     // aligned value to CALLSEQ_START.
 621   }
 622
 623   // When we tail call, we need to check if the callee's arguments
 624   // will fit on the caller's stack. So, whenever we lower formal arguments,
 625   // we should keep track of this information, since we might lower a tail call
 626   // in this function later.
 627   FuncInfo->setBytesInStackArgArea(StackOffset);
 628
 629   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 630   if (Subtarget.hasCustomCallingConv())
 631     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
 632
 633   handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
 634
 635   // Move back to the end of the basic block.
 636   MIRBuilder.setMBB(MBB);
 637
 638   return true;
 639 }
 640
 641 /// Return true if the calling convention is one that we can guarantee TCO for.
 642 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
 643   return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
 644          CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
 645 }
 646
 647 /// Return true if we might ever do TCO for calls with this calling convention.
 648 static bool mayTailCallThisCC(CallingConv::ID CC) {
 649   switch (CC) {
 650   case CallingConv::C:
 651   case CallingConv::PreserveMost:
 652   case CallingConv::Swift:
 653   case CallingConv::SwiftTail:
 654   case CallingConv::Tail:
 655   case CallingConv::Fast:
 656     return true;
 657   default:
 658     return false;
 659   }
 660 }
 661
 662 /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
 663 /// CC.
 664 static std::pair<CCAssignFn *, CCAssignFn *>
 665 getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
 666   return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
 667 }
 668
 669 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
 670     CallLoweringInfo &Info, MachineFunction &MF,
 671     SmallVectorImpl<ArgInfo> &InArgs) const {
 672   const Function &CallerF = MF.getFunction();
 673   CallingConv::ID CalleeCC = Info.CallConv;
 674   CallingConv::ID CallerCC = CallerF.getCallingConv();
 675
 676   // If the calling conventions match, then everything must be the same.
 677   if (CalleeCC == CallerCC)
 678     return true;
 679
 680   // Check if the caller and callee will handle arguments in the same way.
 681   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 682   CCAssignFn *CalleeAssignFnFixed;
 683   CCAssignFn *CalleeAssignFnVarArg;
 684   std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
 685       getAssignFnsForCC(CalleeCC, TLI);
 686
 687   CCAssignFn *CallerAssignFnFixed;
 688   CCAssignFn *CallerAssignFnVarArg;
 689   std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
 690       getAssignFnsForCC(CallerCC, TLI);
 691
 692   AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
 693                                               CalleeAssignFnVarArg);
 694   AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
 695                                               CallerAssignFnVarArg);
 696
 697   if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
 698     return false;
 699
 700   // Make sure that the caller and callee preserve all of the same registers.
 701   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 702   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
 703   const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
 704   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
 705     TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
 706     TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
 707   }
 708
 709   return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
 710 }
 711
 712 bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
 713     CallLoweringInfo &Info, MachineFunction &MF,
 714     SmallVectorImpl<ArgInfo> &OutArgs) const {
       // Returns true when the outgoing arguments in OutArgs do not prevent the
       // call described by Info from being emitted as a tail call out of MF.
       // Checks, in order: (1) no outgoing arguments means trivially OK;
       // (2) the assignment functions of the callee's calling convention must
       // be able to place every argument; (3) the assigned stack bytes must not
       // exceed the bytes in this function's own stack-argument area; (4) for
       // variadic calls, every argument must be assigned to a register;
       // (5) parametersInCSRMatch() must accept the assignments against the
       // mask of registers preserved under the caller's calling convention.

 715   // If there are no outgoing arguments, then we are done.
 716   if (OutArgs.empty())
 717     return true;
 718
 719   const Function &CallerF = MF.getFunction();
 720   LLVMContext &Ctx = CallerF.getContext();
 721   CallingConv::ID CalleeCC = Info.CallConv;
 722   CallingConv::ID CallerCC = CallerF.getCallingConv();
 723   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 724   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 725
       // The argument locations are computed with the *callee's* convention.
 726   CCAssignFn *AssignFnFixed;
 727   CCAssignFn *AssignFnVarArg;
 728   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
 729
 730   // We have outgoing arguments. Make sure that we can tail call with them.
 731   SmallVector<CCValAssign, 16> OutLocs;
 732   CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
 733
 734   AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
 735                                               Subtarget, /*IsReturn*/ false);
 736   if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
 737     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
 738     return false;
 739   }
 740
 741   // Make sure that they can fit on the caller's stack.
 742   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 743   if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
 744     LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
 745     return false;
 746   }
 747
 748   // Verify that the parameters in callee-saved registers match.
 749   // TODO: Port this over to CallLowering as general code once swiftself is
 750   // supported.
 751   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
 752   const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
 753   MachineRegisterInfo &MRI = MF.getRegInfo();
 754
 755   if (Info.IsVarArg) {
 756     // Be conservative and disallow variadic memory operands to match SDAG's
 757     // behaviour.
 758     // FIXME: If the caller's calling convention is C, then we can
 759     // potentially use its argument area. However, for cases like fastcc,
 760     // we can't do anything.
 761     for (unsigned i = 0; i < OutLocs.size(); ++i) {
 762       auto &ArgLoc = OutLocs[i];
           // Register locations are fine; the first non-register location
           // rejects the tail call.
 763       if (ArgLoc.isRegLoc())
 764         continue;
 765
 766       LLVM_DEBUG(
 767           dbgs()
 768           << "... Cannot tail call vararg function with stack arguments\n");
 769       return false;
 770     }
 771   }
 772
 773   return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
 774 }
 775
 776 bool AArch64CallLowering::isEligibleForTailCallOptimization(
 777     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
 778     SmallVectorImpl<ArgInfo> &InArgs,
 779     SmallVectorImpl<ArgInfo> &OutArgs) const {
       // Decides whether the call described by Info may be lowered as a tail
       // call from the function MIRBuilder is currently building. InArgs and
       // OutArgs are forwarded to the caller/callee compatibility helpers
       // (doCallerAndCalleePassArgsTheSameWay and
       // areCalleeOutgoingArgsTailCallable). Returns false as soon as any check
       // fails.
 780
 781   // Must pass all target-independent checks in order to tail call optimize.
 782   if (!Info.IsTailCall)
 783     return false;
 784
 785   CallingConv::ID CalleeCC = Info.CallConv;
 786   MachineFunction &MF = MIRBuilder.getMF();
 787   const Function &CallerF = MF.getFunction();
 788
 789   LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
 790
 791   if (Info.SwiftErrorVReg) {
 792     // TODO: We should handle this.
 793     // Note that this is also handled by the check for no outgoing arguments.
 794     // Proactively disabling this though, because the swifterror handling in
 795     // lowerCall inserts a COPY *after* the location of the call.
 796     LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
 797     return false;
 798   }
 799
 800   if (!mayTailCallThisCC(CalleeCC)) {
 801     LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
 802     return false;
 803   }
 804
 805   // Byval parameters hand the function a pointer directly into the stack area
 806   // we want to reuse during a tail call. Working around this *is* possible (see
 807   // X86).
 808   //
 809   // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
 810   // it?
 811   //
 812   // On Windows, "inreg" attributes signify non-aggregate indirect returns.
 813   // In this case, it is necessary to save/restore X0 in the callee. Tail
 814   // call opt interferes with this. So we disable tail call opt when the
 815   // caller has an argument with "inreg" attribute.
 816   //
 817   // FIXME: Check whether the callee also has an "inreg" argument.
 818   //
 819   // When the caller has a swifterror argument, we don't want to tail call
 820   // because we would have to move into the swifterror register before the
 821   // tail call.
 822   if (any_of(CallerF.args(), [](const Argument &A) {
 823         return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
 824       })) {
 825     LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
 826                          "inreg, or swifterror arguments\n");
 827     return false;
 828   }
 829
 830   // Externally-defined functions with weak linkage should not be
 831   // tail-called on AArch64 when the OS does not support dynamic
 832   // pre-emption of symbols, as the AAELF spec requires normal calls
 833   // to undefined weak functions to be replaced with a NOP or jump to the
 834   // next instruction. The behaviour of branch instructions in this
 835   // situation (as used for tail calls) is implementation-defined, so we
 836   // cannot rely on the linker replacing the tail call with a return.
 837   if (Info.Callee.isGlobal()) {
 838     const GlobalValue *GV = Info.Callee.getGlobal();
 839     const Triple &TT = MF.getTarget().getTargetTriple();
 840     if (GV->hasExternalWeakLinkage() &&
 841         (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
 842          TT.isOSBinFormatMachO())) {
 843       LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
 844                            "with weak linkage for this OS.\n");
 845       return false;
 846     }
 847   }
 848
 849   // If we have -tailcallopt, then we're done.
       // In that case the remaining sibcall checks below are skipped, and the
       // call is eligible exactly when caller and callee use the same calling
       // convention.
 850   if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
 851     return CalleeCC == CallerF.getCallingConv();
 852
 853   // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
 854   // Try to find cases where we can do that.
 855
 856   // I want anyone implementing a new calling convention to think long and hard
 857   // about this assert.
 858   assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
 859          "Unexpected variadic calling convention");
 860
 861   // Verify that the incoming and outgoing arguments from the callee are
 862   // safe to tail call.
 863   if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
 864     LLVM_DEBUG(
 865         dbgs()
 866         << "... Caller and callee have incompatible calling conventions.\n");
 867     return false;
 868   }
 869
 870   if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
 871     return false;
 872
 873   LLVM_DEBUG(
 874       dbgs() << "... Call is eligible for tail call optimization.\n");
 875   return true;
 876 }
 877
 878 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
 879                               bool IsTailCall) {
       // Selects the machine opcode for a call emitted from CallerF.
       // Non-tail calls use BL when direct and the opcode chosen by
       // getBLRCallOpcode() when indirect; tail calls use one of the TCRETURN*
       // opcodes.
 880   if (!IsTailCall)
 881     return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
 882
       // Direct tail call.
 883   if (!IsIndirect)
 884     return AArch64::TCRETURNdi;
 885
 886   // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
 887   // x16 or x17.
 888   if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
 889     return AArch64::TCRETURNriBTI;
 890
 891   return AArch64::TCRETURNri;
 892 }
 893
 894 static const uint32_t *
 895 getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
 896                AArch64CallLowering::CallLoweringInfo &Info,
 897                const AArch64RegisterInfo &TRI, MachineFunction &MF) {
       // Picks the register mask for the call described by Info. If the first
       // outgoing argument carries the 'returned' flag, the this-return mask is
       // preferred; when that lookup yields a null mask, the flag is cleared on
       // OutArgs[0] (a side effect visible to the caller) and the ordinary
       // call-preserved mask is used instead.
 898   const uint32_t *Mask;
 899   if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
 900     // For 'this' returns, use the X0-preserving mask if applicable
 901     Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
 902     if (!Mask) {
           // No such mask for this calling convention: drop the 'returned'
           // marking so later code does not treat the argument as returned.
 903       OutArgs[0].Flags[0].setReturned(false);
 904       Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
 905     }
 906   } else {
 907     Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
 908   }
 909   return Mask;
 910 }
 911
 912 bool AArch64CallLowering::lowerTailCall(
 913     MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
 914     SmallVectorImpl<ArgInfo> &OutArgs) const {
       // Emits the call described by Info as a tail call. Builds the TCRETURN*
       // instruction, marshals OutArgs using the callee's calling convention
       // and, when this is not a sibcall, brackets the argument setup with
       // ADJCALLSTACKDOWN/ADJCALLSTACKUP (both emitted before the call itself)
       // and records FPDiff on the call. Returns false when this path cannot
       // handle the call; on success marks the frame as having a tail call and
       // sets Info.LoweredTailCall.
 915   MachineFunction &MF = MIRBuilder.getMF();
 916   const Function &F = MF.getFunction();
 917   MachineRegisterInfo &MRI = MF.getRegInfo();
 918   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 919   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 920
 921   // True when we're tail calling, but without -tailcallopt.
 922   bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
 923                    Info.CallConv != CallingConv::Tail &&
 924                    Info.CallConv != CallingConv::SwiftTail;
 925
 926   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
 927   // register class. Until we can do that, we should fall back here.
       // NOTE(review): the condition does not look at Info.Callee, so this
       // rejects every tail call under BTI, not only indirect ones as the debug
       // message suggests -- confirm this is intended.
 928   if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
 929     LLVM_DEBUG(
 930         dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
 931     return false;
 932   }
 933
 934   // Find out which ABI gets to decide where things go.
 935   CallingConv::ID CalleeCC = Info.CallConv;
 936   CCAssignFn *AssignFnFixed;
 937   CCAssignFn *AssignFnVarArg;
 938   std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
 939
       // The immediates of ADJCALLSTACKDOWN are only added near the end of this
       // function, once the argument handling is done.
 940   MachineInstrBuilder CallSeqStart;
 941   if (!IsSibCall)
 942     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 943
       // The call instruction is built detached from the block so that operands
       // can be added to it while the arguments are being lowered; it is
       // inserted by insertInstr() further down.
 944   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
 945   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
 946   MIB.add(Info.Callee);
 947
 948   // Byte offset for the tail call. When we are sibcalling, this will always
 949   // be 0.
 950   MIB.addImm(0);
 951
 952   // Tell the call which registers are clobbered.
 953   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
 954   auto TRI = Subtarget.getRegisterInfo();
 955   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
 956   if (Subtarget.hasCustomCallingConv())
 957     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
 958   MIB.addRegMask(Mask);
 959
 960   if (TRI->isAnyArgRegReserved(MF))
 961     TRI->emitReservedArgRegCallError(MF);
 962
 963   // FPDiff is the byte offset of the call's argument area from the callee's.
 964   // Stores to callee stack arguments will be placed in FixedStackSlots offset
 965   // by this amount for a tail call. In a sibling call it must be 0 because the
 966   // caller will deallocate the entire stack and the callee still expects its
 967   // arguments to begin at SP+0.
 968   int FPDiff = 0;
 969
 970   // This will be 0 for sibcalls, potentially nonzero for tail calls produced
 971   // by -tailcallopt. For sibcalls, the memory operands for the call are
 972   // already available in the caller's incoming argument space.
 973   unsigned NumBytes = 0;
 974   if (!IsSibCall) {
 975     // We aren't sibcalling, so we need to compute FPDiff. We need to do this
 976     // before handling assignments, because FPDiff must be known for memory
 977     // arguments.
 978     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
 979     SmallVector<CCValAssign, 16> OutLocs;
 980     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
 981
 982     AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
 983                                                 Subtarget, /*IsReturn*/ false);
         // NOTE(review): ADJCALLSTACKDOWN has already been emitted (without its
         // immediates) at this point; presumably the fallback path discards
         // the partially built code -- confirm.
 984     if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
 985       return false;
 986
 987     // The callee will pop the argument stack as a tail call. Thus, we must
 988     // keep it 16-byte aligned.
 989     NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
 990
 991     // FPDiff will be negative if this tail call requires more space than we
 992     // would automatically have in our incoming argument space. Positive if we
 993     // actually shrink the stack.
 994     FPDiff = NumReusableBytes - NumBytes;
 995
 996     // Update the required reserved area if this is the tail call requiring the
 997     // most argument stack space.
 998     if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
 999       FuncInfo->setTailCallReservedStack(-FPDiff);
1000
1001     // The stack pointer must be 16-byte aligned at all times it's used for a
1002     // memory operation, which in practice means at *all* times and in
1003     // particular across call boundaries. Therefore our own arguments started at
1004     // a 16-byte aligned SP and the delta applied for the tail call should
1005     // satisfy the same constraint.
1006     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1007   }
1008
1009   const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1010
1011   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1012                                         Subtarget, /*IsReturn*/ false);
1013
1014   // Do the actual argument marshalling.
1015   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1016                              /*IsTailCall*/ true, FPDiff);
1017   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1018                                      CalleeCC, Info.IsVarArg))
1019     return false;
1020
       // NOTE(review): the mask was already attached to MIB above and the value
       // assigned here is not read again in this function; the call can still
       // clear the 'returned' flag on OutArgs[0] (see getMaskForArgs).
1021   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1022
1023   if (Info.IsVarArg && Info.IsMustTailCall) {
1024     // Now we know what's being passed to the function. Add uses to the call for
1025     // the forwarded registers that we *aren't* passing as parameters. This will
1026     // preserve the copies we build earlier.
         // Note: the loop variable F below shadows the outer `const Function &F`.
1027     for (const auto &F : Forwards) {
1028       Register ForwardedReg = F.PReg;
1029       // If the register is already passed, or aliases a register which is
1030       // already being passed, then skip it.
1031       if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
1032             if (!Use.isReg())
1033               return false;
1034             return TRI->regsOverlap(Use.getReg(), ForwardedReg);
1035           }))
1036         continue;
1037
1038       // We aren't passing it already, so we should add it to the call.
1039       MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
1040       MIB.addReg(ForwardedReg, RegState::Implicit);
1041     }
1042   }
1043
1044   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1045   // sequence start and end here.
1046   if (!IsSibCall) {
         // Operand 1 is the byte-offset immediate added with addImm(0) above.
1047     MIB->getOperand(1).setImm(FPDiff);
1048     CallSeqStart.addImm(0).addImm(0);
1049     // End the call sequence *before* emitting the call. Normally, we would
1050     // tidy the frame up after the call. However, here, we've laid out the
1051     // parameters so that when SP is reset, they will be in the correct
1052     // location.
1053     MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
1054   }
1055
1056   // Now we can add the actual call instruction to the correct basic block.
1057   MIRBuilder.insertInstr(MIB);
1058
1059   // If Callee is a reg, since it is used by a target specific instruction,
1060   // it must have a register class matching the constraint of that instruction.
1061   if (Info.Callee.isReg())
1062     constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
1063                              *MF.getSubtarget().getRegBankInfo(), *MIB,
1064                              MIB->getDesc(), Info.Callee, 0);
1065
1066   MF.getFrameInfo().setHasTailCall();
1067   Info.LoweredTailCall = true;
1068   return true;
1069 }
1070
1071 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1072                                     CallLoweringInfo &Info) const {
       // Lowers the call described by Info. Arguments and the return value are
       // split into value types first; if the call is eligible it is handed to
       // lowerTailCall(), otherwise a regular call bracketed by
       // ADJCALLSTACKDOWN/ADJCALLSTACKUP is emitted. Returns false when the
       // call cannot be lowered here (including a musttail call that is not
       // eligible for tail-call lowering).
1073   MachineFunction &MF = MIRBuilder.getMF();
1074   const Function &F = MF.getFunction();
1075   MachineRegisterInfo &MRI = MF.getRegInfo();
1076   auto &DL = F.getParent()->getDataLayout();
1077   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1078
1079   SmallVector<ArgInfo, 8> OutArgs;
1080   for (auto &OrigArg : Info.OrigArgs) {
1081     splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
1082     // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1083     if (OrigArg.Ty->isIntegerTy(1)) {
1084       ArgInfo &OutArg = OutArgs.back();
1085       assert(OutArg.Regs.size() == 1 &&
1086              MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1087              "Unexpected registers used for i1 arg");
1088
1089       // We cannot use a ZExt ArgInfo flag here, because it will
1090       // zero-extend the argument to i32 instead of just i8.
1091       OutArg.Regs[0] =
1092           MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
1093       LLVMContext &Ctx = MF.getFunction().getContext();
1094       OutArg.Ty = Type::getInt8Ty(Ctx);
1095     }
1096   }
1097
1098   SmallVector<ArgInfo, 8> InArgs;
1099   if (!Info.OrigRet.Ty->isVoidTy())
1100     splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
1101
1102   // If we can lower as a tail call, do that instead.
1103   bool CanTailCallOpt =
1104       isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1105
1106   // We must emit a tail call if we have musttail.
1107   if (Info.IsMustTailCall && !CanTailCallOpt) {
1108     // There are types of incoming/outgoing arguments we can't handle yet, so
1109     // it doesn't make sense to actually die here like in ISelLowering. Instead,
1110     // fall back to SelectionDAG and let it try to handle this.
1111     LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1112     return false;
1113   }
1114
       // Record the final decision on Info before choosing the lowering path.
1115   Info.IsTailCall = CanTailCallOpt;
1116   if (CanTailCallOpt)
1117     return lowerTailCall(MIRBuilder, Info, OutArgs);
1118
1119   // Find out which ABI gets to decide where things go.
1120   CCAssignFn *AssignFnFixed;
1121   CCAssignFn *AssignFnVarArg;
1122   std::tie(AssignFnFixed, AssignFnVarArg) =
1123       getAssignFnsForCC(Info.CallConv, TLI);
1124
       // The immediates of ADJCALLSTACKDOWN are filled in at the end, once the
       // outgoing stack size is known.
1125   MachineInstrBuilder CallSeqStart;
1126   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1127
1128   // Create a temporarily-floating call instruction so we can add the implicit
1129   // uses of arg registers.
1130   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1131   unsigned Opc = 0;
1132   // A call to a returns twice function like setjmp must be followed by a bti
1133   // instruction.
1134   if (Info.CB && Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
1135       !Subtarget.noBTIAtReturnTwice() &&
1136       MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1137     Opc = AArch64::BLR_BTI;
1138   else
1139     Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
1140
1141   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
1142   MIB.add(Info.Callee);
1143
1144   // Tell the call which registers are clobbered.
1145   const uint32_t *Mask;
1146   const auto *TRI = Subtarget.getRegisterInfo();
1147
1148   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1149                                         Subtarget, /*IsReturn*/ false);
1150   // Do the actual argument marshalling.
1151   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
1152   if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
1153                                      Info.CallConv, Info.IsVarArg))
1154     return false;
1155
       // The mask is chosen only after the arguments are handled; note that
       // getMaskForArgs() may clear the 'returned' flag on OutArgs[0], which
       // the return-value handling below checks.
1156   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1157
1158   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1159     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1160   MIB.addRegMask(Mask);
1161
1162   if (TRI->isAnyArgRegReserved(MF))
1163     TRI->emitReservedArgRegCallError(MF);
1164
1165   // Now we can add the actual call instruction to the correct basic block.
1166   MIRBuilder.insertInstr(MIB);
1167
1168   // If Callee is a reg, since it is used by a target specific
1169   // instruction, it must have a register class matching the
1170   // constraint of that instruction.
1171   if (Info.Callee.isReg())
1172     constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
1173                              *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
1174                              Info.Callee, 0);
1175
1176   // Finally we can copy the returned value back into its virtual-register. In
1177   // symmetry with the arguments, the physical register must be an
1178   // implicit-define of the call instruction.
1179   if (!Info.OrigRet.Ty->isVoidTy()) {
1180     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
1181     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1182     bool UsingReturnedArg =
1183         !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1184
         // Note: this Assigner (used for the return value) shadows the
         // outgoing-argument Assigner declared above; the stack-size
         // computations after this block use the outer one.
1185     AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1186                                           /*IsReturn*/ false);
1187     ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1188     if (!determineAndHandleAssignments(
1189             UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
1190             MIRBuilder, Info.CallConv, Info.IsVarArg,
1191             UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None))
1192       return false;
1193   }
1194
       // The swifterror value is copied out of X21 after the call, with X21
       // added to the call as an implicit def.
1195   if (Info.SwiftErrorVReg) {
1196     MIB.addDef(AArch64::X21, RegState::Implicit);
1197     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
1198   }
1199
       // Bytes the callee pops itself: the 16-byte-aligned outgoing stack size
       // when doesCalleeRestoreStack() says so, otherwise 0.
1200   uint64_t CalleePopBytes =
1201       doesCalleeRestoreStack(Info.CallConv,
1202                              MF.getTarget().Options.GuaranteedTailCallOpt)
1203           ? alignTo(Assigner.StackOffset, 16)
1204           : 0;
1205
1206   CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
1207   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
1208       .addImm(Assigner.StackOffset)
1209       .addImm(CalleePopBytes);
1210
1211   return true;
1212 }
1213
1214 bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
       // Only types that are exactly 64 bits wide qualify.
1215   return Ty.getSizeInBits() == 64;
1216 }