contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

   1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the interfaces that Hexagon uses to lower LLVM code
  11 // into a selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "HexagonISelLowering.h"
  16 #include "Hexagon.h"
  17 #include "HexagonMachineFunctionInfo.h"
  18 #include "HexagonRegisterInfo.h"
  19 #include "HexagonSubtarget.h"
  20 #include "HexagonTargetMachine.h"
  21 #include "HexagonTargetObjectFile.h"
  22 #include "llvm/ADT/APInt.h"
  23 #include "llvm/ADT/ArrayRef.h"
  24 #include "llvm/ADT/SmallVector.h"
  25 #include "llvm/CodeGen/CallingConvLower.h"
  26 #include "llvm/CodeGen/MachineFrameInfo.h"
  27 #include "llvm/CodeGen/MachineFunction.h"
  28 #include "llvm/CodeGen/MachineMemOperand.h"
  29 #include "llvm/CodeGen/MachineRegisterInfo.h"
  30 #include "llvm/CodeGen/RuntimeLibcalls.h"
  31 #include "llvm/CodeGen/SelectionDAG.h"
  32 #include "llvm/CodeGen/TargetCallingConv.h"
  33 #include "llvm/CodeGen/ValueTypes.h"
  34 #include "llvm/IR/BasicBlock.h"
  35 #include "llvm/IR/CallingConv.h"
  36 #include "llvm/IR/DataLayout.h"
  37 #include "llvm/IR/DerivedTypes.h"
  38 #include "llvm/IR/Function.h"
  39 #include "llvm/IR/GlobalValue.h"
  40 #include "llvm/IR/InlineAsm.h"
  41 #include "llvm/IR/Instructions.h"
  42 #include "llvm/IR/Intrinsics.h"
  43 #include "llvm/IR/IntrinsicInst.h"
  44 #include "llvm/IR/Module.h"
  45 #include "llvm/IR/Type.h"
  46 #include "llvm/IR/Value.h"
  47 #include "llvm/MC/MCRegisterInfo.h"
  48 #include "llvm/Support/Casting.h"
  49 #include "llvm/Support/CodeGen.h"
  50 #include "llvm/Support/CommandLine.h"
  51 #include "llvm/Support/Debug.h"
  52 #include "llvm/Support/ErrorHandling.h"
  53 #include "llvm/Support/MathExtras.h"
  54 #include "llvm/Support/raw_ostream.h"
  55 #include "llvm/Target/TargetMachine.h"
  56 #include <algorithm>
  57 #include <cassert>
  58 #include <cstddef>
  59 #include <cstdint>
  60 #include <limits>
  61 #include <utility>
  62
  63 using namespace llvm;
  64
  65 #define DEBUG_TYPE "hexagon-lowering"
  66
// Command-line options defined by this file. Every option below is marked
// cl::Hidden; the first string argument is the flag name and cl::init gives
// the default value. The code that consumes these options is not part of
// this section of the file.

// -hexagon-emit-jump-tables (bool, default true).
static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control jump table emission on Hexagon target"));

// -enable-hexagon-sdnode-sched (bool, default false).
static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Hexagon SDNode scheduling"));

// -ffast-math (bool, default false).
static cl::opt<bool> EnableFastMath("ffast-math",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Fast Math processing"));

// -minimum-jump-tables (int, default 5).
// NOTE(review): presumably the minimum number of jump table entries; the
// description string does not say so explicitly - confirm at the use site.
static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
  cl::Hidden, cl::ZeroOrMore, cl::init(5),
  cl::desc("Set minimum jump tables"));

// Limits on the number of stores used when inlining memcpy/memmove/memset.
// Each operation has two options: a plain one and an "-Os" one with a smaller
// default. NOTE(review): the "-Os" variants carry the same description text
// as the plain ones; presumably they apply when optimizing for size - confirm
// at the use site.

// -max-store-memcpy (int, default 6).
static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memcpy"));

// -max-store-memcpy-Os (int, default 4).
static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memcpy"));

// -max-store-memmove (int, default 6).
static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memmove"));

// -max-store-memmove-Os (int, default 4).
static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memmove"));

// -max-store-memset (int, default 8).
static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
  cl::Hidden, cl::ZeroOrMore, cl::init(8),
  cl::desc("Max #stores to inline memset"));

// -max-store-memset-Os (int, default 4).
static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memset"));

// -hexagon-align-loads (bool, default false).
static cl::opt<bool> AlignLoads("hexagon-align-loads",
  cl::Hidden, cl::init(false),
  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
 110
 111
 112 namespace {
 113
 114   class HexagonCCState : public CCState {
 115     unsigned NumNamedVarArgParams = 0;
 116
 117   public:
 118     HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
 119                    SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
 120                    unsigned NumNamedArgs)
 121         : CCState(CC, IsVarArg, MF, locs, C),
 122           NumNamedVarArgParams(NumNamedArgs) {}
 123     unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
 124   };
 125
 126 } // end anonymous namespace
 127
 128
 129 // Implement calling convention for Hexagon.
 130
 131 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 132                        CCValAssign::LocInfo &LocInfo,
 133                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
 134   static const MCPhysReg ArgRegs[] = {
 135     Hexagon::R0, Hexagon::R1, Hexagon::R2,
 136     Hexagon::R3, Hexagon::R4, Hexagon::R5
 137   };
 138   const unsigned NumArgRegs = array_lengthof(ArgRegs);
 139   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 140
 141   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
 142   if (RegNum != NumArgRegs && RegNum % 2 == 1)
 143     State.AllocateReg(ArgRegs[RegNum]);
 144
 145   // Always return false here, as this function only makes sure that the first
 146   // unallocated register has an even register number and does not actually
 147   // allocate a register for the current argument.
 148   return false;
 149 }
 150
 151 #include "HexagonGenCallingConv.inc"
 152
 153
 154 void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
 155   if (VT != PromotedLdStVT) {
 156     setOperationAction(ISD::LOAD, VT, Promote);
 157     AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);
 158
 159     setOperationAction(ISD::STORE, VT, Promote);
 160     AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
 161   }
 162 }
 163
 164 SDValue
 165 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
 166       const {
 167   return SDValue();
 168 }
 169
 170 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 171 /// by "Src" to address "Dst" of size "Size".  Alignment information is
 172 /// specified by the specific parameter attribute. The copy will be passed as
 173 /// a byval function parameter.  Sometimes what we are copying is the end of a
 174 /// larger object, the part that does not fit in registers.
 175 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
 176                                          SDValue Chain, ISD::ArgFlagsTy Flags,
 177                                          SelectionDAG &DAG, const SDLoc &dl) {
 178   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
 179   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
 180                        /*isVolatile=*/false, /*AlwaysInline=*/false,
 181                        /*isTailCall=*/false,
 182                        MachinePointerInfo(), MachinePointerInfo());
 183 }
 184
 185 bool
 186 HexagonTargetLowering::CanLowerReturn(
 187     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
 188     const SmallVectorImpl<ISD::OutputArg> &Outs,
 189     LLVMContext &Context) const {
 190   SmallVector<CCValAssign, 16> RVLocs;
 191   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
 192
 193   if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
 194     return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
 195   return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
 196 }
 197
 198 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
 199 // passed by value, the function prototype is modified to return void and
 200 // the value is stored in memory pointed by a pointer passed by caller.
 201 SDValue
 202 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 203                                    bool IsVarArg,
 204                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
 205                                    const SmallVectorImpl<SDValue> &OutVals,
 206                                    const SDLoc &dl, SelectionDAG &DAG) const {
 207   // CCValAssign - represent the assignment of the return value to locations.
 208   SmallVector<CCValAssign, 16> RVLocs;
 209
 210   // CCState - Info about the registers and stack slot.
 211   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
 212                  *DAG.getContext());
 213
 214   // Analyze return values of ISD::RET
 215   if (Subtarget.useHVXOps())
 216     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
 217   else
 218     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
 219
 220   SDValue Flag;
 221   SmallVector<SDValue, 4> RetOps(1, Chain);
 222
 223   // Copy the result values into the output registers.
 224   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 225     CCValAssign &VA = RVLocs[i];
 226
 227     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
 228
 229     // Guarantee that all emitted copies are stuck together with flags.
 230     Flag = Chain.getValue(1);
 231     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
 232   }
 233
 234   RetOps[0] = Chain;  // Update chain.
 235
 236   // Add the flag if we have it.
 237   if (Flag.getNode())
 238     RetOps.push_back(Flag);
 239
 240   return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
 241 }
 242
 243 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 244   // If either no tail call or told not to tail call at all, don't.
 245   auto Attr =
 246       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
 247   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
 248     return false;
 249
 250   return true;
 251 }
 252
 253 /// LowerCallResult - Lower the result values of an ISD::CALL into the
 254 /// appropriate copies out of appropriate physical registers.  This assumes that
 255 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
 256 /// being lowered. Returns a SDNode with the same number of values as the
 257 /// ISD::CALL.
 258 SDValue HexagonTargetLowering::LowerCallResult(
 259     SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
 260     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
 261     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
 262     const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
 263   // Assign locations to each value returned by this call.
 264   SmallVector<CCValAssign, 16> RVLocs;
 265
 266   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
 267                  *DAG.getContext());
 268
 269   if (Subtarget.useHVXOps())
 270     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
 271   else
 272     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
 273
 274   // Copy all of the result registers out of their specified physreg.
 275   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 276     SDValue RetVal;
 277     if (RVLocs[i].getValVT() == MVT::i1) {
 278       // Return values of type MVT::i1 require special handling. The reason
 279       // is that MVT::i1 is associated with the PredRegs register class, but
 280       // values of that type are still returned in R0. Generate an explicit
 281       // copy into a predicate register from R0, and treat the value of the
 282       // predicate register as the call result.
 283       auto &MRI = DAG.getMachineFunction().getRegInfo();
 284       SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
 285                                        MVT::i32, Glue);
 286       // FR0 = (Value, Chain, Glue)
 287       unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
 288       SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
 289                                      FR0.getValue(0), FR0.getValue(2));
 290       // TPR = (Chain, Glue)
 291       // Don't glue this CopyFromReg, because it copies from a virtual
 292       // register. If it is glued to the call, InstrEmitter will add it
 293       // as an implicit def to the call (EmitMachineNode).
 294       RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
 295       Glue = TPR.getValue(1);
 296       Chain = TPR.getValue(0);
 297     } else {
 298       RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
 299                                   RVLocs[i].getValVT(), Glue);
 300       Glue = RetVal.getValue(2);
 301       Chain = RetVal.getValue(1);
 302     }
 303     InVals.push_back(RetVal.getValue(0));
 304   }
 305
 306   return Chain;
 307 }
 308
/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
///
/// Outline of what the body does:
///  1. Assign a location to every outgoing operand using CC_Hexagon or, when
///     the subtarget uses HVX operations, CC_Hexagon_HVX.
///  2. Decide whether a requested tail call can be honored; CLI.IsTailCall is
///     updated in place with the outcome.
///  3. Extend each argument as its location requires, store stack-passed
///     arguments relative to the stack register (byval ones through a
///     memcpy), and collect register-passed ones.
///  4. Emit either a TC_RETURN node (tail call), or
///     CALLSEQ_START / CALL (CALLnr if the callee does not return) /
///     CALLSEQ_END followed by LowerCallResult.
///
/// \param CLI    Description of the call being lowered.
/// \param InVals Receives the call's result values (filled in by
///               LowerCallResult; left untouched for a tail call).
/// \returns The TC_RETURN node for a tail call, otherwise the chain returned
///          by LowerCallResult.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  // Local shorthands for the fields of CLI used below.
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool IsVarArg                         = CLI.IsVarArg;
  bool DoesNotReturn                    = CLI.DoesNotReturn;

  // The call is treated as returning a struct if its first outgoing argument
  // carries the sret flag.
  bool IsStructRet    = Outs.empty() ? false : Outs[0].Flags.isSRet();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  // Number of parameters in the callee's function type, or 0 when CLI.CS has
  // no instruction attached.
  unsigned NumParams = CLI.CS.getInstruction()
                        ? CLI.CS.getFunctionType()->getNumParams()
                        : 0;
  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
                        NumParams);

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);

  // The caller can veto tail calls through a function attribute.
  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    CLI.IsTailCall = false;

  if (CLI.IsTailCall) {
    bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                        IsVarArg, IsStructRet, StructAttrFlag, Outs,
                        OutVals, Ins, DAG);
    // Any argument that was assigned a memory location rules out the tail
    // call.
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (VA.isMemLoc()) {
        CLI.IsTailCall = false;
        break;
      }
    }
    // NOTE(review): the "not eligible" text mentions stack arguments even
    // when the eligibility check above was what rejected the tail call.
    LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
                                         : "Argument must be passed on stack. "
                                           "Not eligible for Tail Call\n"));
  }
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Stack-passed arguments are addressed relative to the stack register.
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

  // NeedsArgAlign becomes true once any argument has an HVX vector type;
  // LargestAlignSeen tracks the largest store size (in bytes) among such
  // arguments that are passed in memory.
  bool NeedsArgAlign = false;
  unsigned LargestAlignSeen = 0;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    // Record if we need > 8 byte alignment on an argument.
    bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    NeedsArgAlign |= ArgAlign;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Arg = DAG.getBitcast(VA.getLocVT(), Arg);
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
        break;
    }

    if (VA.isMemLoc()) {
      // Address of the argument slot: stack pointer plus the assigned offset.
      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                        StackPtr.getValueType());
      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
      if (ArgAlign)
        LargestAlignSeen = std::max(LargestAlignSeen,
                                    VA.getLocVT().getStoreSizeInBits() >> 3);
      if (Flags.isByVal()) {
        // The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.
        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                        Flags, DAG, dl));
      } else {
        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
            DAG.getMachineFunction(), LocMemOffset);
        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
        MemOpChains.push_back(S);
      }
      continue;
    }

    // Arguments that can be passed on register must be kept at RegsToPass
    // vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  // With HVX vector arguments on V60 or later, raise the frame's maximum
  // alignment to at least the spill alignment of the HvxVR register class.
  if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
    LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    MFI.ensureMaxAlignment(LargestAlignSeen);
  }
  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // CALLSEQ_START is emitted only for regular (non-tail) calls; for a tail
  // call Glue stays empty at this point.
  SDValue Glue;
  if (!CLI.IsTailCall) {
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    Glue = Chain.getValue(1);
  }

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The Glue is necessary since all emitted instructions must be
  // stuck together.
  if (!CLI.IsTailCall) {
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
  } else {
    // For tail calls lower the arguments to the 'real' stack slot.
    //
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    //
    // Do not flag preceding copytoreg stuff together with the following stuff.
    Glue = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
    // The glue is dropped again, so the TC_RETURN node built below does not
    // receive a glue operand.
    Glue = SDValue();
  }

  // When long calls are enabled, the callee symbol is tagged with the
  // HMOTF_ConstExtended target flag.
  bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
  unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
  } else if (ExternalSymbolSDNode *S =
             dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
  }

  // Returns a chain & a flag for retval copy to use.
  // Operand order: chain, callee, argument registers, register mask, and
  // finally the glue if there is one.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  }

  // Mask of the registers preserved across the call for this calling
  // convention.
  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (Glue.getNode())
    Ops.push_back(Glue);

  // A tail call ends here: no CALLSEQ_END is emitted and no results are
  // copied.
  if (CLI.IsTailCall) {
    MFI.setHasTailCall();
    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
  }

  // Set this here because we need to know this for "hasFP" in frame lowering.
  // The target-independent code calls getFrameRegister before setting it, and
  // getFrameRegister uses hasFP to determine whether the function has FP.
  MFI.setHasCalls(true);

  // CALLnr is the opcode used for callees that do not return.
  unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), Glue, dl);
  Glue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
                         InVals, OutVals, Callee);
}
 539
 540 /// Returns true by value, base pointer and offset pointer and addressing
 541 /// mode by reference if this node can be combined with a load / store to
 542 /// form a post-indexed load / store.
 543 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
 544       SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
 545       SelectionDAG &DAG) const {
 546   LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
 547   if (!LSN)
 548     return false;
 549   EVT VT = LSN->getMemoryVT();
 550   if (!VT.isSimple())
 551     return false;
 552   bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
 553                      VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
 554                      VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
 555                      VT == MVT::v4i16 || VT == MVT::v8i8 ||
 556                      Subtarget.isHVXVectorType(VT.getSimpleVT());
 557   if (!IsLegalType)
 558     return false;
 559
 560   if (Op->getOpcode() != ISD::ADD)
 561     return false;
 562   Base = Op->getOperand(0);
 563   Offset = Op->getOperand(1);
 564   if (!isa<ConstantSDNode>(Offset.getNode()))
 565     return false;
 566   AM = ISD::POST_INC;
 567
 568   int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
 569   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
 570 }
 571
 572 SDValue
 573 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
 574   MachineFunction &MF = DAG.getMachineFunction();
 575   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
 576   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
 577   unsigned LR = HRI.getRARegister();
 578
 579   if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
 580     return Op;
 581
 582   unsigned NumOps = Op.getNumOperands();
 583   if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
 584     --NumOps;  // Ignore the flag operand.
 585
 586   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
 587     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
 588     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
 589     ++i;  // Skip the ID value.
 590
 591     switch (InlineAsm::getKind(Flags)) {
 592       default:
 593         llvm_unreachable("Bad flags!");
 594       case InlineAsm::Kind_RegUse:
 595       case InlineAsm::Kind_Imm:
 596       case InlineAsm::Kind_Mem:
 597         i += NumVals;
 598         break;
 599       case InlineAsm::Kind_Clobber:
 600       case InlineAsm::Kind_RegDef:
 601       case InlineAsm::Kind_RegDefEarlyClobber: {
 602         for (; NumVals; --NumVals, ++i) {
 603           unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
 604           if (Reg != LR)
 605             continue;
 606           HMFI.setHasClobberLR(true);
 607           return Op;
 608         }
 609         break;
 610       }
 611     }
 612   }
 613
 614   return Op;
 615 }
 616
 617 // Need to transform ISD::PREFETCH into something that doesn't inherit
 618 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
 619 // SDNPMayStore.
 620 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
 621                                              SelectionDAG &DAG) const {
 622   SDValue Chain = Op.getOperand(0);
 623   SDValue Addr = Op.getOperand(1);
 624   // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
 625   // if the "reg" is fed by an "add".
 626   SDLoc DL(Op);
 627   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
 628   return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
 629 }
 630
 631 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
 632 // is marked as having side-effects, while the register read on Hexagon does
 633 // not have any. TableGen refuses to accept the direct pattern from that node
 634 // to the A4_tfrcpp.
 635 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
 636                                                      SelectionDAG &DAG) const {
 637   SDValue Chain = Op.getOperand(0);
 638   SDLoc dl(Op);
 639   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
 640   return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
 641 }
 642
 643 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 644       SelectionDAG &DAG) const {
 645   SDValue Chain = Op.getOperand(0);
 646   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 647   // Lower the hexagon_prefetch builtin to DCFETCH, as above.
 648   if (IntNo == Intrinsic::hexagon_prefetch) {
 649     SDValue Addr = Op.getOperand(2);
 650     SDLoc DL(Op);
 651     SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
 652     return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
 653   }
 654   return SDValue();
 655 }
 656
 657 SDValue
 658 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
 659                                                SelectionDAG &DAG) const {
 660   SDValue Chain = Op.getOperand(0);
 661   SDValue Size = Op.getOperand(1);
 662   SDValue Align = Op.getOperand(2);
 663   SDLoc dl(Op);
 664
 665   ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
 666   assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
 667
 668   unsigned A = AlignConst->getSExtValue();
 669   auto &HFI = *Subtarget.getFrameLowering();
 670   // "Zero" means natural stack alignment.
 671   if (A == 0)
 672     A = HFI.getStackAlignment();
 673
 674   LLVM_DEBUG({
 675     dbgs () << __func__ << " Align: " << A << " Size: ";
 676     Size.getNode()->dump(&DAG);
 677     dbgs() << "\n";
 678   });
 679
 680   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
 681   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
 682   SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
 683
 684   DAG.ReplaceAllUsesOfValueWith(Op, AA);
 685   return AA;
 686 }
 687
 688 SDValue HexagonTargetLowering::LowerFormalArguments(
 689     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 690     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
 691     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
 692   MachineFunction &MF = DAG.getMachineFunction();
 693   MachineFrameInfo &MFI = MF.getFrameInfo();
 694   MachineRegisterInfo &MRI = MF.getRegInfo();
 695
 696   // Assign locations to all of the incoming arguments.
 697   SmallVector<CCValAssign, 16> ArgLocs;
 698   HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
 699                         MF.getFunction().getFunctionType()->getNumParams());
 700
 701   if (Subtarget.useHVXOps())
 702     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
 703   else
 704     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
 705
 706   // For LLVM, in the case when returning a struct by value (>8byte),
 707   // the first argument is a pointer that points to the location on caller's
 708   // stack where the return value will be stored. For Hexagon, the location on
 709   // caller's stack is passed only when the struct size is smaller than (and
 710   // equal to) 8 bytes. If not, no address will be passed into callee and
 711   // callee returns the result directly through R0/R1.
 712
 713   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
 714
 715   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
 716     CCValAssign &VA = ArgLocs[i];
 717     ISD::ArgFlagsTy Flags = Ins[i].Flags;
 718     bool ByVal = Flags.isByVal();
 719
 720     // Arguments passed in registers:
 721     // 1. 32- and 64-bit values and HVX vectors are passed directly,
 722     // 2. Large structs are passed via an address, and the address is
 723     //    passed in a register.
 724     if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
 725       llvm_unreachable("ByValSize must be bigger than 8 bytes");
 726
 727     bool InReg = VA.isRegLoc() &&
 728                  (!ByVal || (ByVal && Flags.getByValSize() > 8));
 729
 730     if (InReg) {
 731       MVT RegVT = VA.getLocVT();
 732       if (VA.getLocInfo() == CCValAssign::BCvt)
 733         RegVT = VA.getValVT();
 734
 735       const TargetRegisterClass *RC = getRegClassFor(RegVT);
 736       unsigned VReg = MRI.createVirtualRegister(RC);
 737       SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
 738
 739       // Treat values of type MVT::i1 specially: they are passed in
 740       // registers of type i32, but they need to remain as values of
 741       // type i1 for consistency of the argument lowering.
 742       if (VA.getValVT() == MVT::i1) {
 743         assert(RegVT.getSizeInBits() <= 32);
 744         SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
 745                                 Copy, DAG.getConstant(1, dl, RegVT));
 746         Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
 747                             ISD::SETNE);
 748       } else {
 749 #ifndef NDEBUG
 750         unsigned RegSize = RegVT.getSizeInBits();
 751         assert(RegSize == 32 || RegSize == 64 ||
 752                Subtarget.isHVXVectorType(RegVT));
 753 #endif
 754       }
 755       InVals.push_back(Copy);
 756       MRI.addLiveIn(VA.getLocReg(), VReg);
 757     } else {
 758       assert(VA.isMemLoc() && "Argument should be passed in memory");
 759
 760       // If it's a byval parameter, then we need to compute the
 761       // "real" size, not the size of the pointer.
 762       unsigned ObjSize = Flags.isByVal()
 763                             ? Flags.getByValSize()
 764                             : VA.getLocVT().getStoreSizeInBits() / 8;
 765
 766       // Create the frame index object for this incoming parameter.
 767       int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
 768       int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
 769       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
 770
 771       if (Flags.isByVal()) {
 772         // If it's a pass-by-value aggregate, then do not dereference the stack
 773         // location. Instead, we should generate a reference to the stack
 774         // location.
 775         InVals.push_back(FIN);
 776       } else {
 777         SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
 778                                 MachinePointerInfo::getFixedStack(MF, FI, 0));
 779         InVals.push_back(L);
 780       }
 781     }
 782   }
 783
 784
 785   if (IsVarArg) {
 786     // This will point to the next argument passed via stack.
 787     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
 788     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
 789     HMFI.setVarArgsFrameIndex(FI);
 790   }
 791
 792   return Chain;
 793 }
 794
 795 SDValue
 796 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
 797   // VASTART stores the address of the VarArgsFrameIndex slot into the
 798   // memory location argument.
 799   MachineFunction &MF = DAG.getMachineFunction();
 800   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
 801   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
 802   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
 803   return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
 804                       MachinePointerInfo(SV));
 805 }
 806
 807 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 808   const SDLoc &dl(Op);
 809   SDValue LHS = Op.getOperand(0);
 810   SDValue RHS = Op.getOperand(1);
 811   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 812   MVT ResTy = ty(Op);
 813   MVT OpTy = ty(LHS);
 814
 815   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
 816     MVT ElemTy = OpTy.getVectorElementType();
 817     assert(ElemTy.isScalarInteger());
 818     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
 819                                   OpTy.getVectorNumElements());
 820     return DAG.getSetCC(dl, ResTy,
 821                         DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
 822                         DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
 823   }
 824
 825   // Treat all other vector types as legal.
 826   if (ResTy.isVector())
 827     return Op;
 828
 829   // Comparisons of short integers should use sign-extend, not zero-extend,
 830   // since we can represent small negative values in the compare instructions.
 831   // The LLVM default is to use zero-extend arbitrarily in these cases.
 832   auto isSExtFree = [this](SDValue N) {
 833     switch (N.getOpcode()) {
 834       case ISD::TRUNCATE: {
 835         // A sign-extend of a truncate of a sign-extend is free.
 836         SDValue Op = N.getOperand(0);
 837         if (Op.getOpcode() != ISD::AssertSext)
 838           return false;
 839         EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
 840         unsigned ThisBW = ty(N).getSizeInBits();
 841         unsigned OrigBW = OrigTy.getSizeInBits();
 842         // The type that was sign-extended to get the AssertSext must be
 843         // narrower than the type of N (so that N has still the same value
 844         // as the original).
 845         return ThisBW >= OrigBW;
 846       }
 847       case ISD::LOAD:
 848         // We have sign-extended loads.
 849         return true;
 850     }
 851     return false;
 852   };
 853
 854   if (OpTy == MVT::i8 || OpTy == MVT::i16) {
 855     ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
 856     bool IsNegative = C && C->getAPIntValue().isNegative();
 857     if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
 858       return DAG.getSetCC(dl, ResTy,
 859                           DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
 860                           DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
 861   }
 862
 863   return SDValue();
 864 }
 865
 866 SDValue
 867 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
 868   SDValue PredOp = Op.getOperand(0);
 869   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
 870   EVT OpVT = Op1.getValueType();
 871   SDLoc DL(Op);
 872
 873   if (OpVT == MVT::v2i16) {
 874     SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
 875     SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
 876     SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
 877     SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
 878     return TR;
 879   }
 880
 881   return SDValue();
 882 }
 883
 884 static Constant *convert_i1_to_i8(const Constant *ConstVal) {
 885   SmallVector<Constant *, 128> NewConst;
 886   const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
 887   if (!CV)
 888     return nullptr;
 889
 890   LLVMContext &Ctx = ConstVal->getContext();
 891   IRBuilder<> IRB(Ctx);
 892   unsigned NumVectorElements = CV->getNumOperands();
 893   assert(isPowerOf2_32(NumVectorElements) &&
 894          "conversion only supported for pow2 VectorSize!");
 895
 896   for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
 897     uint8_t x = 0;
 898     for (unsigned j = 0; j < 8; ++j) {
 899       uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
 900       x |= y << (7 - j);
 901     }
 902     assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
 903     NewConst.push_back(IRB.getInt8(x));
 904   }
 905   return ConstantVector::get(NewConst);
 906 }
 907
 908 SDValue
 909 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
 910   EVT ValTy = Op.getValueType();
 911   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
 912   Constant *CVal = nullptr;
 913   bool isVTi1Type = false;
 914   if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
 915     Type *CValTy = ConstVal->getType();
 916     if (CValTy->isVectorTy() &&
 917         CValTy->getVectorElementType()->isIntegerTy(1)) {
 918       CVal = convert_i1_to_i8(ConstVal);
 919       isVTi1Type = (CVal != nullptr);
 920     }
 921   }
 922   unsigned Align = CPN->getAlignment();
 923   bool IsPositionIndependent = isPositionIndependent();
 924   unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
 925
 926   unsigned Offset = 0;
 927   SDValue T;
 928   if (CPN->isMachineConstantPoolEntry())
 929     T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
 930                                   TF);
 931   else if (isVTi1Type)
 932     T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
 933   else
 934     T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);
 935
 936   assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
 937          "Inconsistent target flag encountered");
 938
 939   if (IsPositionIndependent)
 940     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
 941   return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
 942 }
 943
 944 SDValue
 945 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
 946   EVT VT = Op.getValueType();
 947   int Idx = cast<JumpTableSDNode>(Op)->getIndex();
 948   if (isPositionIndependent()) {
 949     SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
 950     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
 951   }
 952
 953   SDValue T = DAG.getTargetJumpTable(Idx, VT);
 954   return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
 955 }
 956
 957 SDValue
 958 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
 959   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
 960   MachineFunction &MF = DAG.getMachineFunction();
 961   MachineFrameInfo &MFI = MF.getFrameInfo();
 962   MFI.setReturnAddressIsTaken(true);
 963
 964   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
 965     return SDValue();
 966
 967   EVT VT = Op.getValueType();
 968   SDLoc dl(Op);
 969   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 970   if (Depth) {
 971     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
 972     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
 973     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
 974                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
 975                        MachinePointerInfo());
 976   }
 977
 978   // Return LR, which contains the return address. Mark it an implicit live-in.
 979   unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
 980   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 981 }
 982
 983 SDValue
 984 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
 985   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
 986   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
 987   MFI.setFrameAddressIsTaken(true);
 988
 989   EVT VT = Op.getValueType();
 990   SDLoc dl(Op);
 991   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 992   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
 993                                          HRI.getFrameRegister(), VT);
 994   while (Depth--)
 995     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
 996                             MachinePointerInfo());
 997   return FrameAddr;
 998 }
 999
1000 SDValue
1001 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1002   SDLoc dl(Op);
1003   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
1004 }
1005
1006 SDValue
1007 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1008   SDLoc dl(Op);
1009   auto *GAN = cast<GlobalAddressSDNode>(Op);
1010   auto PtrVT = getPointerTy(DAG.getDataLayout());
1011   auto *GV = GAN->getGlobal();
1012   int64_t Offset = GAN->getOffset();
1013
1014   auto &HLOF = *HTM.getObjFileLowering();
1015   Reloc::Model RM = HTM.getRelocationModel();
1016
1017   if (RM == Reloc::Static) {
1018     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
1019     const GlobalObject *GO = GV->getBaseObject();
1020     if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
1021       return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
1022     return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
1023   }
1024
1025   bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1026   if (UsePCRel) {
1027     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
1028                                             HexagonII::MO_PCREL);
1029     return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
1030   }
1031
1032   // Use GOT index.
1033   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1034   SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
1035   SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1036   return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
1037 }
1038
1039 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1040 SDValue
1041 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1042   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1043   SDLoc dl(Op);
1044   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1045
1046   Reloc::Model RM = HTM.getRelocationModel();
1047   if (RM == Reloc::Static) {
1048     SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
1049     return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
1050   }
1051
1052   SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
1053   return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
1054 }
1055
1056 SDValue
1057 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1058       const {
1059   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1060   SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
1061                                                HexagonII::MO_PCREL);
1062   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
1063 }
1064
1065 SDValue
1066 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1067       GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
1068       unsigned char OperandFlags) const {
1069   MachineFunction &MF = DAG.getMachineFunction();
1070   MachineFrameInfo &MFI = MF.getFrameInfo();
1071   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1072   SDLoc dl(GA);
1073   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
1074                                            GA->getValueType(0),
1075                                            GA->getOffset(),
1076                                            OperandFlags);
1077   // Create Operands for the call.The Operands should have the following:
1078   // 1. Chain SDValue
1079   // 2. Callee which in this case is the Global address value.
1080   // 3. Registers live into the call.In this case its R0, as we
1081   //    have just one argument to be passed.
1082   // 4. Glue.
1083   // Note: The order is important.
1084
1085   const auto &HRI = *Subtarget.getRegisterInfo();
1086   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1087   assert(Mask && "Missing call preserved mask for calling convention");
1088   SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
1089                     DAG.getRegisterMask(Mask), Glue };
1090   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
1091
1092   // Inform MFI that function has calls.
1093   MFI.setAdjustsStack(true);
1094
1095   Glue = Chain.getValue(1);
1096   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
1097 }
1098
1099 //
1100 // Lower using the intial executable model for TLS addresses
1101 //
1102 SDValue
1103 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1104       SelectionDAG &DAG) const {
1105   SDLoc dl(GA);
1106   int64_t Offset = GA->getOffset();
1107   auto PtrVT = getPointerTy(DAG.getDataLayout());
1108
1109   // Get the thread pointer.
1110   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1111
1112   bool IsPositionIndependent = isPositionIndependent();
1113   unsigned char TF =
1114       IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1115
1116   // First generate the TLS symbol address
1117   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
1118                                            Offset, TF);
1119
1120   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1121
1122   if (IsPositionIndependent) {
1123     // Generate the GOT pointer in case of position independent code
1124     SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
1125
1126     // Add the TLS Symbol address to GOT pointer.This gives
1127     // GOT relative relocation for the symbol.
1128     Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1129   }
1130
1131   // Load the offset value for TLS symbol.This offset is relative to
1132   // thread pointer.
1133   SDValue LoadOffset =
1134       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
1135
1136   // Address of the thread local variable is the add of thread
1137   // pointer and the offset of the variable.
1138   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
1139 }
1140
1141 //
1142 // Lower using the local executable model for TLS addresses
1143 //
1144 SDValue
1145 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1146       SelectionDAG &DAG) const {
1147   SDLoc dl(GA);
1148   int64_t Offset = GA->getOffset();
1149   auto PtrVT = getPointerTy(DAG.getDataLayout());
1150
1151   // Get the thread pointer.
1152   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1153   // Generate the TLS symbol address
1154   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1155                                            HexagonII::MO_TPREL);
1156   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1157
1158   // Address of the thread local variable is the add of thread
1159   // pointer and the offset of the variable.
1160   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
1161 }
1162
1163 //
1164 // Lower using the general dynamic model for TLS addresses
1165 //
1166 SDValue
1167 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1168       SelectionDAG &DAG) const {
1169   SDLoc dl(GA);
1170   int64_t Offset = GA->getOffset();
1171   auto PtrVT = getPointerTy(DAG.getDataLayout());
1172
1173   // First generate the TLS symbol address
1174   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1175                                            HexagonII::MO_GDGOT);
1176
1177   // Then, generate the GOT pointer
1178   SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
1179
1180   // Add the TLS symbol and the GOT pointer
1181   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1182   SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1183
1184   // Copy over the argument to R0
1185   SDValue InFlag;
1186   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
1187   InFlag = Chain.getValue(1);
1188
1189   unsigned Flags =
1190       static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
1191           ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
1192           : HexagonII::MO_GDPLT;
1193
1194   return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
1195                            Hexagon::R0, Flags);
1196 }
1197
1198 //
1199 // Lower TLS addresses.
1200 //
1201 // For now for dynamic models, we only support the general dynamic model.
1202 //
1203 SDValue
1204 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1205       SelectionDAG &DAG) const {
1206   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1207
1208   switch (HTM.getTLSModel(GA->getGlobal())) {
1209     case TLSModel::GeneralDynamic:
1210     case TLSModel::LocalDynamic:
1211       return LowerToTLSGeneralDynamicModel(GA, DAG);
1212     case TLSModel::InitialExec:
1213       return LowerToTLSInitialExecModel(GA, DAG);
1214     case TLSModel::LocalExec:
1215       return LowerToTLSLocalExecModel(GA, DAG);
1216   }
1217   llvm_unreachable("Bogus TLS model");
1218 }
1219
1220 //===----------------------------------------------------------------------===//
1221 // TargetLowering Implementation
1222 //===----------------------------------------------------------------------===//
1223
1224 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1225                                              const HexagonSubtarget &ST)
1226     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
1227       Subtarget(ST) {
1228   bool IsV4 = !Subtarget.hasV5Ops();
1229   auto &HRI = *Subtarget.getRegisterInfo();
1230
1231   setPrefLoopAlignment(4);
1232   setPrefFunctionAlignment(4);
1233   setMinFunctionAlignment(2);
1234   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1235   setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
1236   setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
1237
1238   setMaxAtomicSizeInBitsSupported(64);
1239   setMinCmpXchgSizeInBits(32);
1240
1241   if (EnableHexSDNodeSched)
1242     setSchedulingPreference(Sched::VLIW);
1243   else
1244     setSchedulingPreference(Sched::Source);
1245
1246   // Limits for inline expansion of memcpy/memmove
1247   MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
1248   MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
1249   MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
1250   MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
1251   MaxStoresPerMemset = MaxStoresPerMemsetCL;
1252   MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
1253
1254   //
1255   // Set up register classes.
1256   //
1257
1258   addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
1259   addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
1260   addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
1261   addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
1262   addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
1263   addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
1264   addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
1265   addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
1266   addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
1267   addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
1268   addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
1269
1270   if (Subtarget.hasV5Ops()) {
1271     addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
1272     addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
1273   }
1274
1275   //
1276   // Handling of scalar operations.
1277   //
1278   // All operations default to "legal", except:
1279   // - indexed loads and stores (pre-/post-incremented),
1280   // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1281   //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1282   //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1283   //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1284   // which default to "expand" for at least one type.
1285
1286   // Misc operations.
1287   setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
1288   setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
1289
1290   setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1291   setOperationAction(ISD::JumpTable, MVT::i32, Custom);
1292   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
1293   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1294   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
1295   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1296   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1297   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1298   setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
1299   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
1300   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1301   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1302
1303   // Custom legalize GlobalAddress nodes into CONST32.
1304   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1305   setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
1306   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
1307
1308   // Hexagon needs to optimize cases with negative constants.
1309   setOperationAction(ISD::SETCC, MVT::i8,    Custom);
1310   setOperationAction(ISD::SETCC, MVT::i16,   Custom);
1311   setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
1312   setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1313
1314   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1315   setOperationAction(ISD::VASTART, MVT::Other, Custom);
1316   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
1317   setOperationAction(ISD::VAARG,   MVT::Other, Expand);
1318   setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
1319
1320   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1321   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1322   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1323
1324   if (EmitJumpTables)
1325     setMinimumJumpTableEntries(MinimumJumpTables);
1326   else
1327     setMinimumJumpTableEntries(std::numeric_limits<int>::max());
1328   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1329
1330   setOperationAction(ISD::ABS, MVT::i32, Legal);
1331   setOperationAction(ISD::ABS, MVT::i64, Legal);
1332
1333   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1334   // but they only operate on i64.
1335   for (MVT VT : MVT::integer_valuetypes()) {
1336     setOperationAction(ISD::UADDO,    VT, Expand);
1337     setOperationAction(ISD::USUBO,    VT, Expand);
1338     setOperationAction(ISD::SADDO,    VT, Expand);
1339     setOperationAction(ISD::SSUBO,    VT, Expand);
1340     setOperationAction(ISD::ADDCARRY, VT, Expand);
1341     setOperationAction(ISD::SUBCARRY, VT, Expand);
1342   }
1343   setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
1344   setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
1345
1346   setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
1347   setOperationAction(ISD::CTLZ, MVT::i16, Promote);
1348   setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
1349   setOperationAction(ISD::CTTZ, MVT::i16, Promote);
1350
1351   // In V5, popcount can count # of 1s in i64 but returns i32.
1352   // On V4 it will be expanded (set later).
1353   setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
1354   setOperationAction(ISD::CTPOP, MVT::i16, Promote);
1355   setOperationAction(ISD::CTPOP, MVT::i32, Promote);
1356   setOperationAction(ISD::CTPOP, MVT::i64, Legal);
1357
1358   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1359   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
1360   setOperationAction(ISD::BSWAP, MVT::i32, Legal);
1361   setOperationAction(ISD::BSWAP, MVT::i64, Legal);
1362
1363   for (unsigned IntExpOp :
1364        {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
1365         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
1366         ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1367         ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
1368     for (MVT VT : MVT::integer_valuetypes())
1369       setOperationAction(IntExpOp, VT, Expand);
1370   }
1371
1372   for (unsigned FPExpOp :
1373        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
1374         ISD::FPOW, ISD::FCOPYSIGN}) {
1375     for (MVT VT : MVT::fp_valuetypes())
1376       setOperationAction(FPExpOp, VT, Expand);
1377   }
1378
1379   // No extending loads from i32.
1380   for (MVT VT : MVT::integer_valuetypes()) {
1381     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1382     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1383     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
1384   }
1385   // Turn FP truncstore into trunc + store.
1386   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1387   // Turn FP extload into load/fpextend.
1388   for (MVT VT : MVT::fp_valuetypes())
1389     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1390
1391   // Expand BR_CC and SELECT_CC for all integer and fp types.
1392   for (MVT VT : MVT::integer_valuetypes()) {
1393     setOperationAction(ISD::BR_CC,     VT, Expand);
1394     setOperationAction(ISD::SELECT_CC, VT, Expand);
1395   }
1396   for (MVT VT : MVT::fp_valuetypes()) {
1397     setOperationAction(ISD::BR_CC,     VT, Expand);
1398     setOperationAction(ISD::SELECT_CC, VT, Expand);
1399   }
1400   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
1401
1402   //
1403   // Handling of vector operations.
1404   //
1405
1406   promoteLdStType(MVT::v4i8,  MVT::i32);
1407   promoteLdStType(MVT::v2i16, MVT::i32);
1408   promoteLdStType(MVT::v8i8,  MVT::i64);
1409   promoteLdStType(MVT::v4i16, MVT::i64);
1410   promoteLdStType(MVT::v2i32, MVT::i64);
1411
1412   // Set the action for vector operations to "expand", then override it with
1413   // either "custom" or "legal" for specific cases.
1414   static const unsigned VectExpOps[] = {
1415     // Integer arithmetic:
1416     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
1417     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
1418     ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
1419     // Logical/bit:
1420     ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
1421     ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
1422     // Floating point arithmetic/math functions:
1423     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
1424     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
1425     ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
1426     ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
1427     ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
1428     ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
1429     // Misc:
1430     ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
1431     // Vector:
1432     ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
1433     ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
1434     ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
1435     ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE
1436   };
1437
1438   for (MVT VT : MVT::vector_valuetypes()) {
1439     for (unsigned VectExpOp : VectExpOps)
1440       setOperationAction(VectExpOp, VT, Expand);
1441
1442     // Expand all extending loads and truncating stores:
1443     for (MVT TargetVT : MVT::vector_valuetypes()) {
1444       if (TargetVT == VT)
1445         continue;
1446       setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
1447       setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
1448       setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
1449       setTruncStoreAction(VT, TargetVT, Expand);
1450     }
1451
1452     // Normalize all inputs to SELECT to be vectors of i32.
1453     if (VT.getVectorElementType() != MVT::i32) {
1454       MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
1455       setOperationAction(ISD::SELECT, VT, Promote);
1456       AddPromotedToType(ISD::SELECT, VT, VT32);
1457     }
1458     setOperationAction(ISD::SRA, VT, Custom);
1459     setOperationAction(ISD::SHL, VT, Custom);
1460     setOperationAction(ISD::SRL, VT, Custom);
1461   }
1462
1463   // Extending loads from (native) vectors of i8 into (native) vectors of i16
1464   // are legal.
1465   setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
1466   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1467   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1468   setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
1469   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1470   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1471
1472   // Types natively supported:
1473   for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
1474                        MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1475     setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
1476     setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
1477     setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
1478     setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
1479     setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
1480     setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);
1481
1482     setOperationAction(ISD::ADD, NativeVT, Legal);
1483     setOperationAction(ISD::SUB, NativeVT, Legal);
1484     setOperationAction(ISD::MUL, NativeVT, Legal);
1485     setOperationAction(ISD::AND, NativeVT, Legal);
1486     setOperationAction(ISD::OR,  NativeVT, Legal);
1487     setOperationAction(ISD::XOR, NativeVT, Legal);
1488   }
1489
1490   // Custom lower unaligned loads.
1491   for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
1492                     MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1493     setOperationAction(ISD::LOAD, VecVT, Custom);
1494   }
1495
1496   for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
1497     setCondCodeAction(ISD::SETLT,  VT, Expand);
1498     setCondCodeAction(ISD::SETLE,  VT, Expand);
1499     setCondCodeAction(ISD::SETULT, VT, Expand);
1500     setCondCodeAction(ISD::SETULE, VT, Expand);
1501   }
1502
1503   // Custom-lower bitcasts from i8 to v8i1.
1504   setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
1505   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
1506   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
1507   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
1508   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
1509   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
1510
1511   // Subtarget-specific operation actions.
1512   //
1513   if (Subtarget.hasV60Ops()) {
1514     setOperationAction(ISD::ROTL, MVT::i32, Custom);
1515     setOperationAction(ISD::ROTL, MVT::i64, Custom);
1516   }
1517   if (Subtarget.hasV5Ops()) {
1518     setOperationAction(ISD::FMA,  MVT::f64, Expand);
1519     setOperationAction(ISD::FADD, MVT::f64, Expand);
1520     setOperationAction(ISD::FSUB, MVT::f64, Expand);
1521     setOperationAction(ISD::FMUL, MVT::f64, Expand);
1522
1523     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1524     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1525
1526     setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
1527     setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
1528     setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
1529     setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
1530     setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
1531     setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
1532     setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
1533     setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
1534     setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
1535     setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
1536     setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
1537     setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
1538   } else { // V4
1539     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
1540     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
1541     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
1542     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
1543     setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
1544     setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
1545     setOperationAction(ISD::FP_EXTEND,  MVT::f32, Expand);
1546     setOperationAction(ISD::FP_ROUND,   MVT::f64, Expand);
1547     setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
1548
1549     setOperationAction(ISD::CTPOP, MVT::i8,  Expand);
1550     setOperationAction(ISD::CTPOP, MVT::i16, Expand);
1551     setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1552     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
1553
1554     // Expand these operations for both f32 and f64:
1555     for (unsigned FPExpOpV4 :
1556          {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
1557       setOperationAction(FPExpOpV4, MVT::f32, Expand);
1558       setOperationAction(FPExpOpV4, MVT::f64, Expand);
1559     }
1560
1561     for (ISD::CondCode FPExpCCV4 :
1562          {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
1563           ISD::SETUO,  ISD::SETO}) {
1564       setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
1565       setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
1566     }
1567   }
1568
1569   // Handling of indexed loads/stores: default is "expand".
1570   //
1571   for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
1572                  MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1573     setIndexedLoadAction(ISD::POST_INC, VT, Legal);
1574     setIndexedStoreAction(ISD::POST_INC, VT, Legal);
1575   }
1576
1577   if (Subtarget.useHVXOps())
1578     initializeHVXLowering();
1579
1580   computeRegisterProperties(&HRI);
1581
1582   //
1583   // Library calls for unsupported operations
1584   //
1585   bool FastMath  = EnableFastMath;
1586
1587   setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
1588   setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
1589   setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
1590   setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
1591   setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
1592   setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
1593   setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
1594   setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
1595
1596   setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
1597   setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
1598   setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
1599   setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
1600   setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
1601   setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
1602
1603   if (IsV4) {
1604     // Handle single-precision floating point operations on V4.
1605     if (FastMath) {
1606       setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
1607       setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
1608       setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
1609       setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
1610       setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
1611       // Double-precision compares.
1612       setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
1613       setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
1614     } else {
1615       setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
1616       setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
1617       setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
1618       setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
1619       setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
1620       // Double-precision compares.
1621       setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
1622       setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
1623     }
1624   }
1625
1626   // This is the only fast library function for sqrtd.
1627   if (FastMath)
1628     setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
1629
1630   // Prefix is: nothing  for "slow-math",
1631   //            "fast2_" for V4 fast-math and V5+ fast-math double-precision
1632   // (actually, keep fast-math and fast-math2 separate for now)
1633   if (FastMath) {
1634     setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
1635     setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
1636     setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
1637     setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
    // Calling __hexagon_fast_divsf3 with fast-math on V5 (ok).
1639     setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
1640   } else {
1641     setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
1642     setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
1643     setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
1644     setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
1645     setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
1646   }
1647
1648   if (Subtarget.hasV5Ops()) {
1649     if (FastMath)
1650       setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
1651     else
1652       setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
1653   } else {
1654     // V4
1655     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
1656     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
1657     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
1658     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
1659     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
1660     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
1661     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
1662     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
1663     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
1664     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
1665     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
1666     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
1667     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
1668     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
1669     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
1670     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
1671     setLibcallName(RTLIB::FPEXT_F32_F64,    "__hexagon_extendsfdf2");
1672     setLibcallName(RTLIB::FPROUND_F64_F32,  "__hexagon_truncdfsf2");
1673     setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
1674     setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
1675     setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
1676     setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
1677     setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
1678     setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
1679     setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
1680     setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
1681     setLibcallName(RTLIB::UO_F32,  "__hexagon_unordsf2");
1682     setLibcallName(RTLIB::UO_F64,  "__hexagon_unorddf2");
1683     setLibcallName(RTLIB::O_F32,   "__hexagon_unordsf2");
1684     setLibcallName(RTLIB::O_F64,   "__hexagon_unorddf2");
1685   }
1686
1687   // These cause problems when the shift amount is non-constant.
1688   setLibcallName(RTLIB::SHL_I128, nullptr);
1689   setLibcallName(RTLIB::SRL_I128, nullptr);
1690   setLibcallName(RTLIB::SRA_I128, nullptr);
1691 }
1692
1693 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1694   switch ((HexagonISD::NodeType)Opcode) {
1695   case HexagonISD::ADDC:          return "HexagonISD::ADDC";
1696   case HexagonISD::SUBC:          return "HexagonISD::SUBC";
1697   case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
1698   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
1699   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
1700   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
1701   case HexagonISD::CALL:          return "HexagonISD::CALL";
1702   case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
1703   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
1704   case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
1705   case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
1706   case HexagonISD::CONST32:       return "HexagonISD::CONST32";
1707   case HexagonISD::CP:            return "HexagonISD::CP";
1708   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
1709   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
1710   case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
1711   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
1712   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
1713   case HexagonISD::JT:            return "HexagonISD::JT";
1714   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
1715   case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
1716   case HexagonISD::VASL:          return "HexagonISD::VASL";
1717   case HexagonISD::VASR:          return "HexagonISD::VASR";
1718   case HexagonISD::VLSR:          return "HexagonISD::VLSR";
1719   case HexagonISD::VSPLAT:        return "HexagonISD::VSPLAT";
1720   case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
1721   case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
1722   case HexagonISD::VROR:          return "HexagonISD::VROR";
1723   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
1724   case HexagonISD::VZERO:         return "HexagonISD::VZERO";
1725   case HexagonISD::VSPLATW:       return "HexagonISD::VSPLATW";
1726   case HexagonISD::D2P:           return "HexagonISD::D2P";
1727   case HexagonISD::P2D:           return "HexagonISD::P2D";
1728   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
1729   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
1730   case HexagonISD::QCAT:          return "HexagonISD::QCAT";
1731   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
1732   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
1733   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
1734   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
1735   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
1736   case HexagonISD::OP_END:        break;
1737   }
1738   return nullptr;
1739 }
1740
1741 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
1742 // intrinsic.
1743 static bool isBrevLdIntrinsic(const Value *Inst) {
1744   unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
1745   return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
1746           ID == Intrinsic::hexagon_L2_loadri_pbr ||
1747           ID == Intrinsic::hexagon_L2_loadrh_pbr ||
1748           ID == Intrinsic::hexagon_L2_loadruh_pbr ||
1749           ID == Intrinsic::hexagon_L2_loadrb_pbr ||
1750           ID == Intrinsic::hexagon_L2_loadrub_pbr);
1751 }
1752
1753 // Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
1754 // instruction. So far we only handle bitcast, extract value and bit reverse
1755 // load intrinsic instructions. Should we handle CGEP ?
1756 static Value *getBrevLdObject(Value *V) {
1757   if (Operator::getOpcode(V) == Instruction::ExtractValue ||
1758       Operator::getOpcode(V) == Instruction::BitCast)
1759     V = cast<Operator>(V)->getOperand(0);
1760   else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
1761     V = cast<Instruction>(V)->getOperand(0);
1762   return V;
1763 }
1764
1765 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
1766 // a back edge. If the back edge comes from the intrinsic itself, the incoming
1767 // edge is returned.
1768 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
1769   const BasicBlock *Parent = PN->getParent();
1770   int Idx = -1;
1771   for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
1772     BasicBlock *Blk = PN->getIncomingBlock(i);
1773     // Determine if the back edge is originated from intrinsic.
1774     if (Blk == Parent) {
1775       Value *BackEdgeVal = PN->getIncomingValue(i);
1776       Value *BaseVal;
1777       // Loop over till we return the same Value or we hit the IntrBaseVal.
1778       do {
1779         BaseVal = BackEdgeVal;
1780         BackEdgeVal = getBrevLdObject(BackEdgeVal);
1781       } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
1782       // If the getBrevLdObject returns IntrBaseVal, we should return the
1783       // incoming edge.
1784       if (IntrBaseVal == BackEdgeVal)
1785         continue;
1786       Idx = i;
1787       break;
1788     } else // Set the node to incoming edge.
1789       Idx = i;
1790   }
1791   assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
1792   return PN->getIncomingValue(Idx);
1793 }
1794
1795 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
1796 // pointer points to, for the bit-reverse load intrinsic. Setting this to
1797 // memoperand might help alias analysis to figure out the dependencies.
1798 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
1799   Value *IntrBaseVal = V;
1800   Value *BaseVal;
1801   // Loop over till we return the same Value, implies we either figure out
1802   // the object or we hit a PHI
1803   do {
1804     BaseVal = V;
1805     V = getBrevLdObject(V);
1806   } while (BaseVal != V);
1807
1808   // Identify the object from PHINode.
1809   if (const PHINode *PN = dyn_cast<PHINode>(V))
1810     return returnEdge(PN, IntrBaseVal);
1811   // For non PHI nodes, the object is the last value returned by getBrevLdObject
1812   else
1813     return V;
1814 }
1815
1816 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1817 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1818 /// true and store the intrinsic information into the IntrinsicInfo that was
1819 /// passed to the function.
1820 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1821                                                const CallInst &I,
1822                                                MachineFunction &MF,
1823                                                unsigned Intrinsic) const {
1824   switch (Intrinsic) {
1825   case Intrinsic::hexagon_L2_loadrd_pbr:
1826   case Intrinsic::hexagon_L2_loadri_pbr:
1827   case Intrinsic::hexagon_L2_loadrh_pbr:
1828   case Intrinsic::hexagon_L2_loadruh_pbr:
1829   case Intrinsic::hexagon_L2_loadrb_pbr:
1830   case Intrinsic::hexagon_L2_loadrub_pbr: {
1831     Info.opc = ISD::INTRINSIC_W_CHAIN;
1832     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
1833     auto &Cont = I.getCalledFunction()->getParent()->getContext();
1834     // The intrinsic function call is of the form { ElTy, i8* }
1835     // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
1836     // should be derived from ElTy.
1837     PointerType *PtrTy = I.getCalledFunction()
1838                              ->getReturnType()
1839                              ->getContainedType(0)
1840                              ->getPointerTo();
1841     Info.memVT = MVT::getVT(PtrTy->getElementType());
1842     llvm::Value *BasePtrVal = I.getOperand(0);
1843     Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
1844     // The offset value comes through Modifier register. For now, assume the
1845     // offset is 0.
1846     Info.offset = 0;
1847     Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont));
1848     Info.flags = MachineMemOperand::MOLoad;
1849     return true;
1850   }
1851   case Intrinsic::hexagon_V6_vgathermw:
1852   case Intrinsic::hexagon_V6_vgathermw_128B:
1853   case Intrinsic::hexagon_V6_vgathermh:
1854   case Intrinsic::hexagon_V6_vgathermh_128B:
1855   case Intrinsic::hexagon_V6_vgathermhw:
1856   case Intrinsic::hexagon_V6_vgathermhw_128B:
1857   case Intrinsic::hexagon_V6_vgathermwq:
1858   case Intrinsic::hexagon_V6_vgathermwq_128B:
1859   case Intrinsic::hexagon_V6_vgathermhq:
1860   case Intrinsic::hexagon_V6_vgathermhq_128B:
1861   case Intrinsic::hexagon_V6_vgathermhwq:
1862   case Intrinsic::hexagon_V6_vgathermhwq_128B: {
1863     const Module &M = *I.getParent()->getParent()->getParent();
1864     Info.opc = ISD::INTRINSIC_W_CHAIN;
1865     Type *VecTy = I.getArgOperand(1)->getType();
1866     Info.memVT = MVT::getVT(VecTy);
1867     Info.ptrVal = I.getArgOperand(0);
1868     Info.offset = 0;
1869     Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8;
1870     Info.flags = MachineMemOperand::MOLoad |
1871                  MachineMemOperand::MOStore |
1872                  MachineMemOperand::MOVolatile;
1873     return true;
1874   }
1875   default:
1876     break;
1877   }
1878   return false;
1879 }
1880
1881 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
1882   return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
1883 }
1884
1885 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
1886   if (!VT1.isSimple() || !VT2.isSimple())
1887     return false;
1888   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
1889 }
1890
1891 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
1892   return isOperationLegalOrCustom(ISD::FMA, VT);
1893 }
1894
1895 // Should we expand the build vector with shuffles?
1896 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
1897       unsigned DefinedValues) const {
1898   return false;
1899 }
1900
1901 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
1902                                                EVT VT) const {
1903   return true;
1904 }
1905
1906 TargetLoweringBase::LegalizeTypeAction
1907 HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
1908   if (VT.getVectorNumElements() == 1)
1909     return TargetLoweringBase::TypeScalarizeVector;
1910
1911   // Always widen vectors of i1.
1912   MVT ElemTy = VT.getSimpleVT().getVectorElementType();
1913   if (ElemTy == MVT::i1)
1914     return TargetLoweringBase::TypeWidenVector;
1915
1916   if (Subtarget.useHVXOps()) {
1917     // If the size of VT is at least half of the vector length,
1918     // widen the vector. Note: the threshold was not selected in
1919     // any scientific way.
1920     ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
1921     if (llvm::find(Tys, ElemTy) != Tys.end()) {
1922       unsigned HwWidth = 8*Subtarget.getVectorLength();
1923       unsigned VecWidth = VT.getSizeInBits();
1924       if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
1925         return TargetLoweringBase::TypeWidenVector;
1926     }
1927   }
1928   return TargetLoweringBase::TypeSplitVector;
1929 }
1930
1931 std::pair<SDValue, int>
1932 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
1933   if (Addr.getOpcode() == ISD::ADD) {
1934     SDValue Op1 = Addr.getOperand(1);
1935     if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
1936       return { Addr.getOperand(0), CN->getSExtValue() };
1937   }
1938   return { Addr, 0 };
1939 }
1940
// Lower a vector shuffle (V1, V2, V3).  V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
//
// Only non-HVX vectors of at most 64 bits (mask of at most 8 elements) are
// expected here. The element mask is rewritten as a per-byte mask, packed
// into a 64-bit integer, and compared against a fixed list of byte patterns
// for 4-byte and 8-byte vectors. A match produces the corresponding operand,
// BSWAP node, or Hexagon instruction. An empty SDValue is returned when the
// operand types differ from the result type or when no pattern matches.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
      const {
  const auto *SVN = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> AM = SVN->getMask();
  assert(AM.size() <= 8 && "Unexpected shuffle mask");
  unsigned VecLen = AM.size();

  MVT VecTy = ty(Op);
  assert(!Subtarget.isHVXVectorType(VecTy, true) &&
         "HVX shuffles should be legal");
  assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  const SDLoc &dl(Op);

  // If the inputs are not the same as the output, bail. This is not an
  // error situation, but complicates the handling and the default expansion
  // (into BUILD_VECTOR) should be adequate.
  if (ty(Op0) != VecTy || ty(Op1) != VecTy)
    return SDValue();

  // Normalize the mask so that the first non-negative index comes from
  // the first operand.
  SmallVector<int,8> Mask(AM.begin(), AM.end());
  // F is the position of the first non-negative mask entry, or AM.size() if
  // every entry is negative (in which case the result is simply undef).
  unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
  if (F == AM.size())
    return DAG.getUNDEF(VecTy);
  // An index >= VecLen selects from the second operand: commute the mask and
  // swap the operands so that the first defined index refers to Op0.
  if (AM[F] >= int(VecLen)) {
    ShuffleVectorSDNode::commuteMask(Mask);
    std::swap(Op0, Op1);
  }

  // Express the shuffle mask in terms of bytes.
  // Each element index M expands to ElemBytes consecutive byte indexes
  // starting at M*ElemBytes; a negative element expands to ElemBytes -1s.
  SmallVector<int,8> ByteMask;
  unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    int M = Mask[i];
    if (M < 0) {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(-1);
    } else {
      for (unsigned j = 0; j != ElemBytes; ++j)
        ByteMask.push_back(M*ElemBytes + j);
    }
  }
  assert(ByteMask.size() <= 8);

  // All non-undef (non-negative) indexes are well within [0..127], so they
  // fit in a single byte. Build two 64-bit words:
  // - MaskIdx where each byte is the corresponding index (for non-negative
  //   indexes), and 0xFF for negative indexes, and
  // - MaskUnd that has 0xFF for each negative index.
  // Byte i of ByteMask is placed at bit offset 8*i, so the least significant
  // byte of each word describes result byte 0.
  uint64_t MaskIdx = 0;
  uint64_t MaskUnd = 0;
  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
    unsigned S = 8*i;
    uint64_t M = ByteMask[i] & 0xFF;
    if (M == 0xFF)
      MaskUnd |= M << S;
    MaskIdx |= M << S;
  }

  // In every comparison below the reference pattern is OR-ed with MaskUnd.
  // That forces the pattern's bytes to 0xFF wherever the mask is undefined,
  // which is also what MaskIdx holds there, so undefined result bytes never
  // prevent a match.

  if (ByteMask.size() == 4) {
    // Identity.
    if (MaskIdx == (0x03020100 | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x00010203 | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Byte packs.
    // The patterns pick the even bytes (0,2,4,6) or the odd bytes (1,3,5,7)
    // of the two operands, with byte indexes 4..7 referring to Op1. The two
    // operands are joined with COMBINE in both orders.
    // NOTE(review): presumably COMBINE puts its first operand in the high
    // half of the result - confirm against the node definition.
    SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
                                   typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
    if (MaskIdx == (0x06040200 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
    if (MaskIdx == (0x07050301 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);

    SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
                                   typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
    if (MaskIdx == (0x02000604 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
    if (MaskIdx == (0x03010705 | MaskUnd))
      return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
  }

  if (ByteMask.size() == 8) {
    // Identity.
    if (MaskIdx == (0x0706050403020100ull | MaskUnd))
      return Op0;
    // Byte swap.
    if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
      SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
      return DAG.getBitcast(VecTy, T1);
    }

    // Halfword picks.
    // Byte indexes 8..15 refer to Op1. The instructions are given the
    // operands in the order {Op1, Op0}.
    if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
      return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
    // This pattern only references bytes of Op0 (all indexes are below 8):
    // Op0 is split in two and the halves are passed as {second, first}.
    if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
      VectorPair P = opSplit(Op0, dl, DAG);
      return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
    }

    // Byte packs.
    if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
    if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
      return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
  }

  // No special pattern recognized; an empty SDValue signals that no custom
  // lowering was produced.
  return SDValue();
}
2068
2069 // Create a Hexagon-specific node for shifting a vector by an integer.
2070 SDValue
2071 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2072       const {
2073   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
2074     if (SDValue S = BVN->getSplatValue()) {
2075       unsigned NewOpc;
2076       switch (Op.getOpcode()) {
2077         case ISD::SHL:
2078           NewOpc = HexagonISD::VASL;
2079           break;
2080         case ISD::SRA:
2081           NewOpc = HexagonISD::VASR;
2082           break;
2083         case ISD::SRL:
2084           NewOpc = HexagonISD::VLSR;
2085           break;
2086         default:
2087           llvm_unreachable("Unexpected shift opcode");
2088       }
2089       return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
2090     }
2091   }
2092
2093   return SDValue();
2094 }
2095
2096 SDValue
2097 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2098   return getVectorShiftByInt(Op, DAG);
2099 }
2100
2101 SDValue
2102 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2103   if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
2104     return Op;
2105   return SDValue();
2106 }
2107
2108 SDValue
2109 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2110   MVT ResTy = ty(Op);
2111   SDValue InpV = Op.getOperand(0);
2112   MVT InpTy = ty(InpV);
2113   assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2114   const SDLoc &dl(Op);
2115
2116   // Handle conversion from i8 to v8i1.
2117   if (ResTy == MVT::v8i1) {
2118     SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2119     SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2120     return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2121   }
2122
2123   return SDValue();
2124 }
2125
2126 bool
2127 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2128       MVT VecTy, SelectionDAG &DAG,
2129       MutableArrayRef<ConstantInt*> Consts) const {
2130   MVT ElemTy = VecTy.getVectorElementType();
2131   unsigned ElemWidth = ElemTy.getSizeInBits();
2132   IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2133   bool AllConst = true;
2134
2135   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2136     SDValue V = Values[i];
2137     if (V.isUndef()) {
2138       Consts[i] = ConstantInt::get(IntTy, 0);
2139       continue;
2140     }
2141     // Make sure to always cast to IntTy.
2142     if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2143       const ConstantInt *CI = CN->getConstantIntValue();
2144       Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2145     } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2146       const ConstantFP *CF = CN->getConstantFPValue();
2147       APInt A = CF->getValueAPF().bitcastToAPInt();
2148       Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2149     } else {
2150       AllConst = false;
2151     }
2152   }
2153   return AllConst;
2154 }
2155
2156 SDValue
2157 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2158                                      MVT VecTy, SelectionDAG &DAG) const {
2159   MVT ElemTy = VecTy.getVectorElementType();
2160   assert(VecTy.getVectorNumElements() == Elem.size());
2161
2162   SmallVector<ConstantInt*,4> Consts(Elem.size());
2163   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2164
2165   unsigned First, Num = Elem.size();
2166   for (First = 0; First != Num; ++First)
2167     if (!isUndef(Elem[First]))
2168       break;
2169   if (First == Num)
2170     return DAG.getUNDEF(VecTy);
2171
2172   if (AllConst &&
2173       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2174     return getZero(dl, VecTy, DAG);
2175
2176   if (ElemTy == MVT::i16) {
2177     assert(Elem.size() == 2);
2178     if (AllConst) {
2179       uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2180                    Consts[1]->getZExtValue() << 16;
2181       return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
2182     }
2183     SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
2184                          {Elem[1], Elem[0]}, DAG);
2185     return DAG.getBitcast(MVT::v2i16, N);
2186   }
2187
2188   if (ElemTy == MVT::i8) {
2189     // First try generating a constant.
2190     if (AllConst) {
2191       int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2192                   (Consts[1]->getZExtValue() & 0xFF) << 8 |
2193                   (Consts[1]->getZExtValue() & 0xFF) << 16 |
2194                   Consts[2]->getZExtValue() << 24;
2195       return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2196     }
2197
2198     // Then try splat.
2199     bool IsSplat = true;
2200     for (unsigned i = 0; i != Num; ++i) {
2201       if (i == First)
2202         continue;
2203       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2204         continue;
2205       IsSplat = false;
2206       break;
2207     }
2208     if (IsSplat) {
2209       // Legalize the operand to VSPLAT.
2210       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2211       return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
2212     }
2213
2214     // Generate
2215     //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2216     //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2217     assert(Elem.size() == 4);
2218     SDValue Vs[4];
2219     for (unsigned i = 0; i != 4; ++i) {
2220       Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2221       Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2222     }
2223     SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2224     SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2225     SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2226     SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2227     SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2228
2229     SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2230     return DAG.getBitcast(MVT::v4i8, R);
2231   }
2232
2233 #ifndef NDEBUG
2234   dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
2235 #endif
2236   llvm_unreachable("Unexpected vector element type");
2237 }
2238
2239 SDValue
2240 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2241                                      MVT VecTy, SelectionDAG &DAG) const {
2242   MVT ElemTy = VecTy.getVectorElementType();
2243   assert(VecTy.getVectorNumElements() == Elem.size());
2244
2245   SmallVector<ConstantInt*,8> Consts(Elem.size());
2246   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2247
2248   unsigned First, Num = Elem.size();
2249   for (First = 0; First != Num; ++First)
2250     if (!isUndef(Elem[First]))
2251       break;
2252   if (First == Num)
2253     return DAG.getUNDEF(VecTy);
2254
2255   if (AllConst &&
2256       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2257     return getZero(dl, VecTy, DAG);
2258
2259   // First try splat if possible.
2260   if (ElemTy == MVT::i16) {
2261     bool IsSplat = true;
2262     for (unsigned i = 0; i != Num; ++i) {
2263       if (i == First)
2264         continue;
2265       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2266         continue;
2267       IsSplat = false;
2268       break;
2269     }
2270     if (IsSplat) {
2271       // Legalize the operand to VSPLAT.
2272       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2273       return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
2274     }
2275   }
2276
2277   // Then try constant.
2278   if (AllConst) {
2279     uint64_t Val = 0;
2280     unsigned W = ElemTy.getSizeInBits();
2281     uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
2282                   : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
2283     for (unsigned i = 0; i != Num; ++i)
2284       Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2285     SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2286     return DAG.getBitcast(VecTy, V0);
2287   }
2288
2289   // Build two 32-bit vectors and concatenate.
2290   MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2291   SDValue L = (ElemTy == MVT::i32)
2292                 ? Elem[0]
2293                 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2294   SDValue H = (ElemTy == MVT::i32)
2295                 ? Elem[1]
2296                 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2297   return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
2298 }
2299
2300 SDValue
2301 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2302                                      const SDLoc &dl, MVT ValTy, MVT ResTy,
2303                                      SelectionDAG &DAG) const {
2304   MVT VecTy = ty(VecV);
2305   assert(!ValTy.isVector() ||
2306          VecTy.getVectorElementType() == ValTy.getVectorElementType());
2307   unsigned VecWidth = VecTy.getSizeInBits();
2308   unsigned ValWidth = ValTy.getSizeInBits();
2309   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2310   assert((VecWidth % ElemWidth) == 0);
2311   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
2312
2313   // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2314   // without any coprocessors).
2315   if (ElemWidth == 1) {
2316     assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
2317     assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2318     // Check if this is an extract of the lowest bit.
2319     if (IdxN) {
2320       // Extracting the lowest bit is a no-op, but it changes the type,
2321       // so it must be kept as an operation to avoid errors related to
2322       // type mismatches.
2323       if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
2324         return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2325     }
2326
2327     // If the value extracted is a single bit, use tstbit.
2328     if (ValWidth == 1) {
2329       SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2330       SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
2331       SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2332       return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2333     }
2334
2335     // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2336     // a predicate register. The elements of the vector are repeated
2337     // in the register (if necessary) so that the total number is 8.
2338     // The extracted subvector will need to be expanded in such a way.
2339     unsigned Scale = VecWidth / ValWidth;
2340
2341     // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2342     // position 0.
2343     assert(ty(IdxV) == MVT::i32);
2344     SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2345                              DAG.getConstant(8*Scale, dl, MVT::i32));
2346     SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2347     SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2348     while (Scale > 1) {
2349       // The longest possible subvector is at most 32 bits, so it is always
2350       // contained in the low subregister.
2351       T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
2352       T1 = expandPredicate(T1, dl, DAG);
2353       Scale /= 2;
2354     }
2355
2356     return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2357   }
2358
2359   assert(VecWidth == 32 || VecWidth == 64);
2360
2361   // Cast everything to scalar integer types.
2362   MVT ScalarTy = tyScalar(VecTy);
2363   VecV = DAG.getBitcast(ScalarTy, VecV);
2364
2365   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2366   SDValue ExtV;
2367
2368   if (IdxN) {
2369     unsigned Off = IdxN->getZExtValue() * ElemWidth;
2370     if (VecWidth == 64 && ValWidth == 32) {
2371       assert(Off == 0 || Off == 32);
2372       unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
2373       ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
2374     } else if (Off == 0 && (ValWidth % 8) == 0) {
2375       ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2376     } else {
2377       SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2378       // The return type of EXTRACTU must be the same as the type of the
2379       // input vector.
2380       ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2381                          {VecV, WidthV, OffV});
2382     }
2383   } else {
2384     if (ty(IdxV) != MVT::i32)
2385       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2386     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2387                                DAG.getConstant(ElemWidth, dl, MVT::i32));
2388     ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2389                        {VecV, WidthV, OffV});
2390   }
2391
2392   // Cast ExtV to the requested result type.
2393   ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2394   ExtV = DAG.getBitcast(ResTy, ExtV);
2395   return ExtV;
2396 }
2397
2398 SDValue
2399 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2400                                     const SDLoc &dl, MVT ValTy,
2401                                     SelectionDAG &DAG) const {
2402   MVT VecTy = ty(VecV);
2403   if (VecTy.getVectorElementType() == MVT::i1) {
2404     MVT ValTy = ty(ValV);
2405     assert(ValTy.getVectorElementType() == MVT::i1);
2406     SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
2407     unsigned VecLen = VecTy.getVectorNumElements();
2408     unsigned Scale = VecLen / ValTy.getVectorNumElements();
2409     assert(Scale > 1);
2410
2411     for (unsigned R = Scale; R > 1; R /= 2) {
2412       ValR = contractPredicate(ValR, dl, DAG);
2413       ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2414                          DAG.getUNDEF(MVT::i32), ValR);
2415     }
2416     // The longest possible subvector is at most 32 bits, so it is always
2417     // contained in the low subregister.
2418     ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
2419
2420     unsigned ValBytes = 64 / Scale;
2421     SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
2422     SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2423                               DAG.getConstant(8, dl, MVT::i32));
2424     SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2425     SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2426                               {VecR, ValR, Width, Idx});
2427     return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2428   }
2429
2430   unsigned VecWidth = VecTy.getSizeInBits();
2431   unsigned ValWidth = ValTy.getSizeInBits();
2432   assert(VecWidth == 32 || VecWidth == 64);
2433   assert((VecWidth % ValWidth) == 0);
2434
2435   // Cast everything to scalar integer types.
2436   MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2437   // The actual type of ValV may be different than ValTy (which is related
2438   // to the vector type).
2439   unsigned VW = ty(ValV).getSizeInBits();
2440   ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2441   VecV = DAG.getBitcast(ScalarTy, VecV);
2442   if (VW != VecWidth)
2443     ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2444
2445   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2446   SDValue InsV;
2447
2448   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2449     unsigned W = C->getZExtValue() * ValWidth;
2450     SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2451     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2452                        {VecV, ValV, WidthV, OffV});
2453   } else {
2454     if (ty(IdxV) != MVT::i32)
2455       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2456     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2457     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2458                        {VecV, ValV, WidthV, OffV});
2459   }
2460
2461   return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2462 }
2463
2464 SDValue
2465 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2466                                        SelectionDAG &DAG) const {
2467   assert(ty(Vec32).getSizeInBits() == 32);
2468   if (isUndef(Vec32))
2469     return DAG.getUNDEF(MVT::i64);
2470   return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
2471 }
2472
2473 SDValue
2474 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2475                                          SelectionDAG &DAG) const {
2476   assert(ty(Vec64).getSizeInBits() == 64);
2477   if (isUndef(Vec64))
2478     return DAG.getUNDEF(MVT::i32);
2479   return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
2480 }
2481
2482 SDValue
2483 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2484       const {
2485   if (Ty.isVector()) {
2486     assert(Ty.isInteger() && "Only integer vectors are supported here");
2487     unsigned W = Ty.getSizeInBits();
2488     if (W <= 64)
2489       return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2490     return DAG.getNode(HexagonISD::VZERO, dl, Ty);
2491   }
2492
2493   if (Ty.isInteger())
2494     return DAG.getConstant(0, dl, Ty);
2495   if (Ty.isFloatingPoint())
2496     return DAG.getConstantFP(0.0, dl, Ty);
2497   llvm_unreachable("Invalid type for zero");
2498 }
2499
2500 SDValue
2501 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2502   MVT VecTy = ty(Op);
2503   unsigned BW = VecTy.getSizeInBits();
2504   const SDLoc &dl(Op);
2505   SmallVector<SDValue,8> Ops;
2506   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2507     Ops.push_back(Op.getOperand(i));
2508
2509   if (BW == 32)
2510     return buildVector32(Ops, dl, VecTy, DAG);
2511   if (BW == 64)
2512     return buildVector64(Ops, dl, VecTy, DAG);
2513
2514   if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2515     // For each i1 element in the resulting predicate register, put 1
2516     // shifted by the index of the element into a general-purpose register,
2517     // then or them together and transfer it back into a predicate register.
2518     SDValue Rs[8];
2519     SDValue Z = getZero(dl, MVT::i32, DAG);
2520     // Always produce 8 bits, repeat inputs if necessary.
2521     unsigned Rep = 8 / VecTy.getVectorNumElements();
2522     for (unsigned i = 0; i != 8; ++i) {
2523       SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
2524       Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
2525     }
2526     for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
2527       for (unsigned i = 0, e = A.size()/2; i != e; ++i)
2528         Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
2529     }
2530     // Move the value directly to a predicate register.
2531     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
2532   }
2533
2534   return SDValue();
2535 }
2536
2537 SDValue
2538 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2539                                            SelectionDAG &DAG) const {
2540   MVT VecTy = ty(Op);
2541   const SDLoc &dl(Op);
2542   if (VecTy.getSizeInBits() == 64) {
2543     assert(Op.getNumOperands() == 2);
2544     return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
2545                        Op.getOperand(0));
2546   }
2547
2548   MVT ElemTy = VecTy.getVectorElementType();
2549   if (ElemTy == MVT::i1) {
2550     assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
2551     MVT OpTy = ty(Op.getOperand(0));
2552     // Scale is how many times the operands need to be contracted to match
2553     // the representation in the target register.
2554     unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
2555     assert(Scale == Op.getNumOperands() && Scale > 1);
2556
2557     // First, convert all bool vectors to integers, then generate pairwise
2558     // inserts to form values of doubled length. Up until there are only
2559     // two values left to concatenate, all of these values will fit in a
2560     // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2561     SmallVector<SDValue,4> Words[2];
2562     unsigned IdxW = 0;
2563
2564     for (SDValue P : Op.getNode()->op_values()) {
2565       SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
2566       for (unsigned R = Scale; R > 1; R /= 2) {
2567         W = contractPredicate(W, dl, DAG);
2568         W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2569                         DAG.getUNDEF(MVT::i32), W);
2570       }
2571       W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
2572       Words[IdxW].push_back(W);
2573     }
2574
2575     while (Scale > 2) {
2576       SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
2577       Words[IdxW ^ 1].clear();
2578
2579       for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
2580         SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
2581         // Insert W1 into W0 right next to the significant bits of W0.
2582         SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2583                                 {W0, W1, WidthV, WidthV});
2584         Words[IdxW ^ 1].push_back(T);
2585       }
2586       IdxW ^= 1;
2587       Scale /= 2;
2588     }
2589
2590     // Another sanity check. At this point there should only be two words
2591     // left, and Scale should be 2.
2592     assert(Scale == 2 && Words[IdxW].size() == 2);
2593
2594     SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2595                              Words[IdxW][1], Words[IdxW][0]);
2596     return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
2597   }
2598
2599   return SDValue();
2600 }
2601
2602 SDValue
2603 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
2604                                                SelectionDAG &DAG) const {
2605   SDValue Vec = Op.getOperand(0);
2606   MVT ElemTy = ty(Vec).getVectorElementType();
2607   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
2608 }
2609
2610 SDValue
2611 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
2612                                               SelectionDAG &DAG) const {
2613   return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
2614                        ty(Op), ty(Op), DAG);
2615 }
2616
2617 SDValue
2618 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
2619                                               SelectionDAG &DAG) const {
2620   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
2621                       SDLoc(Op), ty(Op).getVectorElementType(), DAG);
2622 }
2623
2624 SDValue
2625 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
2626                                              SelectionDAG &DAG) const {
2627   SDValue ValV = Op.getOperand(1);
2628   return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
2629                       SDLoc(Op), ty(ValV), DAG);
2630 }
2631
2632 bool
2633 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
2634   // Assuming the caller does not have either a signext or zeroext modifier, and
2635   // only one value is accepted, any reasonable truncation is allowed.
2636   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
2637     return false;
2638
2639   // FIXME: in principle up to 64-bit could be made safe, but it would be very
2640   // fragile at the moment: any support for multiple value returns would be
2641   // liable to disallow tail calls involving i64 -> iN truncation in many cases.
2642   return Ty1->getPrimitiveSizeInBits() <= 32;
2643 }
2644
2645 SDValue
2646 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
2647       const {
2648   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2649   unsigned HaveAlign = LN->getAlignment();
2650   MVT LoadTy = ty(Op);
2651   unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
2652   if (HaveAlign >= NeedAlign)
2653     return Op;
2654
2655   const SDLoc &dl(Op);
2656   const DataLayout &DL = DAG.getDataLayout();
2657   LLVMContext &Ctx = *DAG.getContext();
2658   unsigned AS = LN->getAddressSpace();
2659
2660   // If the load aligning is disabled or the load can be broken up into two
2661   // smaller legal loads, do the default (target-independent) expansion.
2662   bool DoDefault = false;
2663   // Handle it in the default way if this is an indexed load.
2664   if (!LN->isUnindexed())
2665     DoDefault = true;
2666
2667   if (!AlignLoads) {
2668     if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
2669       return Op;
2670     DoDefault = true;
2671   }
2672   if (!DoDefault && 2*HaveAlign == NeedAlign) {
2673     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
2674     MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
2675                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
2676     DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
2677   }
2678   if (DoDefault) {
2679     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
2680     return DAG.getMergeValues({P.first, P.second}, dl);
2681   }
2682
2683   // The code below generates two loads, both aligned as NeedAlign, and
2684   // with the distance of NeedAlign between them. For that to cover the
2685   // bits that need to be loaded (and without overlapping), the size of
2686   // the loads should be equal to NeedAlign. This is true for all loadable
2687   // types, but add an assertion in case something changes in the future.
2688   assert(LoadTy.getSizeInBits() == 8*NeedAlign);
2689
2690   unsigned LoadLen = NeedAlign;
2691   SDValue Base = LN->getBasePtr();
2692   SDValue Chain = LN->getChain();
2693   auto BO = getBaseAndOffset(Base);
2694   unsigned BaseOpc = BO.first.getOpcode();
2695   if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
2696     return Op;
2697
2698   if (BO.second % LoadLen != 0) {
2699     BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
2700                            DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
2701     BO.second -= BO.second % LoadLen;
2702   }
2703   SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
2704       ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
2705                     DAG.getConstant(NeedAlign, dl, MVT::i32))
2706       : BO.first;
2707   SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
2708   SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
2709
2710   MachineMemOperand *WideMMO = nullptr;
2711   if (MachineMemOperand *MMO = LN->getMemOperand()) {
2712     MachineFunction &MF = DAG.getMachineFunction();
2713     WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
2714                     2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
2715                     MMO->getSyncScopeID(), MMO->getOrdering(),
2716                     MMO->getFailureOrdering());
2717   }
2718
2719   SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
2720   SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
2721
2722   SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
2723                                 {Load1, Load0, BaseNoOff.getOperand(0)});
2724   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2725                                  Load0.getValue(1), Load1.getValue(1));
2726   SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
2727   return M;
2728 }
2729
2730 SDValue
2731 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
2732   const SDLoc &dl(Op);
2733   unsigned Opc = Op.getOpcode();
2734   SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
2735
2736   if (Opc == ISD::ADDCARRY)
2737     return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
2738                        { X, Y, C });
2739
2740   EVT CarryTy = C.getValueType();
2741   SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
2742                              { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
2743   SDValue Out[] = { SubC.getValue(0),
2744                     DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
2745   return DAG.getMergeValues(Out, dl);
2746 }
2747
2748 SDValue
2749 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
2750   SDValue Chain     = Op.getOperand(0);
2751   SDValue Offset    = Op.getOperand(1);
2752   SDValue Handler   = Op.getOperand(2);
2753   SDLoc dl(Op);
2754   auto PtrVT = getPointerTy(DAG.getDataLayout());
2755
2756   // Mark function as containing a call to EH_RETURN.
2757   HexagonMachineFunctionInfo *FuncInfo =
2758     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
2759   FuncInfo->setHasEHReturn();
2760
2761   unsigned OffsetReg = Hexagon::R28;
2762
2763   SDValue StoreAddr =
2764       DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
2765                   DAG.getIntPtrConstant(4, dl));
2766   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
2767   Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
2768
2769   // Not needed we already use it as explict input to EH_RETURN.
2770   // MF.getRegInfo().addLiveOut(OffsetReg);
2771
2772   return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
2773 }
2774
2775 SDValue
2776 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2777   unsigned Opc = Op.getOpcode();
2778
2779   // Handle INLINEASM first.
2780   if (Opc == ISD::INLINEASM)
2781     return LowerINLINEASM(Op, DAG);
2782
2783   if (isHvxOperation(Op)) {
2784     // If HVX lowering returns nothing, try the default lowering.
2785     if (SDValue V = LowerHvxOperation(Op, DAG))
2786       return V;
2787   }
2788
2789   switch (Opc) {
2790     default:
2791 #ifndef NDEBUG
2792       Op.getNode()->dumpr(&DAG);
2793       if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
2794         errs() << "Error: check for a non-legal type in this operation\n";
2795 #endif
2796       llvm_unreachable("Should not custom lower this!");
2797     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
2798     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
2799     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
2800     case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
2801     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2802     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
2803     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
2804     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
2805     case ISD::LOAD:                 return LowerUnalignedLoad(Op, DAG);
2806     case ISD::ADDCARRY:
2807     case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
2808     case ISD::SRA:
2809     case ISD::SHL:
2810     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
2811     case ISD::ROTL:                 return LowerROTL(Op, DAG);
2812     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
2813     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
2814     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
2815     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
2816     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
2817     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
2818     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
2819     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
2820     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
2821     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
2822     case ISD::VASTART:              return LowerVASTART(Op, DAG);
2823     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
2824     case ISD::SETCC:                return LowerSETCC(Op, DAG);
2825     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
2826     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2827     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
2828     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
2829     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
2830       break;
2831   }
2832
2833   return SDValue();
2834 }
2835
2836 void
2837 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
2838                                           SmallVectorImpl<SDValue> &Results,
2839                                           SelectionDAG &DAG) const {
2840   const SDLoc &dl(N);
2841   switch (N->getOpcode()) {
2842     case ISD::SRL:
2843     case ISD::SRA:
2844     case ISD::SHL:
2845       return;
2846     case ISD::BITCAST:
2847       // Handle a bitcast from v8i1 to i8.
2848       if (N->getValueType(0) == MVT::i8) {
2849         SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
2850                              N->getOperand(0), DAG);
2851         Results.push_back(P);
2852       }
2853       break;
2854   }
2855 }
2856
2857 /// Returns relocation base for the given PIC jumptable.
2858 SDValue
2859 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2860                                                 SelectionDAG &DAG) const {
2861   int Idx = cast<JumpTableSDNode>(Table)->getIndex();
2862   EVT VT = Table.getValueType();
2863   SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
2864   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
2865 }
2866
2867 //===----------------------------------------------------------------------===//
2868 // Inline Assembly Support
2869 //===----------------------------------------------------------------------===//
2870
2871 TargetLowering::ConstraintType
2872 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
2873   if (Constraint.size() == 1) {
2874     switch (Constraint[0]) {
2875       case 'q':
2876       case 'v':
2877         if (Subtarget.useHVXOps())
2878           return C_RegisterClass;
2879         break;
2880       case 'a':
2881         return C_RegisterClass;
2882       default:
2883         break;
2884     }
2885   }
2886   return TargetLowering::getConstraintType(Constraint);
2887 }
2888
2889 std::pair<unsigned, const TargetRegisterClass*>
2890 HexagonTargetLowering::getRegForInlineAsmConstraint(
2891     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
2892
2893   if (Constraint.size() == 1) {
2894     switch (Constraint[0]) {
2895     case 'r':   // R0-R31
2896       switch (VT.SimpleTy) {
2897       default:
2898         return {0u, nullptr};
2899       case MVT::i1:
2900       case MVT::i8:
2901       case MVT::i16:
2902       case MVT::i32:
2903       case MVT::f32:
2904         return {0u, &Hexagon::IntRegsRegClass};
2905       case MVT::i64:
2906       case MVT::f64:
2907         return {0u, &Hexagon::DoubleRegsRegClass};
2908       }
2909       break;
2910     case 'a': // M0-M1
2911       if (VT != MVT::i32)
2912         return {0u, nullptr};
2913       return {0u, &Hexagon::ModRegsRegClass};
2914     case 'q': // q0-q3
2915       switch (VT.getSizeInBits()) {
2916       default:
2917         return {0u, nullptr};
2918       case 512:
2919       case 1024:
2920         return {0u, &Hexagon::HvxQRRegClass};
2921       }
2922       break;
2923     case 'v': // V0-V31
2924       switch (VT.getSizeInBits()) {
2925       default:
2926         return {0u, nullptr};
2927       case 512:
2928         return {0u, &Hexagon::HvxVRRegClass};
2929       case 1024:
2930         if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
2931           return {0u, &Hexagon::HvxVRRegClass};
2932         return {0u, &Hexagon::HvxWRRegClass};
2933       case 2048:
2934         return {0u, &Hexagon::HvxWRRegClass};
2935       }
2936       break;
2937     default:
2938       return {0u, nullptr};
2939     }
2940   }
2941
2942   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2943 }
2944
2945 /// isFPImmLegal - Returns true if the target can instruction select the
2946 /// specified FP immediate natively. If false, the legalizer will
2947 /// materialize the FP immediate as a load from a constant pool.
2948 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
2949   return Subtarget.hasV5Ops();
2950 }
2951
2952 /// isLegalAddressingMode - Return true if the addressing mode represented by
2953 /// AM is legal for this target, for a load/store of the specified type.
2954 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2955                                                   const AddrMode &AM, Type *Ty,
2956                                                   unsigned AS, Instruction *I) const {
2957   if (Ty->isSized()) {
2958     // When LSR detects uses of the same base address to access different
2959     // types (e.g. unions), it will assume a conservative type for these
2960     // uses:
2961     //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
2962     // The type Ty passed here would then be "void". Skip the alignment
2963     // checks, but do not return false right away, since that confuses
2964     // LSR into crashing.
2965     unsigned A = DL.getABITypeAlignment(Ty);
2966     // The base offset must be a multiple of the alignment.
2967     if ((AM.BaseOffs % A) != 0)
2968       return false;
2969     // The shifted offset must fit in 11 bits.
2970     if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
2971       return false;
2972   }
2973
2974   // No global is ever allowed as a base.
2975   if (AM.BaseGV)
2976     return false;
2977
2978   int Scale = AM.Scale;
2979   if (Scale < 0)
2980     Scale = -Scale;
2981   switch (Scale) {
2982   case 0:  // No scale reg, "r+i", "r", or just "i".
2983     break;
2984   default: // No scaled addressing mode.
2985     return false;
2986   }
2987   return true;
2988 }
2989
2990 /// Return true if folding a constant offset with the given GlobalAddress is
2991 /// legal.  It is frequently not legal in PIC relocation models.
2992 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
2993       const {
2994   return HTM.getRelocationModel() == Reloc::Static;
2995 }
2996
2997 /// isLegalICmpImmediate - Return true if the specified immediate is legal
2998 /// icmp immediate, that is the target has icmp instructions which can compare
2999 /// a register against the immediate without having to materialize the
3000 /// immediate into a register.
3001 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3002   return Imm >= -512 && Imm <= 511;
3003 }
3004
3005 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3006 /// for tail call optimization. Targets which want to do tail call
3007 /// optimization should implement this function.
3008 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3009                                  SDValue Callee,
3010                                  CallingConv::ID CalleeCC,
3011                                  bool IsVarArg,
3012                                  bool IsCalleeStructRet,
3013                                  bool IsCallerStructRet,
3014                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
3015                                  const SmallVectorImpl<SDValue> &OutVals,
3016                                  const SmallVectorImpl<ISD::InputArg> &Ins,
3017                                  SelectionDAG& DAG) const {
3018   const Function &CallerF = DAG.getMachineFunction().getFunction();
3019   CallingConv::ID CallerCC = CallerF.getCallingConv();
3020   bool CCMatch = CallerCC == CalleeCC;
3021
3022   // ***************************************************************************
3023   //  Look for obvious safe cases to perform tail call optimization that do not
3024   //  require ABI changes.
3025   // ***************************************************************************
3026
3027   // If this is a tail call via a function pointer, then don't do it!
3028   if (!isa<GlobalAddressSDNode>(Callee) &&
3029       !isa<ExternalSymbolSDNode>(Callee)) {
3030     return false;
3031   }
3032
3033   // Do not optimize if the calling conventions do not match and the conventions
3034   // used are not C or Fast.
3035   if (!CCMatch) {
3036     bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3037     bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3038     // If R & E, then ok.
3039     if (!R || !E)
3040       return false;
3041   }
3042
3043   // Do not tail call optimize vararg calls.
3044   if (IsVarArg)
3045     return false;
3046
3047   // Also avoid tail call optimization if either caller or callee uses struct
3048   // return semantics.
3049   if (IsCalleeStructRet || IsCallerStructRet)
3050     return false;
3051
3052   // In addition to the cases above, we also disable Tail Call Optimization if
3053   // the calling convention code that at least one outgoing argument needs to
3054   // go on the stack. We cannot check that here because at this point that
3055   // information is not available.
3056   return true;
3057 }
3058
3059 /// Returns the target specific optimal type for load and store operations as
3060 /// a result of memset, memcpy, and memmove lowering.
3061 ///
3062 /// If DstAlign is zero that means it's safe to destination alignment can
3063 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3064 /// a need to check it against alignment requirement, probably because the
3065 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3066 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3067 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3068 /// does not need to be loaded.  It returns EVT::Other if the type should be
3069 /// determined using generic target-independent logic.
3070 EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
3071       unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
3072       bool MemcpyStrSrc, MachineFunction &MF) const {
3073
3074   auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
3075     return (GivenA % MinA) == 0;
3076   };
3077
3078   if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
3079     return MVT::i64;
3080   if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
3081     return MVT::i32;
3082   if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
3083     return MVT::i16;
3084
3085   return MVT::Other;
3086 }
3087
3088 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
3089       unsigned AS, unsigned Align, bool *Fast) const {
3090   if (Fast)
3091     *Fast = false;
3092   return Subtarget.isHVXVectorType(VT.getSimpleVT());
3093 }
3094
3095 std::pair<const TargetRegisterClass*, uint8_t>
3096 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3097       MVT VT) const {
3098   if (Subtarget.isHVXVectorType(VT, true)) {
3099     unsigned BitWidth = VT.getSizeInBits();
3100     unsigned VecWidth = Subtarget.getVectorLength() * 8;
3101
3102     if (VT.getVectorElementType() == MVT::i1)
3103       return std::make_pair(&Hexagon::HvxQRRegClass, 1);
3104     if (BitWidth == VecWidth)
3105       return std::make_pair(&Hexagon::HvxVRRegClass, 1);
3106     assert(BitWidth == 2 * VecWidth);
3107     return std::make_pair(&Hexagon::HvxWRRegClass, 1);
3108   }
3109
3110   return TargetLowering::findRepresentativeClass(TRI, VT);
3111 }
3112
3113 Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
3114       AtomicOrdering Ord) const {
3115   BasicBlock *BB = Builder.GetInsertBlock();
3116   Module *M = BB->getParent()->getParent();
3117   Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
3118   unsigned SZ = Ty->getPrimitiveSizeInBits();
3119   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3120   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3121                                    : Intrinsic::hexagon_L4_loadd_locked;
3122   Value *Fn = Intrinsic::getDeclaration(M, IntID);
3123   return Builder.CreateCall(Fn, Addr, "larx");
3124 }
3125
3126 /// Perform a store-conditional operation to Addr. Return the status of the
3127 /// store. This should be 0 if the store succeeded, non-zero otherwise.
3128 Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
3129       Value *Val, Value *Addr, AtomicOrdering Ord) const {
3130   BasicBlock *BB = Builder.GetInsertBlock();
3131   Module *M = BB->getParent()->getParent();
3132   Type *Ty = Val->getType();
3133   unsigned SZ = Ty->getPrimitiveSizeInBits();
3134   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3135   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3136                                    : Intrinsic::hexagon_S4_stored_locked;
3137   Value *Fn = Intrinsic::getDeclaration(M, IntID);
3138   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
3139   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3140   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3141   return Ext;
3142 }
3143
3144 TargetLowering::AtomicExpansionKind
3145 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3146   // Do not expand loads and stores that don't exceed 64 bits.
3147   return LI->getType()->getPrimitiveSizeInBits() > 64
3148              ? AtomicExpansionKind::LLOnly
3149              : AtomicExpansionKind::None;
3150 }
3151
3152 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3153   // Do not expand loads and stores that don't exceed 64 bits.
3154   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
3155 }
3156
3157 bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3158       AtomicCmpXchgInst *AI) const {
3159   const DataLayout &DL = AI->getModule()->getDataLayout();
3160   unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
3161   return Size >= 4 && Size <= 8;
3162 }