1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the interfaces that VE uses to lower LLVM code into a
12 //===----------------------------------------------------------------------===//
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCExpr.h"
16 #include "VEMachineFunctionInfo.h"
17 #include "VERegisterInfo.h"
18 #include "VETargetMachine.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
35 #define DEBUG_TYPE "ve-lower"
37 //===----------------------------------------------------------------------===//
38 // Calling Convention Implementation
39 //===----------------------------------------------------------------------===//
// Custom CC handler for f32 arguments passed on the stack: reserves a
// 4-byte pad at 8-byte alignment, then places the 4-byte float value
// immediately after it, so the value occupies one half of an 8-byte slot.
// NOTE(review): this listing has gaps — the switch's case labels, the
// return statements and the closing braces are missing between the
// numbered lines; confirm against the full source.
41 static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
42 CCValAssign::LocInfo LocInfo,
43 ISD::ArgFlagsTy ArgFlags, CCState &State) {
44 switch (LocVT.SimpleTy) {
46 // Allocate stack like below
// Use align=8 for dummy area to align the beginning of these 2 areas.
51 // Use align=8 for dummy area to align the beginning of these 2 area.
52 State.AllocateStack(4, Align(8)); // for empty area
53 // Use align=4 for value to place it at just after the dummy area.
54 unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
// Record the computed stack location for this argument.
55 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
63 #include "VEGenCallingConv.inc"
// Returns true when every value in Outs can be assigned a return
// location by RetCC_VE (i.e. the return fits VE's return-register
// convention); used by the IR verifier/SelectionDAG to decide whether
// sret demotion is needed.
65 bool VETargetLowering::CanLowerReturn(
66 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
67 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
68 CCAssignFn *RetCC = RetCC_VE;
69 SmallVector<CCValAssign, 16> RVLocs;
70 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
71 return CCInfo.CheckReturn(Outs, RetCC);
// Lowers a function return: assigns each return value to its location
// via RetCC_VE, extends values as required by the location info, copies
// them into the return registers (glued together), and emits a
// VEISD::RET_FLAG node.
// NOTE(review): gappy listing — the `SDValue` return type line, the
// declaration of `Flag`, the `break;` statements between switch cases,
// and several closing braces are missing; confirm against full source.
75 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
77 const SmallVectorImpl<ISD::OutputArg> &Outs,
78 const SmallVectorImpl<SDValue> &OutVals,
79 const SDLoc &DL, SelectionDAG &DAG) const {
80 // CCValAssign - represent the assignment of the return value to locations.
81 SmallVector<CCValAssign, 16> RVLocs;
83 // CCState - Info about the registers and stack slot.
84 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
87 // Analyze return values.
88 CCInfo.AnalyzeReturn(Outs, RetCC_VE);
91 SmallVector<SDValue, 4> RetOps(1, Chain);
93 // Copy the result values into the output registers.
94 for (unsigned i = 0; i != RVLocs.size(); ++i) {
95 CCValAssign &VA = RVLocs[i];
96 assert(VA.isRegLoc() && "Can only return in registers!");
97 SDValue OutVal = OutVals[i];
99 // Integer return values must be sign or zero extended by the callee.
100 switch (VA.getLocInfo()) {
101 case CCValAssign::Full:
103 case CCValAssign::SExt:
104 OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
106 case CCValAssign::ZExt:
107 OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
109 case CCValAssign::AExt:
110 OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
113 llvm_unreachable("Unknown loc info!");
116 assert(!VA.needsCustom() && "Unexpected custom lowering");
118 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
120 // Guarantee that all emitted copies are stuck together with flags.
121 Flag = Chain.getValue(1);
122 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
125 RetOps[0] = Chain; // Update chain.
127 // Add the flag if we have it.
129 RetOps.push_back(Flag);
131 return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
// Lowers incoming formal arguments: reserves the 64-byte preserved
// register-argument area, runs CC_VE to assign locations, then for each
// argument either copies it out of its live-in register (inserting
// Assert[SZ]ext / truncate as needed) or loads it from its fixed stack
// slot at %fp + 176 + offset. For varargs, records the frame offset of
// the first variable argument in VEMachineFunctionInfo.
// NOTE(review): gappy listing — the isRegLoc/isMemLoc branch
// scaffolding, the `VReg` declaration feeding addLiveIn/CopyFromReg,
// `break;`s, and closing braces are missing between the numbered lines.
134 SDValue VETargetLowering::LowerFormalArguments(
135 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
136 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
137 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
138 MachineFunction &MF = DAG.getMachineFunction();
140 // Get the base offset of the incoming arguments stack space.
141 unsigned ArgsBaseOffset = 176;
142 // Get the size of the preserved arguments area
143 unsigned ArgsPreserved = 64;
145 // Analyze arguments according to CC_VE.
146 SmallVector<CCValAssign, 16> ArgLocs;
147 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
149 // Allocate the preserved area first.
150 CCInfo.AllocateStack(ArgsPreserved, Align(8));
151 // We already allocated the preserved area, so the stack offset computed
152 // by CC_VE would be correct now.
153 CCInfo.AnalyzeFormalArguments(Ins, CC_VE)
155 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
156 CCValAssign &VA = ArgLocs[i];
158 // This argument is passed in a register.
159 // All integer register arguments are promoted by the caller to i64.
161 // Create a virtual register for the promoted live-in value.
163 MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
164 SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
166 // Get the high bits for i32 struct elements.
167 if (VA.getValVT() == MVT::i32 && VA.needsCustom())
168 Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
169 DAG.getConstant(32, DL, MVT::i32));
171 // The caller promoted the argument, so insert an Assert?ext SDNode so we
172 // won't promote the value again in this function.
173 switch (VA.getLocInfo()) {
174 case CCValAssign::SExt:
175 Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
176 DAG.getValueType(VA.getValVT()));
178 case CCValAssign::ZExt:
179 Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
180 DAG.getValueType(VA.getValVT()));
186 // Truncate the register down to the argument type.
188 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
190 InVals.push_back(Arg);
194 // The registers are exhausted. This argument was passed on the stack.
195 assert(VA.isMemLoc());
196 // The CC_VE_Full/Half functions compute stack offsets relative to the
197 // beginning of the arguments area at %fp+176.
198 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
199 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
200 int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
202 DAG.getLoad(VA.getValVT(), DL, Chain,
203 DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
204 MachinePointerInfo::getFixedStack(MF, FI)));
210 // This function takes variable arguments, some of which may have been passed
211 // in registers %s0-%s8.
213 // The va_start intrinsic needs to know the offset to the first variable
215 // TODO: need to calculate offset correctly once we support f128.
216 unsigned ArgOffset = ArgLocs.size() * 8;
217 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
218 // Skip the 176 bytes of register save area.
219 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
224 // FIXME? Maybe this could be a TableGen attribute on some registers and
225 // this table could be generated automatically from RegInfo.
// Maps the textual register names accepted by llvm.read_register /
// llvm.write_register to VE physical registers; aborts with
// report_fatal_error for unknown names.
// NOTE(review): gappy listing — the `.Default(...)` terminator of the
// StringSwitch and the `if (Reg) return Reg;` scaffolding between lines
// 237 and 243 are missing; confirm against the full source.
226 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
227 const MachineFunction &MF) const {
228 Register Reg = StringSwitch<Register>(RegName)
229 .Case("sp", VE::SX11) // Stack pointer
230 .Case("fp", VE::SX9) // Frame pointer
231 .Case("sl", VE::SX8) // Stack limit
232 .Case("lr", VE::SX10) // Link register
233 .Case("tp", VE::SX14) // Thread pointer
234 .Case("outer", VE::SX12) // Outer register
235 .Case("info", VE::SX17) // Info area register
236 .Case("got", VE::SX15) // Global offset table register
237 .Case("plt", VE::SX16) // Procedure linkage table register
243 report_fatal_error("Invalid register name global variable");
246 //===----------------------------------------------------------------------===//
247 // TargetLowering Implementation
248 //===----------------------------------------------------------------------===//
// Lowers an outgoing call: assigns argument locations with CC_VE (and a
// second pass with CC_VE2 when both register and stack copies are
// needed, e.g. varargs), adjusts %sp, materializes the callee address
// into %s12 (via GETFUNPLT in PIC mode, or an absolute HI/LO pair),
// copies register arguments with glued CopyToReg nodes, stores stack
// arguments at %sp + 176 + offset, emits VEISD::CALL, restores %sp, and
// finally extracts the return values per RetCC_VE.
// NOTE(review): gappy listing — the tail-call bailout, many if/else
// scaffold lines, `break;`s, the `SDLoc DL`, `InGlue`, `PtrOff` and
// `RV` declarations, and closing braces are missing between the
// numbered lines; confirm against the full source before relying on
// the exact control flow shown here.
250 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
251 SmallVectorImpl<SDValue> &InVals) const {
252 SelectionDAG &DAG = CLI.DAG;
254 SDValue Chain = CLI.Chain;
255 auto PtrVT = getPointerTy(DAG.getDataLayout());
257 // VE target does not yet support tail call optimization.
258 CLI.IsTailCall = false;
260 // Get the base offset of the outgoing arguments stack space.
261 unsigned ArgsBaseOffset = 176;
262 // Get the size of the preserved arguments area
263 unsigned ArgsPreserved = 8 * 8u;
265 // Analyze operands of the call, assigning locations to each operand.
266 SmallVector<CCValAssign, 16> ArgLocs;
267 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
269 // Allocate the preserved area first.
270 CCInfo.AllocateStack(ArgsPreserved, Align(8));
271 // We already allocated the preserved area, so the stack offset computed
272 // by CC_VE would be correct now.
273 CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
275 // VE requires to use both register and stack for varargs or no-prototyped
277 bool UseBoth = CLI.IsVarArg;
279 // Analyze operands again if it is required to store BOTH.
280 SmallVector<CCValAssign, 16> ArgLocs2;
281 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
282 ArgLocs2, *DAG.getContext());
284 CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);
286 // Get the size of the outgoing arguments stack space requirement.
287 unsigned ArgsSize = CCInfo.getNextStackOffset();
289 // Keep stack frames 16-byte aligned.
290 ArgsSize = alignTo(ArgsSize, 16);
292 // Adjust the stack pointer to make room for the arguments.
293 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
294 // with more than 6 arguments.
295 Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
297 // Collect the set of registers to pass to the function and their values.
298 // This will be emitted as a sequence of CopyToReg nodes glued to the call
300 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
// Collect chains from all the memory operations that copy arguments to the
302 // Collect chains from all the memory opeations that copy arguments to the
303 // stack. They must follow the stack pointer adjustment above and precede the
304 // call instruction itself.
305 SmallVector<SDValue, 8> MemOpChains;
307 // VE needs to get address of callee function in a register
308 // So, prepare to copy it to SX12 here.
310 // If the callee is a GlobalAddress node (quite common, every direct call is)
311 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
312 // Likewise ExternalSymbol -> TargetExternalSymbol.
313 SDValue Callee = CLI.Callee;
315 bool IsPICCall = isPositionIndependent();
317 // PC-relative references to external symbols should go through $stub.
318 // If so, we need to prepare GlobalBaseReg first.
319 const TargetMachine &TM = DAG.getTarget();
320 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
321 const GlobalValue *GV = nullptr;
322 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
324 GV = CalleeG->getGlobal();
325 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
326 bool UsePlt = !Local;
327 MachineFunction &MF = DAG.getMachineFunction();
329 // Turn GlobalAddress/ExternalSymbol node into a value node
330 // containing the address of them here.
// PLT path: ensure the GOT base register exists, then wrap the callee
// address in GETFUNPLT so the final address is resolved via the PLT.
334 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
335 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
336 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
// Non-PIC path: build the absolute address as a HI32/LO32 pair.
339 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
341 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
344 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
345 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
346 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
349 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
// The callee address itself is passed in %s12.
353 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
355 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
356 CCValAssign &VA = ArgLocs[i];
357 SDValue Arg = CLI.OutVals[i];
359 // Promote the value if needed.
360 switch (VA.getLocInfo()) {
362 llvm_unreachable("Unknown location info!");
363 case CCValAssign::Full:
365 case CCValAssign::SExt:
366 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
368 case CCValAssign::ZExt:
369 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
371 case CCValAssign::AExt:
372 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
377 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
383 assert(VA.isMemLoc());
385 // Create a store off the stack pointer for this argument.
386 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
387 // The argument area starts at %fp+176 in the callee frame,
390 DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
391 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
392 MemOpChains.push_back(
393 DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
396 // Emit all stores, make sure they occur before the call.
397 if (!MemOpChains.empty())
398 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
400 // Build a sequence of CopyToReg nodes glued together with token chain and
401 // glue operands which copy the outgoing args into registers. The InGlue is
402 // necessary since all emitted instructions must be stuck together in order
403 // to pass the live physical registers.
405 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
406 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
407 RegsToPass[i].second, InGlue);
408 InGlue = Chain.getValue(1);
411 // Build the operands for the call instruction itself.
412 SmallVector<SDValue, 8> Ops;
413 Ops.push_back(Chain);
414 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
415 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
416 RegsToPass[i].second.getValueType()));
418 // Add a register mask operand representing the call-preserved registers.
419 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
420 const uint32_t *Mask =
421 TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
422 assert(Mask && "Missing call preserved mask for calling convention");
423 Ops.push_back(DAG.getRegisterMask(Mask));
425 // Make sure the CopyToReg nodes are glued to the call instruction which
426 // consumes the registers.
427 if (InGlue.getNode())
428 Ops.push_back(InGlue);
430 // Now the call itself.
431 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
432 Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
433 InGlue = Chain.getValue(1);
435 // Revert the stack pointer immediately after the call.
436 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
437 DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
438 InGlue = Chain.getValue(1);
440 // Now extract the return values. This is more or less the same as
441 // LowerFormalArguments.
443 // Assign locations to each value returned by this call.
444 SmallVector<CCValAssign, 16> RVLocs;
445 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
448 // Set inreg flag manually for codegen generated library calls that
450 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
451 CLI.Ins[0].Flags.setInReg();
453 RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
455 // Copy all of the result registers out of their specified physreg.
456 for (unsigned i = 0; i != RVLocs.size(); ++i) {
457 CCValAssign &VA = RVLocs[i];
458 unsigned Reg = VA.getLocReg();
460 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
461 // reside in the same register in the high and low bits. Reuse the
462 // CopyFromReg previous node to avoid duplicate copies.
464 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
465 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
466 RV = Chain.getValue(0);
468 // But usually we'll create a new CopyFromReg for a different register.
470 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
471 Chain = RV.getValue(1);
472 InGlue = Chain.getValue(2);
475 // Get the high bits for i32 struct elements.
476 if (VA.getValVT() == MVT::i32 && VA.needsCustom())
477 RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
478 DAG.getConstant(32, DL, MVT::i32));
480 // The callee promoted the return value, so insert an Assert?ext SDNode so
481 // we won't promote the value again in this function.
482 switch (VA.getLocInfo()) {
483 case CCValAssign::SExt:
484 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
485 DAG.getValueType(VA.getValVT()));
487 case CCValAssign::ZExt:
488 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
489 DAG.getValueType(VA.getValVT()));
495 // Truncate the register down to the return value type.
497 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
499 InVals.push_back(RV);
505 /// isFPImmLegal - Returns true if the target can instruction select the
506 /// specified FP immediate natively. If false, the legalizer will
507 /// materialize the FP immediate as a load from a constant pool.
// Only f32 and f64 immediates are reported as natively selectable;
// anything else falls back to a constant-pool load.
508 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
509 bool ForCodeSize) const {
510 return VT == MVT::f32 || VT == MVT::f64;
513 /// Determine if the target supports unaligned memory accesses.
515 /// This function returns true if the target allows unaligned memory accesses
516 /// of the specified type in the given address space. If true, it also returns
517 /// whether the unaligned memory access is "fast" in the last argument by
518 /// reference. This is used, for example, in situations where an array
519 /// copy/move/set is converted to a sequence of store operations. Its use
520 /// helps to ensure that such replacements don't generate code that causes an
521 /// alignment error (trap) on the target machine.
// VE reports all misaligned accesses as allowed (and fast).
// NOTE(review): gappy listing — the remaining parameters, the `*Fast`
// assignment and the `return true;` are missing between the numbered
// lines; confirm against the full source.
522 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
525 MachineMemOperand::Flags,
528 // It's fast anytime on VE
// Tells DAGCombine whether (and (not X), Y) is profitable on VE: true
// only when Y is a non-constant scalar value held in a register.
// NOTE(review): gappy listing — the vector-type early return and the
// final `return true;` lines are missing between the numbered lines.
534 bool VETargetLowering::hasAndNot(SDValue Y) const {
535 EVT VT = Y.getValueType();
537 // VE doesn't have vector and not instruction.
541 // VE allows different immediate values for X and Y where ~X & Y.
542 // Only simm7 works for X, and only mimm works for Y on VE. However, this
543 // function is used to check whether an immediate value is OK for and-not
544 // instruction as both X and Y. Generating additional instruction to
545 // retrieve an immediate value is no good since the purpose of this
546 // function is to convert a series of 3 instructions to another series of
547 // 3 instructions with better parallelism. Therefore, we return false
548 // for all immediate values now.
549 // FIXME: Change hasAndNot function to have two operands to make it work
550 // correctly with Aurora VE.
551 if (isa<ConstantSDNode>(Y))
554 // It's ok for generic registers.
// Constructor: configures boolean contents, register classes, and the
// legalize actions (Expand/Promote/Custom/Legal) for every operation
// the VE backend handles specially, then computes derived register
// properties. NOTE(review): gappy listing — several loop-closing braces
// and a few setOperationAction lines are missing between the numbered
// lines; confirm against the full source.
558 VETargetLowering::VETargetLowering(const TargetMachine &TM,
559 const VESubtarget &STI)
560 : TargetLowering(TM), Subtarget(&STI) {
561 // Instructions which use registers as conditionals examine all the
562 // bits (as does the pseudo SELECT_CC expansion). I don't think it
563 // matters much whether it's ZeroOrOneBooleanContent, or
564 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
566 setBooleanContents(ZeroOrOneBooleanContent);
567 setBooleanVectorContents(ZeroOrOneBooleanContent);
569 // Set up the register classes.
570 addRegisterClass(MVT::i32, &VE::I32RegClass);
571 addRegisterClass(MVT::i64, &VE::I64RegClass);
572 addRegisterClass(MVT::f32, &VE::F32RegClass);
// f64 deliberately shares the 64-bit integer register class.
573 addRegisterClass(MVT::f64, &VE::I64RegClass);
576 for (MVT FPVT : MVT::fp_valuetypes()) {
577 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
578 // Turn FP extload into load/fpextend
579 setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
581 // Turn FP truncstore into trunc + store.
582 setTruncStoreAction(FPVT, OtherFPVT, Expand);
586 // VE doesn't have i1 sign extending load
587 for (MVT VT : MVT::integer_valuetypes()) {
588 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
589 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
590 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
591 setTruncStoreAction(VT, MVT::i1, Expand);
595 // Custom legalize address nodes into LO/HI parts.
596 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
597 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
598 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
599 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
602 setOperationAction(ISD::VASTART, MVT::Other, Custom);
603 // VAARG needs to be lowered to access with 8 bytes alignment.
604 setOperationAction(ISD::VAARG, MVT::Other, Custom);
605 // Use the default implementation.
606 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
607 setOperationAction(ISD::VAEND, MVT::Other, Expand);
611 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
612 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
616 for (MVT IntVT : {MVT::i32, MVT::i64}) {
617 // VE has no REM or DIVREM operations.
618 setOperationAction(ISD::UREM, IntVT, Expand);
619 setOperationAction(ISD::SREM, IntVT, Expand);
620 setOperationAction(ISD::SDIVREM, IntVT, Expand);
621 setOperationAction(ISD::UDIVREM, IntVT, Expand);
623 setOperationAction(ISD::CTTZ, IntVT, Expand);
624 setOperationAction(ISD::ROTL, IntVT, Expand);
625 setOperationAction(ISD::ROTR, IntVT, Expand);
627 // Use isel patterns for i32 and i64
628 setOperationAction(ISD::BSWAP, IntVT, Legal);
629 setOperationAction(ISD::CTLZ, IntVT, Legal);
630 setOperationAction(ISD::CTPOP, IntVT, Legal);
632 // Use isel patterns for i64, Promote i32
633 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
634 setOperationAction(ISD::BITREVERSE, IntVT, Act);
639 // VE doesn't have instructions for fp<->uint, so expand them by llvm
640 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
641 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
642 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
643 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
645 // fp16 not supported
646 for (MVT FPVT : MVT::fp_valuetypes()) {
647 setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
648 setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
652 setStackPointerRegisterToSaveRestore(VE::SX11);
654 // Set function alignment to 16 bytes
655 setMinFunctionAlignment(Align(16));
657 // VE stores all argument by 8 bytes alignment
658 setMinStackArgumentAlignment(Align(8));
660 computeRegisterProperties(Subtarget->getRegisterInfo());
// Returns a printable name for VE-specific SelectionDAG node opcodes;
// used by DAG dumps and debugging output.
663 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
664 #define TARGET_NODE_CASE(NAME) \
666 return "VEISD::" #NAME;
667 switch ((VEISD::NodeType)Opcode) {
668 case VEISD::FIRST_NUMBER:
672 TARGET_NODE_CASE(GETFUNPLT)
673 TARGET_NODE_CASE(GETSTACKTOP)
674 TARGET_NODE_CASE(GETTLSADDR)
675 TARGET_NODE_CASE(CALL)
676 TARGET_NODE_CASE(RET_FLAG)
677 TARGET_NODE_CASE(GLOBAL_BASE_REG)
679 #undef TARGET_NODE_CASE
// Result type produced by SETCC for a given operand VT.
// NOTE(review): only the signature survives in this listing — the
// remaining parameter and the function body are missing.
683 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
688 // Convert to a target node and set target flags.
// Rebuilds an address-like node (GlobalAddress / BlockAddress /
// ExternalSymbol) as its Target* form carrying the relocation flag TF.
689 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
690 SelectionDAG &DAG) const {
691 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
692 return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
693 GA->getValueType(0), GA->getOffset(), TF);
695 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
696 return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
699 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
700 return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
703 llvm_unreachable("Unhandled address SDNode");
706 // Split Op into high and low parts according to HiTF and LoTF.
707 // Return an ADD node combining the parts.
// Splits Op into VEISD::Hi/Lo halves tagged with the given relocation
// flags and returns their sum (lea + lea.sl address materialization).
// NOTE(review): the `SDLoc DL(Op);` declaration used below is missing
// from this gappy listing.
708 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
709 SelectionDAG &DAG) const {
711 EVT VT = Op.getValueType();
712 SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
713 SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
714 return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
717 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
718 // or ExternalSymbol SDNode.
// Materializes the address of a GlobalAddress / ConstantPool /
// ExternalSymbol node. PIC: local-linkage symbols use GOTOFF arithmetic
// against the GOT base register; others load the address from the GOT.
// Non-PIC: absolute HI32/LO32 pair for all supported code models.
// NOTE(review): gappy listing — the `SDLoc DL(Op);` declaration, brace
// scaffolding and some case labels are missing between numbered lines.
719 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
721 EVT PtrVT = Op.getValueType();
723 // Handle PIC mode first. VE needs a got load for every variable!
724 if (isPositionIndependent()) {
725 // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
726 // function has calls.
727 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
728 MFI.setHasCalls(true);
729 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
731 if (isa<ConstantPoolSDNode>(Op) ||
732 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
733 // Create following instructions for local linkage PIC code.
734 // lea %s35, %gotoff_lo(.LCPI0_0)
735 // and %s35, %s35, (32)0
736 // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
737 // adds.l %s35, %s15, %s35 ; %s15 is GOT
738 // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
739 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
740 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
741 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
742 return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
744 // Create following instructions for not local linkage PIC code.
745 // lea %s35, %got_lo(.LCPI0_0)
746 // and %s35, %s35, (32)0
747 // lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
748 // adds.l %s35, %s15, %s35 ; %s15 is GOT
750 // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
751 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
752 VEMCExpr::VK_VE_GOT_LO32, DAG);
753 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
754 SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
// The GOT slot holds the symbol's absolute address — load it.
755 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
756 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
759 // This is one of the absolute code models.
760 switch (getTargetMachine().getCodeModel()) {
762 llvm_unreachable("Unsupported absolute code model");
763 case CodeModel::Small:
764 case CodeModel::Medium:
765 case CodeModel::Large:
767 return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
// Custom lowering for ISD::GlobalAddress — delegates to makeAddress.
773 SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
774 SelectionDAG &DAG) const {
775 return makeAddress(Op, DAG);
// Custom lowering for ISD::BlockAddress — delegates to makeAddress.
778 SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
779 SelectionDAG &DAG) const {
780 return makeAddress(Op, DAG);
// Emits the general-dynamic TLS access sequence: a 64-byte call frame
// around a VEISD::GETTLSADDR pseudo-call (which returns the variable's
// address in %s0), then marks the frame as having calls and, under PIC,
// ensures the GOT base register is prepared.
// NOTE(review): gappy listing — the `SDValue` return type line, the
// `SDLoc dl` declaration, and the final return are missing.
784 VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
785 SelectionDAG &DAG) const {
788 // Generate the following code:
789 //   t1: ch,glue = callseq_start t0, 0, 0
790 //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
791 //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
792 //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
793 SDValue Label = withTargetFlags(Op, 0, DAG);
794 EVT PtrVT = Op.getValueType();
796 // Lowering the machine isd will make sure everything is in the right
798 SDValue Chain = DAG.getEntryNode();
799 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
800 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
801 DAG.getMachineFunction(), CallingConv::C);
802 Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
803 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
804 Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
805 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
806 DAG.getIntPtrConstant(0, dl, true),
807 Chain.getValue(1), dl);
808 Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));
810 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
811 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
812 MFI.setHasCalls(true);
814 // Also generate code to prepare a GOT register if it is PIC.
815 if (isPositionIndependent()) {
816 MachineFunction &MF = DAG.getMachineFunction();
817 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
// Custom lowering for ISD::GlobalTLSAddress — always uses the
// general-dynamic model (see the nld limitation described below).
823 SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
824 SelectionDAG &DAG) const {
825 // The current implementation of nld (2.26) doesn't allow local exec model
826 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
827 // generate the general dynamic model code sequence.
829 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
830 return LowerToTLSGeneralDynamicModel(Op, DAG);
// Custom lowering for ISD::VASTART: computes the first-vararg address
// as %fp (SX9) + VarArgsFrameOffset and stores it into the va_list slot
// given by operand 1.
// NOTE(review): the `SDLoc DL` declaration and the `SDValue Offset =`
// line head are missing from this gappy listing.
833 SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
834 MachineFunction &MF = DAG.getMachineFunction();
835 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
836 auto PtrVT = getPointerTy(DAG.getDataLayout());
838 // Need frame address to find the address of VarArgsFrameIndex.
839 MF.getFrameInfo().setFrameAddressIsTaken(true);
841 // vastart just stores the address of the VarArgsFrameIndex slot into the
842 // memory location argument.
845 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
846 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
847 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
848 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
849 MachinePointerInfo(SV));
// Custom lowering for ISD::VAARG: loads the current va_list pointer,
// advances it by 8 (every vararg slot is 8 bytes), applies a +4
// in-slot adjustment for f32 values, stores the advanced pointer back,
// and loads the argument itself.
// NOTE(review): gappy listing — the `SDLoc DL` declaration, the
// `SDValue NextPtr =` line heads, and the else-branch braces are
// missing between the numbered lines.
852 SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
853 SDNode *Node = Op.getNode();
854 EVT VT = Node->getValueType(0);
855 SDValue InChain = Node->getOperand(0);
856 SDValue VAListPtr = Node->getOperand(1);
857 EVT PtrVT = VAListPtr.getValueType();
858 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
861 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
862 SDValue Chain = VAList.getValue(1);
865 if (VT == MVT::f32) {
866 // float --> need special handling like below.
871 // Increment the pointer, VAList, by 8 to the next vaarg.
873 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
874 // Then, adjust VAList.
875 unsigned InternalOffset = 4;
876 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
877 DAG.getConstant(InternalOffset, DL, PtrVT));
879 // Increment the pointer, VAList, by 8 to the next vaarg.
881 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
884 // Store the incremented VAList to the legalized pointer.
885 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
887 // Load the actual argument out of the pointer VAList.
888 // We can't count on greater alignment than the word size.
889 return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
890 std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
// Custom lowering for ISD::DYNAMIC_STACKALLOC: wraps a libcall to
// __ve_grow_stack (or __ve_grow_stack_align when the requested
// alignment exceeds the stack alignment) in a call sequence, then reads
// the new stack top via the GETSTACKTOP pseudo and rounds it up to the
// requested alignment.
// NOTE(review): gappy listing — the `Entry.Node = Size;` line, the
// NeedsAlign conditionals around the callee selection and the
// alignment fixup, and several braces are missing between lines.
893 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
894 SelectionDAG &DAG) const {
895 // Generate following code.
896 //   (void)__llvm_grow_stack(size);
897 //   ret = GETSTACKTOP;        // pseudo instruction
901 SDNode *Node = Op.getNode();
902 SDValue Chain = Op.getOperand(0);
903 SDValue Size = Op.getOperand(1);
904 MaybeAlign Alignment(Op.getConstantOperandVal(2));
905 EVT VT = Node->getValueType(0);
907 // Chain the dynamic stack allocation so that it doesn't modify the stack
908 // pointer when other instructions are using the stack.
909 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
911 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
912 Align StackAlign = TFI.getStackAlign();
913 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
916 TargetLowering::ArgListTy Args;
917 TargetLowering::ArgListEntry Entry;
919 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
920 Args.push_back(Entry);
922 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
923 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
924 Args.push_back(Entry);
926 Type *RetTy = Type::getVoidTy(*DAG.getContext());
928 EVT PtrVT = Op.getValueType();
931 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
933 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
936 TargetLowering::CallLoweringInfo CLI(DAG);
939 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
940 .setDiscardResult(true);
941 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
943 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
945 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
946 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
947 Result = DAG.getNode(ISD::AND, DL, VT, Result,
948 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
950 //  Chain = Result.getValue(1);
951 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
952 DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
954 SDValue Ops[2] = {Result, Chain};
955 return DAG.getMergeValues(Ops, DL);
958 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
959 switch (Op.getOpcode()) {
961 llvm_unreachable("Should not custom lower this!");
962 case ISD::BlockAddress:
963 return LowerBlockAddress(Op, DAG);
964 case ISD::DYNAMIC_STACKALLOC:
965 return lowerDYNAMIC_STACKALLOC(Op, DAG);
966 case ISD::GlobalAddress:
967 return LowerGlobalAddress(Op, DAG);
968 case ISD::GlobalTLSAddress:
969 return LowerGlobalTLSAddress(Op, DAG);
971 return LowerVASTART(Op, DAG);
973 return LowerVAARG(Op, DAG);