1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
23 enum NodeType : unsigned {
24 // Start the numbering from where ISD NodeType finishes.
25 FIRST_NUMBER = ISD::BUILTIN_OP_END,
38 PrintConvergentCallUni,
62 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
76 StoreParamS32, // to sext and store a <32bit value, not used currently
77 StoreParamU32, // to zext and store a <32bit value, not used currently
97 Tex1DArrayFloatFloatLevel,
98 Tex1DArrayFloatFloatGrad,
101 Tex1DArrayS32FloatLevel,
102 Tex1DArrayS32FloatGrad,
105 Tex1DArrayU32FloatLevel,
106 Tex1DArrayU32FloatGrad,
109 Tex2DFloatFloatLevel,
120 Tex2DArrayFloatFloat,
121 Tex2DArrayFloatFloatLevel,
122 Tex2DArrayFloatFloatGrad,
125 Tex2DArrayS32FloatLevel,
126 Tex2DArrayS32FloatGrad,
129 Tex2DArrayU32FloatLevel,
130 Tex2DArrayU32FloatGrad,
133 Tex3DFloatFloatLevel,
144 TexCubeFloatFloatLevel,
146 TexCubeS32FloatLevel,
148 TexCubeU32FloatLevel,
149 TexCubeArrayFloatFloat,
150 TexCubeArrayFloatFloatLevel,
151 TexCubeArrayS32Float,
152 TexCubeArrayS32FloatLevel,
153 TexCubeArrayU32Float,
154 TexCubeArrayU32FloatLevel,
167 TexUnified1DFloatS32,
168 TexUnified1DFloatFloat,
169 TexUnified1DFloatFloatLevel,
170 TexUnified1DFloatFloatGrad,
172 TexUnified1DS32Float,
173 TexUnified1DS32FloatLevel,
174 TexUnified1DS32FloatGrad,
176 TexUnified1DU32Float,
177 TexUnified1DU32FloatLevel,
178 TexUnified1DU32FloatGrad,
179 TexUnified1DArrayFloatS32,
180 TexUnified1DArrayFloatFloat,
181 TexUnified1DArrayFloatFloatLevel,
182 TexUnified1DArrayFloatFloatGrad,
183 TexUnified1DArrayS32S32,
184 TexUnified1DArrayS32Float,
185 TexUnified1DArrayS32FloatLevel,
186 TexUnified1DArrayS32FloatGrad,
187 TexUnified1DArrayU32S32,
188 TexUnified1DArrayU32Float,
189 TexUnified1DArrayU32FloatLevel,
190 TexUnified1DArrayU32FloatGrad,
191 TexUnified2DFloatS32,
192 TexUnified2DFloatFloat,
193 TexUnified2DFloatFloatLevel,
194 TexUnified2DFloatFloatGrad,
196 TexUnified2DS32Float,
197 TexUnified2DS32FloatLevel,
198 TexUnified2DS32FloatGrad,
200 TexUnified2DU32Float,
201 TexUnified2DU32FloatLevel,
202 TexUnified2DU32FloatGrad,
203 TexUnified2DArrayFloatS32,
204 TexUnified2DArrayFloatFloat,
205 TexUnified2DArrayFloatFloatLevel,
206 TexUnified2DArrayFloatFloatGrad,
207 TexUnified2DArrayS32S32,
208 TexUnified2DArrayS32Float,
209 TexUnified2DArrayS32FloatLevel,
210 TexUnified2DArrayS32FloatGrad,
211 TexUnified2DArrayU32S32,
212 TexUnified2DArrayU32Float,
213 TexUnified2DArrayU32FloatLevel,
214 TexUnified2DArrayU32FloatGrad,
215 TexUnified3DFloatS32,
216 TexUnified3DFloatFloat,
217 TexUnified3DFloatFloatLevel,
218 TexUnified3DFloatFloatGrad,
220 TexUnified3DS32Float,
221 TexUnified3DS32FloatLevel,
222 TexUnified3DS32FloatGrad,
224 TexUnified3DU32Float,
225 TexUnified3DU32FloatLevel,
226 TexUnified3DU32FloatGrad,
227 TexUnifiedCubeFloatFloat,
228 TexUnifiedCubeFloatFloatLevel,
229 TexUnifiedCubeS32Float,
230 TexUnifiedCubeS32FloatLevel,
231 TexUnifiedCubeU32Float,
232 TexUnifiedCubeU32FloatLevel,
233 TexUnifiedCubeArrayFloatFloat,
234 TexUnifiedCubeArrayFloatFloatLevel,
235 TexUnifiedCubeArrayS32Float,
236 TexUnifiedCubeArrayS32FloatLevel,
237 TexUnifiedCubeArrayU32Float,
238 TexUnifiedCubeArrayU32FloatLevel,
239 Tld4UnifiedR2DFloatFloat,
240 Tld4UnifiedG2DFloatFloat,
241 Tld4UnifiedB2DFloatFloat,
242 Tld4UnifiedA2DFloatFloat,
243 Tld4UnifiedR2DS64Float,
244 Tld4UnifiedG2DS64Float,
245 Tld4UnifiedB2DS64Float,
246 Tld4UnifiedA2DS64Float,
247 Tld4UnifiedR2DU64Float,
248 Tld4UnifiedG2DU64Float,
249 Tld4UnifiedB2DU64Float,
250 Tld4UnifiedA2DU64Float,
252 // Surface intrinsics
269 Suld1DArrayV2I8Clamp,
270 Suld1DArrayV2I16Clamp,
271 Suld1DArrayV2I32Clamp,
272 Suld1DArrayV2I64Clamp,
273 Suld1DArrayV4I8Clamp,
274 Suld1DArrayV4I16Clamp,
275 Suld1DArrayV4I32Clamp,
293 Suld2DArrayV2I8Clamp,
294 Suld2DArrayV2I16Clamp,
295 Suld2DArrayV2I32Clamp,
296 Suld2DArrayV2I64Clamp,
297 Suld2DArrayV4I8Clamp,
298 Suld2DArrayV4I16Clamp,
299 Suld2DArrayV4I32Clamp,
330 Suld1DArrayV2I16Trap,
331 Suld1DArrayV2I32Trap,
332 Suld1DArrayV2I64Trap,
334 Suld1DArrayV4I16Trap,
335 Suld1DArrayV4I32Trap,
354 Suld2DArrayV2I16Trap,
355 Suld2DArrayV2I32Trap,
356 Suld2DArrayV2I64Trap,
358 Suld2DArrayV4I16Trap,
359 Suld2DArrayV4I32Trap,
390 Suld1DArrayV2I16Zero,
391 Suld1DArrayV2I32Zero,
392 Suld1DArrayV2I64Zero,
394 Suld1DArrayV4I16Zero,
395 Suld1DArrayV4I32Zero,
414 Suld2DArrayV2I16Zero,
415 Suld2DArrayV2I32Zero,
416 Suld2DArrayV2I64Zero,
418 Suld2DArrayV4I16Zero,
419 Suld2DArrayV4I32Zero,
435 class NVPTXSubtarget;
437 //===--------------------------------------------------------------------===//
438 // TargetLowering Implementation
439 //===--------------------------------------------------------------------===//
440 class NVPTXTargetLowering : public TargetLowering {
// Builds the lowering object from the NVPTX target machine and the subtarget,
// both passed by const reference.
442 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
443 const NVPTXSubtarget &STI);
// TargetLowering override: takes the node Op and the DAG and returns an
// SDValue.
// NOTE(review): presumably dispatches to the per-operation Lower* members of
// this class -- confirm in the .cpp.
444 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
// Same (Op, DAG) -> SDValue shape as LowerOperation, but not an override.
446 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
// TargetLowering override mapping an opcode number to a C string.
// NOTE(review): presumably the printable name of the matching NodeType
// enumerator; the lookup itself is not visible in this header.
448 const char *getTargetNodeName(unsigned Opcode) const override;
450 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
452 unsigned Intrinsic) const override;
454 /// isLegalAddressingMode - Return true if the addressing mode represented
455 /// by AM is legal for this target, for a load/store of the specified type.
456 /// Used to guide target-specific optimizations, like loop strength
457 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
458 /// address mode (CodeGenPrepare.cpp).
459 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
461 Instruction *I = nullptr) const override;
463 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
464 // Truncating 64-bit to 32-bit is free in SASS.
465 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
467 return SrcTy->getPrimitiveSizeInBits() == 64 &&
468 DstTy->getPrimitiveSizeInBits() == 32;
471 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
472 EVT VT) const override {
474 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
// Inline-asm constraint hooks; both are TargetLowering overrides.
// getConstraintType classifies a constraint string as a ConstraintType.
478 ConstraintType getConstraintType(StringRef Constraint) const override;
// getRegForInlineAsmConstraint maps a constraint string, for value type VT, to
// a pair of an unsigned and a register-class pointer.
// NOTE(review): the unsigned is presumably a register number -- confirm.
479 std::pair<unsigned, const TargetRegisterClass *>
480 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
481 StringRef Constraint, MVT VT) const override;
483 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
485 const SmallVectorImpl<ISD::InputArg> &Ins,
486 const SDLoc &dl, SelectionDAG &DAG,
487 SmallVectorImpl<SDValue> &InVals) const override;
// TargetLowering override for a call described by CLI; returns an SDValue.
// InVals is taken by non-const reference.
// NOTE(review): InVals presumably receives the values produced by the call --
// confirm against the implementation.
489 SDValue LowerCall(CallLoweringInfo &CLI,
490 SmallVectorImpl<SDValue> &InVals) const override;
// Returns a std::string computed from the data layout, a type, the argument
// list, the outgoing-argument descriptors, a return alignment and the call
// site. Not an override.
// NOTE(review): presumably the textual PTX prototype for the call, with the
// unnamed Type * being the return type -- confirm in the .cpp.
492 std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
493 const SmallVectorImpl<ISD::OutputArg> &,
494 unsigned retAlignment,
495 ImmutableCallSite CS) const;
// TargetLowering override for a return. Receives the chain, the calling
// convention, the vararg flag, the outgoing-value descriptors (Outs) together
// with their values (OutVals), the debug location and the DAG; returns an
// SDValue.
497 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
498 const SmallVectorImpl<ISD::OutputArg> &Outs,
499 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
500 SelectionDAG &DAG) const override;
// TargetLowering override. Takes the operand Op, its constraint string and a
// vector Ops, the latter two by non-const reference; returns nothing.
// NOTE(review): presumably appends the lowered operand(s) to Ops -- confirm.
502 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
503 std::vector<SDValue> &Ops,
504 SelectionDAG &DAG) const override;
// Pointer to the (const) NVPTX target machine.
// NOTE(review): presumably set from the constructor's TM argument -- confirm.
506 const NVPTXTargetMachine *nvTM;
508 // PTX always uses 32-bit shift amounts
509 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
// TargetLowering override: returns the LegalizeTypeAction chosen for type VT.
513 TargetLoweringBase::LegalizeTypeAction
514 getPreferredVectorAction(MVT VT) const override;
516 // Get the degree of precision we want from 32-bit floating point division
519 // 0 - Use ptx div.approx
520 // 1 - Use ptx div.full (approximate, but less so than div.approx)
521 // 2 - Use IEEE-compliant div instructions, if available.
522 int getDivF32Level() const;
524 // Get whether we should use a precise or approximate 32-bit floating point
526 bool usePrecSqrtF32() const;
528 // Get whether we should use instructions that flush floating-point denormals
529 // to sign-preserving zero.
// NOTE(review): takes the MachineFunction, so the answer is presumably decided
// per function -- confirm in the .cpp.
530 bool useF32FTZ(const MachineFunction &MF) const;
// TargetLowering override. Returns an SDValue and also has two out-style
// parameters taken by non-const reference: ExtraSteps and UseOneConst.
// NOTE(review): presumably produces a square-root (or, when Reciprocal is set,
// reciprocal square-root) estimate of Operand and reports how many refinement
// steps the caller should add -- confirm against the TargetLowering hook.
532 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
533 int &ExtraSteps, bool &UseOneConst,
534 bool Reciprocal) const override;
// TargetLowering override; always returns 2.
// NOTE(review): per the base-class hook this is presumably the minimum number
// of divisions by the same divisor before they are rewritten as multiplies by
// its reciprocal -- confirm against TargetLowering.
536 unsigned combineRepeatedFPDivisors() const override { return 2; }
// Floating-point policy queries (not overrides). Both take the
// MachineFunction; allowFMA additionally takes the optimization level.
538 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
539 bool allowUnsafeFPMath(MachineFunction &MF) const;
// TargetLowering override: unconditionally reports FMA as faster than a
// separate fmul + fadd. The EVT argument is unnamed and ignored.
541 bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
// TargetLowering override: unconditionally returns true; VT is not consulted.
543 bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
545 // The default is to transform llvm.ctlz(x, false) (where false indicates that
546 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
547 // and avoids calling ctlz in that case. We have a dedicated ctlz
548 // instruction, so we say that ctlz is cheap to speculate.
// Always returns true.
549 bool isCheapToSpeculateCtlz() const override { return true; }
552 const NVPTXSubtarget &STI; // cache the subtarget here
// Returns an SDValue for parameter index idx; the EVT argument is unnamed in
// this declaration.
// NOTE(review): presumably a symbol node naming the idx-th parameter, typed by
// the EVT -- confirm in the .cpp.
553 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
// Per-operation lowering helpers. Every one is const and has the same shape:
// (SDValue Op, SelectionDAG &DAG) -> SDValue.
// NOTE(review): presumably called from LowerOperation for the operation named
// in each suffix -- confirm in the .cpp.

// Vector construction / element access.
555 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
556 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
557 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
// FROUND, with separate 32- and 64-suffixed variants.
559 SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
560 SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
561 SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
// Loads, with a dedicated i1 variant.
563 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
564 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
// Stores, with dedicated i1 and vector variants.
566 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
567 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
568 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
// Multi-part shifts.
570 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
571 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
// Select.
573 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
// TargetLowering overrides operating on a single node N.
// ReplaceNodeResults takes Results by non-const reference and returns nothing.
// NOTE(review): Results presumably receives the replacement values -- confirm.
575 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
576 SelectionDAG &DAG) const override;
// PerformDAGCombine returns an SDValue for N, given the combiner state DCI.
577 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
// Returns an unsigned alignment computed from the callee, the call site, the
// type Ty, the index Idx and the data layout. Not an override.
// NOTE(review): the unit (bytes vs. bits) is not visible from this header --
// confirm in the .cpp.
579 unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty,
580 unsigned Idx, const DataLayout &DL) const;