1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
24 enum NodeType : unsigned {
25 // Start the numbering from where ISD NodeType finishes.
26 FIRST_NUMBER = ISD::BUILTIN_OP_END,
39 PrintConvergentCallUni,
63 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
77 StoreParamS32, // to sext and store a <32bit value, not used currently
78 StoreParamU32, // to zext and store a <32bit value, not used currently
98 Tex1DArrayFloatFloatLevel,
99 Tex1DArrayFloatFloatGrad,
102 Tex1DArrayS32FloatLevel,
103 Tex1DArrayS32FloatGrad,
106 Tex1DArrayU32FloatLevel,
107 Tex1DArrayU32FloatGrad,
110 Tex2DFloatFloatLevel,
121 Tex2DArrayFloatFloat,
122 Tex2DArrayFloatFloatLevel,
123 Tex2DArrayFloatFloatGrad,
126 Tex2DArrayS32FloatLevel,
127 Tex2DArrayS32FloatGrad,
130 Tex2DArrayU32FloatLevel,
131 Tex2DArrayU32FloatGrad,
134 Tex3DFloatFloatLevel,
145 TexCubeFloatFloatLevel,
147 TexCubeS32FloatLevel,
149 TexCubeU32FloatLevel,
150 TexCubeArrayFloatFloat,
151 TexCubeArrayFloatFloatLevel,
152 TexCubeArrayS32Float,
153 TexCubeArrayS32FloatLevel,
154 TexCubeArrayU32Float,
155 TexCubeArrayU32FloatLevel,
168 TexUnified1DFloatS32,
169 TexUnified1DFloatFloat,
170 TexUnified1DFloatFloatLevel,
171 TexUnified1DFloatFloatGrad,
173 TexUnified1DS32Float,
174 TexUnified1DS32FloatLevel,
175 TexUnified1DS32FloatGrad,
177 TexUnified1DU32Float,
178 TexUnified1DU32FloatLevel,
179 TexUnified1DU32FloatGrad,
180 TexUnified1DArrayFloatS32,
181 TexUnified1DArrayFloatFloat,
182 TexUnified1DArrayFloatFloatLevel,
183 TexUnified1DArrayFloatFloatGrad,
184 TexUnified1DArrayS32S32,
185 TexUnified1DArrayS32Float,
186 TexUnified1DArrayS32FloatLevel,
187 TexUnified1DArrayS32FloatGrad,
188 TexUnified1DArrayU32S32,
189 TexUnified1DArrayU32Float,
190 TexUnified1DArrayU32FloatLevel,
191 TexUnified1DArrayU32FloatGrad,
192 TexUnified2DFloatS32,
193 TexUnified2DFloatFloat,
194 TexUnified2DFloatFloatLevel,
195 TexUnified2DFloatFloatGrad,
197 TexUnified2DS32Float,
198 TexUnified2DS32FloatLevel,
199 TexUnified2DS32FloatGrad,
201 TexUnified2DU32Float,
202 TexUnified2DU32FloatLevel,
203 TexUnified2DU32FloatGrad,
204 TexUnified2DArrayFloatS32,
205 TexUnified2DArrayFloatFloat,
206 TexUnified2DArrayFloatFloatLevel,
207 TexUnified2DArrayFloatFloatGrad,
208 TexUnified2DArrayS32S32,
209 TexUnified2DArrayS32Float,
210 TexUnified2DArrayS32FloatLevel,
211 TexUnified2DArrayS32FloatGrad,
212 TexUnified2DArrayU32S32,
213 TexUnified2DArrayU32Float,
214 TexUnified2DArrayU32FloatLevel,
215 TexUnified2DArrayU32FloatGrad,
216 TexUnified3DFloatS32,
217 TexUnified3DFloatFloat,
218 TexUnified3DFloatFloatLevel,
219 TexUnified3DFloatFloatGrad,
221 TexUnified3DS32Float,
222 TexUnified3DS32FloatLevel,
223 TexUnified3DS32FloatGrad,
225 TexUnified3DU32Float,
226 TexUnified3DU32FloatLevel,
227 TexUnified3DU32FloatGrad,
228 TexUnifiedCubeFloatFloat,
229 TexUnifiedCubeFloatFloatLevel,
230 TexUnifiedCubeS32Float,
231 TexUnifiedCubeS32FloatLevel,
232 TexUnifiedCubeU32Float,
233 TexUnifiedCubeU32FloatLevel,
234 TexUnifiedCubeArrayFloatFloat,
235 TexUnifiedCubeArrayFloatFloatLevel,
236 TexUnifiedCubeArrayS32Float,
237 TexUnifiedCubeArrayS32FloatLevel,
238 TexUnifiedCubeArrayU32Float,
239 TexUnifiedCubeArrayU32FloatLevel,
240 Tld4UnifiedR2DFloatFloat,
241 Tld4UnifiedG2DFloatFloat,
242 Tld4UnifiedB2DFloatFloat,
243 Tld4UnifiedA2DFloatFloat,
244 Tld4UnifiedR2DS64Float,
245 Tld4UnifiedG2DS64Float,
246 Tld4UnifiedB2DS64Float,
247 Tld4UnifiedA2DS64Float,
248 Tld4UnifiedR2DU64Float,
249 Tld4UnifiedG2DU64Float,
250 Tld4UnifiedB2DU64Float,
251 Tld4UnifiedA2DU64Float,
253 // Surface intrinsics
270 Suld1DArrayV2I8Clamp,
271 Suld1DArrayV2I16Clamp,
272 Suld1DArrayV2I32Clamp,
273 Suld1DArrayV2I64Clamp,
274 Suld1DArrayV4I8Clamp,
275 Suld1DArrayV4I16Clamp,
276 Suld1DArrayV4I32Clamp,
294 Suld2DArrayV2I8Clamp,
295 Suld2DArrayV2I16Clamp,
296 Suld2DArrayV2I32Clamp,
297 Suld2DArrayV2I64Clamp,
298 Suld2DArrayV4I8Clamp,
299 Suld2DArrayV4I16Clamp,
300 Suld2DArrayV4I32Clamp,
331 Suld1DArrayV2I16Trap,
332 Suld1DArrayV2I32Trap,
333 Suld1DArrayV2I64Trap,
335 Suld1DArrayV4I16Trap,
336 Suld1DArrayV4I32Trap,
355 Suld2DArrayV2I16Trap,
356 Suld2DArrayV2I32Trap,
357 Suld2DArrayV2I64Trap,
359 Suld2DArrayV4I16Trap,
360 Suld2DArrayV4I32Trap,
391 Suld1DArrayV2I16Zero,
392 Suld1DArrayV2I32Zero,
393 Suld1DArrayV2I64Zero,
395 Suld1DArrayV4I16Zero,
396 Suld1DArrayV4I32Zero,
415 Suld2DArrayV2I16Zero,
416 Suld2DArrayV2I32Zero,
417 Suld2DArrayV2I64Zero,
419 Suld2DArrayV4I16Zero,
420 Suld2DArrayV4I32Zero,
436 class NVPTXSubtarget;
438 //===--------------------------------------------------------------------===//
439 // TargetLowering Implementation
440 //===--------------------------------------------------------------------===//
441 class NVPTXTargetLowering : public TargetLowering {
443 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
444 const NVPTXSubtarget &STI);
445 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
447 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
449 const char *getTargetNodeName(unsigned Opcode) const override;
451 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
453 unsigned Intrinsic) const override;
455 /// isLegalAddressingMode - Return true if the addressing mode represented
456 /// by AM is legal for this target, for a load/store of the specified type.
457 /// Used to guide target-specific optimizations, like loop strength
458 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
459 /// address mode (CodeGenPrepare.cpp).
460 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
462 Instruction *I = nullptr) const override;
464 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
465 // Truncating 64-bit to 32-bit is free in SASS.
466 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
468 return SrcTy->getPrimitiveSizeInBits() == 64 &&
469 DstTy->getPrimitiveSizeInBits() == 32;
472 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
473 EVT VT) const override {
475 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
479 ConstraintType getConstraintType(StringRef Constraint) const override;
480 std::pair<unsigned, const TargetRegisterClass *>
481 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
482 StringRef Constraint, MVT VT) const override;
484 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
486 const SmallVectorImpl<ISD::InputArg> &Ins,
487 const SDLoc &dl, SelectionDAG &DAG,
488 SmallVectorImpl<SDValue> &InVals) const override;
490 SDValue LowerCall(CallLoweringInfo &CLI,
491 SmallVectorImpl<SDValue> &InVals) const override;
493 std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
494 const SmallVectorImpl<ISD::OutputArg> &,
495 unsigned retAlignment,
496 ImmutableCallSite CS) const;
498 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
499 const SmallVectorImpl<ISD::OutputArg> &Outs,
500 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
501 SelectionDAG &DAG) const override;
503 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
504 std::vector<SDValue> &Ops,
505 SelectionDAG &DAG) const override;
507 const NVPTXTargetMachine *nvTM;
509 // PTX always uses 32-bit shift amounts
510 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
514 TargetLoweringBase::LegalizeTypeAction
515 getPreferredVectorAction(MVT VT) const override;
517 // Get the degree of precision we want from 32-bit floating point division
// operations:
520 // 0 - Use ptx div.approx
521 // 1 - Use ptx div.full (approximate, but less so than div.approx)
522 // 2 - Use IEEE-compliant div instructions, if available.
523 int getDivF32Level() const;
525 // Get whether we should use a precise or approximate 32-bit floating point
// sqrt instruction.
527 bool usePrecSqrtF32() const;
529 // Get whether we should use instructions that flush floating-point denormals
530 // to sign-preserving zero.
531 bool useF32FTZ(const MachineFunction &MF) const;
533 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
534 int &ExtraSteps, bool &UseOneConst,
535 bool Reciprocal) const override;
// Override of the repeated-FP-divisor hook; always returns 2.
// NOTE(review): per the TargetLowering base-class hook, this value is
// presumably the minimum number of uses of one divisor required before
// repeated FP divisions are combined -- confirm against TargetLowering.h.
537 unsigned combineRepeatedFPDivisors() const override { return 2; }
539 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
540 bool allowUnsafeFPMath(MachineFunction &MF) const;
// Unconditionally reports that a fused multiply-add is faster than a separate
// multiply followed by an add. The EVT parameter is unnamed and ignored, so
// the answer is the same for every value type.
542 bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
// Unconditionally opts in to aggressive FMA fusion. VT is accepted to match
// the overridden signature but is not consulted.
544 bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
546 // The default is to transform llvm.ctlz(x, false) (where false indicates that
547 // x == 0 is not undefined behavior) into a branch that checks whether x is 0
548 // and avoids calling ctlz in that case. We have a dedicated ctlz
549 // instruction, so we say that ctlz is cheap to speculate.
// This override takes no arguments and always returns true.
550 bool isCheapToSpeculateCtlz() const override { return true; }
553 const NVPTXSubtarget &STI; // cache the subtarget here
554 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
556 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
557 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
558 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
560 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
561 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
563 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
564 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
565 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
567 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
568 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
570 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
572 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
573 SelectionDAG &DAG) const override;
574 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
576 unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty,
577 unsigned Idx, const DataLayout &DL) const;