1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/Target/TargetLowering.h"
24 enum NodeType : unsigned {
25 // Start the numbering from where ISD NodeType finishes.
26 FIRST_NUMBER = ISD::BUILTIN_OP_END,
39 PrintConvergentCallUni,
61 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
75 StoreParamS32, // to sext and store a <32bit value, not used currently
76 StoreParamU32, // to zext and store a <32bit value, not used currently
96 Tex1DArrayFloatFloatLevel,
97 Tex1DArrayFloatFloatGrad,
100 Tex1DArrayS32FloatLevel,
101 Tex1DArrayS32FloatGrad,
104 Tex1DArrayU32FloatLevel,
105 Tex1DArrayU32FloatGrad,
108 Tex2DFloatFloatLevel,
119 Tex2DArrayFloatFloat,
120 Tex2DArrayFloatFloatLevel,
121 Tex2DArrayFloatFloatGrad,
124 Tex2DArrayS32FloatLevel,
125 Tex2DArrayS32FloatGrad,
128 Tex2DArrayU32FloatLevel,
129 Tex2DArrayU32FloatGrad,
132 Tex3DFloatFloatLevel,
143 TexCubeFloatFloatLevel,
145 TexCubeS32FloatLevel,
147 TexCubeU32FloatLevel,
148 TexCubeArrayFloatFloat,
149 TexCubeArrayFloatFloatLevel,
150 TexCubeArrayS32Float,
151 TexCubeArrayS32FloatLevel,
152 TexCubeArrayU32Float,
153 TexCubeArrayU32FloatLevel,
166 TexUnified1DFloatS32,
167 TexUnified1DFloatFloat,
168 TexUnified1DFloatFloatLevel,
169 TexUnified1DFloatFloatGrad,
171 TexUnified1DS32Float,
172 TexUnified1DS32FloatLevel,
173 TexUnified1DS32FloatGrad,
175 TexUnified1DU32Float,
176 TexUnified1DU32FloatLevel,
177 TexUnified1DU32FloatGrad,
178 TexUnified1DArrayFloatS32,
179 TexUnified1DArrayFloatFloat,
180 TexUnified1DArrayFloatFloatLevel,
181 TexUnified1DArrayFloatFloatGrad,
182 TexUnified1DArrayS32S32,
183 TexUnified1DArrayS32Float,
184 TexUnified1DArrayS32FloatLevel,
185 TexUnified1DArrayS32FloatGrad,
186 TexUnified1DArrayU32S32,
187 TexUnified1DArrayU32Float,
188 TexUnified1DArrayU32FloatLevel,
189 TexUnified1DArrayU32FloatGrad,
190 TexUnified2DFloatS32,
191 TexUnified2DFloatFloat,
192 TexUnified2DFloatFloatLevel,
193 TexUnified2DFloatFloatGrad,
195 TexUnified2DS32Float,
196 TexUnified2DS32FloatLevel,
197 TexUnified2DS32FloatGrad,
199 TexUnified2DU32Float,
200 TexUnified2DU32FloatLevel,
201 TexUnified2DU32FloatGrad,
202 TexUnified2DArrayFloatS32,
203 TexUnified2DArrayFloatFloat,
204 TexUnified2DArrayFloatFloatLevel,
205 TexUnified2DArrayFloatFloatGrad,
206 TexUnified2DArrayS32S32,
207 TexUnified2DArrayS32Float,
208 TexUnified2DArrayS32FloatLevel,
209 TexUnified2DArrayS32FloatGrad,
210 TexUnified2DArrayU32S32,
211 TexUnified2DArrayU32Float,
212 TexUnified2DArrayU32FloatLevel,
213 TexUnified2DArrayU32FloatGrad,
214 TexUnified3DFloatS32,
215 TexUnified3DFloatFloat,
216 TexUnified3DFloatFloatLevel,
217 TexUnified3DFloatFloatGrad,
219 TexUnified3DS32Float,
220 TexUnified3DS32FloatLevel,
221 TexUnified3DS32FloatGrad,
223 TexUnified3DU32Float,
224 TexUnified3DU32FloatLevel,
225 TexUnified3DU32FloatGrad,
226 TexUnifiedCubeFloatFloat,
227 TexUnifiedCubeFloatFloatLevel,
228 TexUnifiedCubeS32Float,
229 TexUnifiedCubeS32FloatLevel,
230 TexUnifiedCubeU32Float,
231 TexUnifiedCubeU32FloatLevel,
232 TexUnifiedCubeArrayFloatFloat,
233 TexUnifiedCubeArrayFloatFloatLevel,
234 TexUnifiedCubeArrayS32Float,
235 TexUnifiedCubeArrayS32FloatLevel,
236 TexUnifiedCubeArrayU32Float,
237 TexUnifiedCubeArrayU32FloatLevel,
238 Tld4UnifiedR2DFloatFloat,
239 Tld4UnifiedG2DFloatFloat,
240 Tld4UnifiedB2DFloatFloat,
241 Tld4UnifiedA2DFloatFloat,
242 Tld4UnifiedR2DS64Float,
243 Tld4UnifiedG2DS64Float,
244 Tld4UnifiedB2DS64Float,
245 Tld4UnifiedA2DS64Float,
246 Tld4UnifiedR2DU64Float,
247 Tld4UnifiedG2DU64Float,
248 Tld4UnifiedB2DU64Float,
249 Tld4UnifiedA2DU64Float,
251 // Surface intrinsics
268 Suld1DArrayV2I8Clamp,
269 Suld1DArrayV2I16Clamp,
270 Suld1DArrayV2I32Clamp,
271 Suld1DArrayV2I64Clamp,
272 Suld1DArrayV4I8Clamp,
273 Suld1DArrayV4I16Clamp,
274 Suld1DArrayV4I32Clamp,
292 Suld2DArrayV2I8Clamp,
293 Suld2DArrayV2I16Clamp,
294 Suld2DArrayV2I32Clamp,
295 Suld2DArrayV2I64Clamp,
296 Suld2DArrayV4I8Clamp,
297 Suld2DArrayV4I16Clamp,
298 Suld2DArrayV4I32Clamp,
329 Suld1DArrayV2I16Trap,
330 Suld1DArrayV2I32Trap,
331 Suld1DArrayV2I64Trap,
333 Suld1DArrayV4I16Trap,
334 Suld1DArrayV4I32Trap,
353 Suld2DArrayV2I16Trap,
354 Suld2DArrayV2I32Trap,
355 Suld2DArrayV2I64Trap,
357 Suld2DArrayV4I16Trap,
358 Suld2DArrayV4I32Trap,
389 Suld1DArrayV2I16Zero,
390 Suld1DArrayV2I32Zero,
391 Suld1DArrayV2I64Zero,
393 Suld1DArrayV4I16Zero,
394 Suld1DArrayV4I32Zero,
413 Suld2DArrayV2I16Zero,
414 Suld2DArrayV2I32Zero,
415 Suld2DArrayV2I64Zero,
417 Suld2DArrayV4I16Zero,
418 Suld2DArrayV4I32Zero,
434 class NVPTXSubtarget;
436 //===--------------------------------------------------------------------===//
437 // TargetLowering Implementation
438 //===--------------------------------------------------------------------===//
439 class NVPTXTargetLowering : public TargetLowering {
/// Constructs the lowering object from the target machine TM and the
/// subtarget STI, both taken by const reference. Declared explicit; the
/// definition is not in this declaration.
/// NOTE(review): presumably initializes the nvTM and STI members of this
/// class -- confirm against the out-of-line definition.
441 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
442 const NVPTXSubtarget &STI);
/// LowerOperation - Override of the base-class hook. Takes the value Op and
/// the SelectionDAG it belongs to and returns an SDValue.
/// NOTE(review): presumably dispatches custom-lowered opcodes to the Lower*
/// members of this class -- confirm in the implementation file.
443 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// LowerGlobalAddress - Takes (Op, DAG) and returns an SDValue; not marked
/// override.
/// NOTE(review): by its name this lowers a global-address node; the exact
/// node it produces is not visible here -- confirm in the implementation.
445 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
/// getTargetNodeName - Override returning a C string for the opcode Opcode.
/// NOTE(review): presumably maps the NodeType enumerators declared in this
/// header to printable names; what is returned for an unknown opcode is not
/// visible here -- confirm in the implementation.
447 const char *getTargetNodeName(unsigned Opcode) const override;
/// getTgtMemIntrinsic - Override taking the call instruction I and the
/// intrinsic ID Intrinsic. Info is a non-const reference, i.e. an output
/// parameter by signature.
/// NOTE(review): presumably returns true when the intrinsic accesses memory
/// and Info has been filled in -- confirm against the TargetLowering
/// documentation and the implementation.
449 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
450 unsigned Intrinsic) const override;
452 /// isLegalAddressingMode - Return true if the addressing mode represented
453 /// by AM is legal for this target, for a load/store of the specified type.
454 /// Used to guide target specific optimizations, like loop strength
455 /// reduction (LoopStrengthReduce.cpp) and memory optimization for
456 /// address mode (CodeGenPrepare.cpp).
457 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
458 unsigned AS) const override;
460 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
461 // Truncating 64-bit to 32-bit is free in SASS.
462 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
464 return SrcTy->getPrimitiveSizeInBits() == 64 &&
465 DstTy->getPrimitiveSizeInBits() == 32;
468 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
469 EVT VT) const override {
471 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
/// getConstraintType - Override mapping the constraint string Constraint to
/// a ConstraintType.
/// NOTE(review): presumably these are inline-asm constraint strings (see the
/// neighbouring getRegForInlineAsmConstraint) -- confirm.
475 ConstraintType getConstraintType(StringRef Constraint) const override;
/// getRegForInlineAsmConstraint - Override returning a pair of an unsigned
/// and a register-class pointer for the given constraint string and value
/// type VT, using the register info TRI.
/// NOTE(review): the unsigned is presumably a register number; the value
/// returned for an unrecognized constraint is not visible here -- confirm.
476 std::pair<unsigned, const TargetRegisterClass *>
477 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
478 StringRef Constraint, MVT VT) const override;
480 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
482 const SmallVectorImpl<ISD::InputArg> &Ins,
483 const SDLoc &dl, SelectionDAG &DAG,
484 SmallVectorImpl<SDValue> &InVals) const override;
/// LowerCall - Override taking the call description CLI. InVals is a
/// non-const reference to a vector of SDValue, i.e. an output parameter by
/// signature; the function itself returns an SDValue.
/// NOTE(review): presumably InVals receives the call's result values and the
/// returned SDValue is the updated chain -- confirm in the implementation.
486 SDValue LowerCall(CallLoweringInfo &CLI,
487 SmallVectorImpl<SDValue> &InVals) const override;
/// getPrototype - Returns a std::string computed from the data layout DL, a
/// type and an argument list (both unnamed in this declaration), the
/// outgoing-argument descriptors, the alignment retAlignment and the call
/// site CS. Not marked override.
/// NOTE(review): presumably the textual prototype emitted for a call; CS is
/// a pointer and whether null is accepted is not visible here -- confirm.
489 std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
490 const SmallVectorImpl<ISD::OutputArg> &,
491 unsigned retAlignment,
492 const ImmutableCallSite *CS) const;
/// LowerReturn - Override taking the incoming chain, the calling convention,
/// the variadic flag, the outgoing-value descriptors Outs with their values
/// OutVals (both read-only), the debug location dl and the DAG. Returns an
/// SDValue.
/// NOTE(review): presumably the returned value is the chain of the emitted
/// return sequence -- confirm in the implementation.
494 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
495 const SmallVectorImpl<ISD::OutputArg> &Outs,
496 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
497 SelectionDAG &DAG) const override;
/// LowerAsmOperandForConstraint - Override returning void. Both Constraint
/// and Ops are non-const references, so results are delivered through them
/// (by signature) rather than through a return value.
/// NOTE(review): presumably appends the lowered operand(s) for Op to Ops --
/// confirm in the implementation.
499 void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
500 std::vector<SDValue> &Ops,
501 SelectionDAG &DAG) const override;
// Pointer to the NVPTX target machine.
// NOTE(review): presumably set from the constructor's TM argument; confirm.
503 const NVPTXTargetMachine *nvTM;
505 // PTX always uses 32-bit shift amounts
506 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
/// getPreferredVectorAction - Override returning the LegalizeTypeAction to
/// use for the type VT.
/// NOTE(review): which vector types get which action is decided in the
/// out-of-line definition -- not visible here.
510 TargetLoweringBase::LegalizeTypeAction
511 getPreferredVectorAction(EVT VT) const override;
/// allowFMA - Boolean query taking the machine function MF and the
/// optimization level OptLevel; not marked override.
/// NOTE(review): presumably reports whether fused multiply-add formation is
/// permitted for MF at this level -- confirm in the implementation.
513 bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
/// isFMAFasterThanFMulAndFAdd - Override that unconditionally returns true;
/// the EVT argument is unnamed and never inspected.
515 bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
/// enableAggressiveFMAFusion - Override that unconditionally returns true;
/// VT is accepted but not consulted.
517 bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
520 const NVPTXSubtarget &STI; // cache the subtarget here
/// getParamSymbol - Returns an SDValue for the index idx and a value type
/// (unnamed in this declaration), built in DAG.
/// NOTE(review): presumably a symbol node naming the idx-th parameter --
/// confirm in the implementation.
521 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
// Per-operation lowering helpers. Each takes the value to lower and the
// SelectionDAG and returns an SDValue -- the same (Op, DAG) shape as
// LowerOperation -- and none is marked override.
// NOTE(review): presumably each is reached from LowerOperation for the
// opcode named in its suffix; the i1/Vector variants presumably handle those
// specific value types -- confirm in the implementation.
523 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
// Load lowering.
525 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
526 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
// Store lowering.
528 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
529 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
530 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
// Multi-part shift lowering.
532 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
533 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
// Select lowering.
535 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Override returning void. Results is a non-const
/// reference to a vector of SDValue, i.e. the output of this call by
/// signature.
/// NOTE(review): presumably receives the replacement values for the results
/// of node N -- confirm in the implementation.
537 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
538 SelectionDAG &DAG) const override;
/// PerformDAGCombine - Override taking the node N and the combiner state DCI
/// and returning an SDValue.
/// NOTE(review): presumably an empty SDValue means "no change" -- confirm
/// against the TargetLowering documentation.
539 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
/// getArgumentAlignment - Returns an unsigned for the type Ty at index Idx,
/// given the callee value and the call site CS. Not marked override.
/// NOTE(review): presumably the result is an alignment in bytes; CS is a
/// pointer and whether null is accepted is not visible here -- confirm in
/// the implementation.
541 unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
542 Type *Ty, unsigned Idx) const;