1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetIntrinsicInfo.h"
27 #define DEBUG_TYPE "nvptx-isel"
// Hidden command-line knob for f32 division precision; the accepted values
// are listed in its description string. Consulted by getDivF32Level().
29 static cl::opt<int> UsePrecDivF32(
30 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32 " IEEE Compliant F32 div.rnd if available."),
// Hidden command-line knob choosing between sqrt.approx and sqrt.rn.
// Consulted by usePrecSqrtF32().
// NOTE(review): the declared option type is not visible in this listing.
36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
// Hidden command-line knob for flushing f32 subnormals to sign-preserving
// zero. Consulted by useF32FTZ().
// NOTE(review): the declared option type is not visible in this listing.
41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
47 /// NVPTX-specific DAG, ready for instruction scheduling.
/// Returns a heap-allocated NVPTXDAGToDAGISel built from \p TM and
/// \p OptLevel. The raw pointer is handed to the caller (presumably the pass
/// manager takes ownership -- confirm).
48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49 llvm::CodeGenOpt::Level OptLevel) {
50 return new NVPTXDAGToDAGISel(TM, OptLevel);
/// Constructs the selector: forwards \p tm and \p OptLevel to the
/// SelectionDAGISel base, keeps the target machine in TM, and enables
/// doMulWide whenever the optimization level is above 0.
53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54 CodeGenOpt::Level OptLevel)
55 : SelectionDAGISel(tm, OptLevel), TM(tm) {
56 doMulWide = (OptLevel > 0);
/// Records the NVPTX subtarget of \p MF in Subtarget, then defers to
/// SelectionDAGISel::runOnMachineFunction and returns its result.
59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
// The helpers below read Subtarget, so it is refreshed for every function.
60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61 return SelectionDAGISel::runOnMachineFunction(MF);
/// Chooses the f32 division precision level. An explicit
/// -nvptx-prec-divf32 on the command line takes precedence; otherwise
/// TM.Options.UnsafeFPMath is consulted.
/// NOTE(review): the returned values are not visible in this listing;
/// presumably they follow the 0/1/2 encoding of the option -- confirm.
64 int NVPTXDAGToDAGISel::getDivF32Level() const {
65 if (UsePrecDivF32.getNumOccurrences() > 0) {
66 // If nvptx-prec-divf32=N is used on the command-line, always honor it
69 // Otherwise, use div.approx if fast math is enabled
70 if (TM.Options.UnsafeFPMath)
/// Tells whether the precise f32 square root (sqrt.rn) should be used rather
/// than sqrt.approx. An explicit -nvptx-prec-sqrtf32 wins; without it the
/// precise form is used unless TM.Options.UnsafeFPMath is set.
77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80 return UsePrecSqrtF32;
82 // Otherwise, use sqrt.approx if fast math is enabled
83 return !TM.Options.UnsafeFPMath;
/// Tells whether f32 subnormals should be flushed to zero. The command-line
/// option is checked first, then the "nvptx-f32ftz" attribute of the current
/// function, which enables flushing when its string value is "true".
/// NOTE(review): the return for the command-line case and the final fallback
/// are not visible in this listing.
87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
88 if (FtzEnabled.getNumOccurrences() > 0) {
89 // If nvptx-f32ftz is used on the command-line, always honor it
92 const Function *F = MF->getFunction();
93 // Otherwise, check for an nvptx-f32ftz attribute on the function
94 if (F->hasFnAttribute("nvptx-f32ftz"))
95 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
/// Returns whatever NVPTXTargetLowering::allowFMA reports for the current
/// machine function and optimization level; no decision is made locally.
101 bool NVPTXDAGToDAGISel::allowFMA() const {
102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103 return TL->allowFMA(*MF, OptLevel);
106 /// Select - Select instructions not customized! Used for
107 /// expanded, promoted and normal instructions.
/// Dispatches on the node's opcode to the matching try*/Select* helper.
/// NOTE(review): the path taken when a helper declines, and the default
/// case of the switch, are not visible in this listing -- confirm against
/// the full file.
108 void NVPTXDAGToDAGISel::Select(SDNode *N) {
// Nodes that already carry a machine opcode need no further selection.
110 if (N->isMachineOpcode()) {
112 return; // Already selected.
115 switch (N->getOpcode()) {
// Vector loads, handled by tryLoadVector.
124 case NVPTXISD::LoadV2:
125 case NVPTXISD::LoadV4:
126 if (tryLoadVector(N))
// Vector LDG/LDU nodes (handler call not shown in this listing).
129 case NVPTXISD::LDGV2:
130 case NVPTXISD::LDGV4:
131 case NVPTXISD::LDUV2:
132 case NVPTXISD::LDUV4:
// Vector stores, handled by tryStoreVector.
136 case NVPTXISD::StoreV2:
137 case NVPTXISD::StoreV4:
138 if (tryStoreVector(N))
// Parameter loads (handler call not shown in this listing).
141 case NVPTXISD::LoadParam:
142 case NVPTXISD::LoadParamV2:
143 case NVPTXISD::LoadParamV4:
// Return-value stores, handled by tryStoreRetval.
147 case NVPTXISD::StoreRetval:
148 case NVPTXISD::StoreRetvalV2:
149 case NVPTXISD::StoreRetvalV4:
150 if (tryStoreRetval(N))
// Parameter stores, handled by tryStoreParam.
153 case NVPTXISD::StoreParam:
154 case NVPTXISD::StoreParamV2:
155 case NVPTXISD::StoreParamV4:
156 case NVPTXISD::StoreParamS32:
157 case NVPTXISD::StoreParamU32:
158 if (tryStoreParam(N))
// Intrinsics without and with a chain operand.
161 case ISD::INTRINSIC_WO_CHAIN:
162 if (tryIntrinsicNoChain(N))
165 case ISD::INTRINSIC_W_CHAIN:
166 if (tryIntrinsicChain(N))
// Tex*/Tld4* nodes, including the TexUnified*/Tld4Unified* variants; all of
// them are handled by tryTextureIntrinsic.
169 case NVPTXISD::Tex1DFloatS32:
170 case NVPTXISD::Tex1DFloatFloat:
171 case NVPTXISD::Tex1DFloatFloatLevel:
172 case NVPTXISD::Tex1DFloatFloatGrad:
173 case NVPTXISD::Tex1DS32S32:
174 case NVPTXISD::Tex1DS32Float:
175 case NVPTXISD::Tex1DS32FloatLevel:
176 case NVPTXISD::Tex1DS32FloatGrad:
177 case NVPTXISD::Tex1DU32S32:
178 case NVPTXISD::Tex1DU32Float:
179 case NVPTXISD::Tex1DU32FloatLevel:
180 case NVPTXISD::Tex1DU32FloatGrad:
181 case NVPTXISD::Tex1DArrayFloatS32:
182 case NVPTXISD::Tex1DArrayFloatFloat:
183 case NVPTXISD::Tex1DArrayFloatFloatLevel:
184 case NVPTXISD::Tex1DArrayFloatFloatGrad:
185 case NVPTXISD::Tex1DArrayS32S32:
186 case NVPTXISD::Tex1DArrayS32Float:
187 case NVPTXISD::Tex1DArrayS32FloatLevel:
188 case NVPTXISD::Tex1DArrayS32FloatGrad:
189 case NVPTXISD::Tex1DArrayU32S32:
190 case NVPTXISD::Tex1DArrayU32Float:
191 case NVPTXISD::Tex1DArrayU32FloatLevel:
192 case NVPTXISD::Tex1DArrayU32FloatGrad:
193 case NVPTXISD::Tex2DFloatS32:
194 case NVPTXISD::Tex2DFloatFloat:
195 case NVPTXISD::Tex2DFloatFloatLevel:
196 case NVPTXISD::Tex2DFloatFloatGrad:
197 case NVPTXISD::Tex2DS32S32:
198 case NVPTXISD::Tex2DS32Float:
199 case NVPTXISD::Tex2DS32FloatLevel:
200 case NVPTXISD::Tex2DS32FloatGrad:
201 case NVPTXISD::Tex2DU32S32:
202 case NVPTXISD::Tex2DU32Float:
203 case NVPTXISD::Tex2DU32FloatLevel:
204 case NVPTXISD::Tex2DU32FloatGrad:
205 case NVPTXISD::Tex2DArrayFloatS32:
206 case NVPTXISD::Tex2DArrayFloatFloat:
207 case NVPTXISD::Tex2DArrayFloatFloatLevel:
208 case NVPTXISD::Tex2DArrayFloatFloatGrad:
209 case NVPTXISD::Tex2DArrayS32S32:
210 case NVPTXISD::Tex2DArrayS32Float:
211 case NVPTXISD::Tex2DArrayS32FloatLevel:
212 case NVPTXISD::Tex2DArrayS32FloatGrad:
213 case NVPTXISD::Tex2DArrayU32S32:
214 case NVPTXISD::Tex2DArrayU32Float:
215 case NVPTXISD::Tex2DArrayU32FloatLevel:
216 case NVPTXISD::Tex2DArrayU32FloatGrad:
217 case NVPTXISD::Tex3DFloatS32:
218 case NVPTXISD::Tex3DFloatFloat:
219 case NVPTXISD::Tex3DFloatFloatLevel:
220 case NVPTXISD::Tex3DFloatFloatGrad:
221 case NVPTXISD::Tex3DS32S32:
222 case NVPTXISD::Tex3DS32Float:
223 case NVPTXISD::Tex3DS32FloatLevel:
224 case NVPTXISD::Tex3DS32FloatGrad:
225 case NVPTXISD::Tex3DU32S32:
226 case NVPTXISD::Tex3DU32Float:
227 case NVPTXISD::Tex3DU32FloatLevel:
228 case NVPTXISD::Tex3DU32FloatGrad:
229 case NVPTXISD::TexCubeFloatFloat:
230 case NVPTXISD::TexCubeFloatFloatLevel:
231 case NVPTXISD::TexCubeS32Float:
232 case NVPTXISD::TexCubeS32FloatLevel:
233 case NVPTXISD::TexCubeU32Float:
234 case NVPTXISD::TexCubeU32FloatLevel:
235 case NVPTXISD::TexCubeArrayFloatFloat:
236 case NVPTXISD::TexCubeArrayFloatFloatLevel:
237 case NVPTXISD::TexCubeArrayS32Float:
238 case NVPTXISD::TexCubeArrayS32FloatLevel:
239 case NVPTXISD::TexCubeArrayU32Float:
240 case NVPTXISD::TexCubeArrayU32FloatLevel:
241 case NVPTXISD::Tld4R2DFloatFloat:
242 case NVPTXISD::Tld4G2DFloatFloat:
243 case NVPTXISD::Tld4B2DFloatFloat:
244 case NVPTXISD::Tld4A2DFloatFloat:
245 case NVPTXISD::Tld4R2DS64Float:
246 case NVPTXISD::Tld4G2DS64Float:
247 case NVPTXISD::Tld4B2DS64Float:
248 case NVPTXISD::Tld4A2DS64Float:
249 case NVPTXISD::Tld4R2DU64Float:
250 case NVPTXISD::Tld4G2DU64Float:
251 case NVPTXISD::Tld4B2DU64Float:
252 case NVPTXISD::Tld4A2DU64Float:
253 case NVPTXISD::TexUnified1DFloatS32:
254 case NVPTXISD::TexUnified1DFloatFloat:
255 case NVPTXISD::TexUnified1DFloatFloatLevel:
256 case NVPTXISD::TexUnified1DFloatFloatGrad:
257 case NVPTXISD::TexUnified1DS32S32:
258 case NVPTXISD::TexUnified1DS32Float:
259 case NVPTXISD::TexUnified1DS32FloatLevel:
260 case NVPTXISD::TexUnified1DS32FloatGrad:
261 case NVPTXISD::TexUnified1DU32S32:
262 case NVPTXISD::TexUnified1DU32Float:
263 case NVPTXISD::TexUnified1DU32FloatLevel:
264 case NVPTXISD::TexUnified1DU32FloatGrad:
265 case NVPTXISD::TexUnified1DArrayFloatS32:
266 case NVPTXISD::TexUnified1DArrayFloatFloat:
267 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
268 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
269 case NVPTXISD::TexUnified1DArrayS32S32:
270 case NVPTXISD::TexUnified1DArrayS32Float:
271 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
272 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
273 case NVPTXISD::TexUnified1DArrayU32S32:
274 case NVPTXISD::TexUnified1DArrayU32Float:
275 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
276 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
277 case NVPTXISD::TexUnified2DFloatS32:
278 case NVPTXISD::TexUnified2DFloatFloat:
279 case NVPTXISD::TexUnified2DFloatFloatLevel:
280 case NVPTXISD::TexUnified2DFloatFloatGrad:
281 case NVPTXISD::TexUnified2DS32S32:
282 case NVPTXISD::TexUnified2DS32Float:
283 case NVPTXISD::TexUnified2DS32FloatLevel:
284 case NVPTXISD::TexUnified2DS32FloatGrad:
285 case NVPTXISD::TexUnified2DU32S32:
286 case NVPTXISD::TexUnified2DU32Float:
287 case NVPTXISD::TexUnified2DU32FloatLevel:
288 case NVPTXISD::TexUnified2DU32FloatGrad:
289 case NVPTXISD::TexUnified2DArrayFloatS32:
290 case NVPTXISD::TexUnified2DArrayFloatFloat:
291 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
292 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
293 case NVPTXISD::TexUnified2DArrayS32S32:
294 case NVPTXISD::TexUnified2DArrayS32Float:
295 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
296 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
297 case NVPTXISD::TexUnified2DArrayU32S32:
298 case NVPTXISD::TexUnified2DArrayU32Float:
299 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
300 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
301 case NVPTXISD::TexUnified3DFloatS32:
302 case NVPTXISD::TexUnified3DFloatFloat:
303 case NVPTXISD::TexUnified3DFloatFloatLevel:
304 case NVPTXISD::TexUnified3DFloatFloatGrad:
305 case NVPTXISD::TexUnified3DS32S32:
306 case NVPTXISD::TexUnified3DS32Float:
307 case NVPTXISD::TexUnified3DS32FloatLevel:
308 case NVPTXISD::TexUnified3DS32FloatGrad:
309 case NVPTXISD::TexUnified3DU32S32:
310 case NVPTXISD::TexUnified3DU32Float:
311 case NVPTXISD::TexUnified3DU32FloatLevel:
312 case NVPTXISD::TexUnified3DU32FloatGrad:
313 case NVPTXISD::TexUnifiedCubeFloatFloat:
314 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
315 case NVPTXISD::TexUnifiedCubeS32Float:
316 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
317 case NVPTXISD::TexUnifiedCubeU32Float:
318 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
319 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
320 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
321 case NVPTXISD::TexUnifiedCubeArrayS32Float:
322 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
323 case NVPTXISD::TexUnifiedCubeArrayU32Float:
324 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
325 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
326 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
327 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
328 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
329 case NVPTXISD::Tld4UnifiedR2DS64Float:
330 case NVPTXISD::Tld4UnifiedG2DS64Float:
331 case NVPTXISD::Tld4UnifiedB2DS64Float:
332 case NVPTXISD::Tld4UnifiedA2DS64Float:
333 case NVPTXISD::Tld4UnifiedR2DU64Float:
334 case NVPTXISD::Tld4UnifiedG2DU64Float:
335 case NVPTXISD::Tld4UnifiedB2DU64Float:
336 case NVPTXISD::Tld4UnifiedA2DU64Float:
337 if (tryTextureIntrinsic(N))
// Suld* nodes in their Clamp, Trap and Zero variants; all of them are
// handled by trySurfaceIntrinsic.
340 case NVPTXISD::Suld1DI8Clamp:
341 case NVPTXISD::Suld1DI16Clamp:
342 case NVPTXISD::Suld1DI32Clamp:
343 case NVPTXISD::Suld1DI64Clamp:
344 case NVPTXISD::Suld1DV2I8Clamp:
345 case NVPTXISD::Suld1DV2I16Clamp:
346 case NVPTXISD::Suld1DV2I32Clamp:
347 case NVPTXISD::Suld1DV2I64Clamp:
348 case NVPTXISD::Suld1DV4I8Clamp:
349 case NVPTXISD::Suld1DV4I16Clamp:
350 case NVPTXISD::Suld1DV4I32Clamp:
351 case NVPTXISD::Suld1DArrayI8Clamp:
352 case NVPTXISD::Suld1DArrayI16Clamp:
353 case NVPTXISD::Suld1DArrayI32Clamp:
354 case NVPTXISD::Suld1DArrayI64Clamp:
355 case NVPTXISD::Suld1DArrayV2I8Clamp:
356 case NVPTXISD::Suld1DArrayV2I16Clamp:
357 case NVPTXISD::Suld1DArrayV2I32Clamp:
358 case NVPTXISD::Suld1DArrayV2I64Clamp:
359 case NVPTXISD::Suld1DArrayV4I8Clamp:
360 case NVPTXISD::Suld1DArrayV4I16Clamp:
361 case NVPTXISD::Suld1DArrayV4I32Clamp:
362 case NVPTXISD::Suld2DI8Clamp:
363 case NVPTXISD::Suld2DI16Clamp:
364 case NVPTXISD::Suld2DI32Clamp:
365 case NVPTXISD::Suld2DI64Clamp:
366 case NVPTXISD::Suld2DV2I8Clamp:
367 case NVPTXISD::Suld2DV2I16Clamp:
368 case NVPTXISD::Suld2DV2I32Clamp:
369 case NVPTXISD::Suld2DV2I64Clamp:
370 case NVPTXISD::Suld2DV4I8Clamp:
371 case NVPTXISD::Suld2DV4I16Clamp:
372 case NVPTXISD::Suld2DV4I32Clamp:
373 case NVPTXISD::Suld2DArrayI8Clamp:
374 case NVPTXISD::Suld2DArrayI16Clamp:
375 case NVPTXISD::Suld2DArrayI32Clamp:
376 case NVPTXISD::Suld2DArrayI64Clamp:
377 case NVPTXISD::Suld2DArrayV2I8Clamp:
378 case NVPTXISD::Suld2DArrayV2I16Clamp:
379 case NVPTXISD::Suld2DArrayV2I32Clamp:
380 case NVPTXISD::Suld2DArrayV2I64Clamp:
381 case NVPTXISD::Suld2DArrayV4I8Clamp:
382 case NVPTXISD::Suld2DArrayV4I16Clamp:
383 case NVPTXISD::Suld2DArrayV4I32Clamp:
384 case NVPTXISD::Suld3DI8Clamp:
385 case NVPTXISD::Suld3DI16Clamp:
386 case NVPTXISD::Suld3DI32Clamp:
387 case NVPTXISD::Suld3DI64Clamp:
388 case NVPTXISD::Suld3DV2I8Clamp:
389 case NVPTXISD::Suld3DV2I16Clamp:
390 case NVPTXISD::Suld3DV2I32Clamp:
391 case NVPTXISD::Suld3DV2I64Clamp:
392 case NVPTXISD::Suld3DV4I8Clamp:
393 case NVPTXISD::Suld3DV4I16Clamp:
394 case NVPTXISD::Suld3DV4I32Clamp:
395 case NVPTXISD::Suld1DI8Trap:
396 case NVPTXISD::Suld1DI16Trap:
397 case NVPTXISD::Suld1DI32Trap:
398 case NVPTXISD::Suld1DI64Trap:
399 case NVPTXISD::Suld1DV2I8Trap:
400 case NVPTXISD::Suld1DV2I16Trap:
401 case NVPTXISD::Suld1DV2I32Trap:
402 case NVPTXISD::Suld1DV2I64Trap:
403 case NVPTXISD::Suld1DV4I8Trap:
404 case NVPTXISD::Suld1DV4I16Trap:
405 case NVPTXISD::Suld1DV4I32Trap:
406 case NVPTXISD::Suld1DArrayI8Trap:
407 case NVPTXISD::Suld1DArrayI16Trap:
408 case NVPTXISD::Suld1DArrayI32Trap:
409 case NVPTXISD::Suld1DArrayI64Trap:
410 case NVPTXISD::Suld1DArrayV2I8Trap:
411 case NVPTXISD::Suld1DArrayV2I16Trap:
412 case NVPTXISD::Suld1DArrayV2I32Trap:
413 case NVPTXISD::Suld1DArrayV2I64Trap:
414 case NVPTXISD::Suld1DArrayV4I8Trap:
415 case NVPTXISD::Suld1DArrayV4I16Trap:
416 case NVPTXISD::Suld1DArrayV4I32Trap:
417 case NVPTXISD::Suld2DI8Trap:
418 case NVPTXISD::Suld2DI16Trap:
419 case NVPTXISD::Suld2DI32Trap:
420 case NVPTXISD::Suld2DI64Trap:
421 case NVPTXISD::Suld2DV2I8Trap:
422 case NVPTXISD::Suld2DV2I16Trap:
423 case NVPTXISD::Suld2DV2I32Trap:
424 case NVPTXISD::Suld2DV2I64Trap:
425 case NVPTXISD::Suld2DV4I8Trap:
426 case NVPTXISD::Suld2DV4I16Trap:
427 case NVPTXISD::Suld2DV4I32Trap:
428 case NVPTXISD::Suld2DArrayI8Trap:
429 case NVPTXISD::Suld2DArrayI16Trap:
430 case NVPTXISD::Suld2DArrayI32Trap:
431 case NVPTXISD::Suld2DArrayI64Trap:
432 case NVPTXISD::Suld2DArrayV2I8Trap:
433 case NVPTXISD::Suld2DArrayV2I16Trap:
434 case NVPTXISD::Suld2DArrayV2I32Trap:
435 case NVPTXISD::Suld2DArrayV2I64Trap:
436 case NVPTXISD::Suld2DArrayV4I8Trap:
437 case NVPTXISD::Suld2DArrayV4I16Trap:
438 case NVPTXISD::Suld2DArrayV4I32Trap:
439 case NVPTXISD::Suld3DI8Trap:
440 case NVPTXISD::Suld3DI16Trap:
441 case NVPTXISD::Suld3DI32Trap:
442 case NVPTXISD::Suld3DI64Trap:
443 case NVPTXISD::Suld3DV2I8Trap:
444 case NVPTXISD::Suld3DV2I16Trap:
445 case NVPTXISD::Suld3DV2I32Trap:
446 case NVPTXISD::Suld3DV2I64Trap:
447 case NVPTXISD::Suld3DV4I8Trap:
448 case NVPTXISD::Suld3DV4I16Trap:
449 case NVPTXISD::Suld3DV4I32Trap:
450 case NVPTXISD::Suld1DI8Zero:
451 case NVPTXISD::Suld1DI16Zero:
452 case NVPTXISD::Suld1DI32Zero:
453 case NVPTXISD::Suld1DI64Zero:
454 case NVPTXISD::Suld1DV2I8Zero:
455 case NVPTXISD::Suld1DV2I16Zero:
456 case NVPTXISD::Suld1DV2I32Zero:
457 case NVPTXISD::Suld1DV2I64Zero:
458 case NVPTXISD::Suld1DV4I8Zero:
459 case NVPTXISD::Suld1DV4I16Zero:
460 case NVPTXISD::Suld1DV4I32Zero:
461 case NVPTXISD::Suld1DArrayI8Zero:
462 case NVPTXISD::Suld1DArrayI16Zero:
463 case NVPTXISD::Suld1DArrayI32Zero:
464 case NVPTXISD::Suld1DArrayI64Zero:
465 case NVPTXISD::Suld1DArrayV2I8Zero:
466 case NVPTXISD::Suld1DArrayV2I16Zero:
467 case NVPTXISD::Suld1DArrayV2I32Zero:
468 case NVPTXISD::Suld1DArrayV2I64Zero:
469 case NVPTXISD::Suld1DArrayV4I8Zero:
470 case NVPTXISD::Suld1DArrayV4I16Zero:
471 case NVPTXISD::Suld1DArrayV4I32Zero:
472 case NVPTXISD::Suld2DI8Zero:
473 case NVPTXISD::Suld2DI16Zero:
474 case NVPTXISD::Suld2DI32Zero:
475 case NVPTXISD::Suld2DI64Zero:
476 case NVPTXISD::Suld2DV2I8Zero:
477 case NVPTXISD::Suld2DV2I16Zero:
478 case NVPTXISD::Suld2DV2I32Zero:
479 case NVPTXISD::Suld2DV2I64Zero:
480 case NVPTXISD::Suld2DV4I8Zero:
481 case NVPTXISD::Suld2DV4I16Zero:
482 case NVPTXISD::Suld2DV4I32Zero:
483 case NVPTXISD::Suld2DArrayI8Zero:
484 case NVPTXISD::Suld2DArrayI16Zero:
485 case NVPTXISD::Suld2DArrayI32Zero:
486 case NVPTXISD::Suld2DArrayI64Zero:
487 case NVPTXISD::Suld2DArrayV2I8Zero:
488 case NVPTXISD::Suld2DArrayV2I16Zero:
489 case NVPTXISD::Suld2DArrayV2I32Zero:
490 case NVPTXISD::Suld2DArrayV2I64Zero:
491 case NVPTXISD::Suld2DArrayV4I8Zero:
492 case NVPTXISD::Suld2DArrayV4I16Zero:
493 case NVPTXISD::Suld2DArrayV4I32Zero:
494 case NVPTXISD::Suld3DI8Zero:
495 case NVPTXISD::Suld3DI16Zero:
496 case NVPTXISD::Suld3DI32Zero:
497 case NVPTXISD::Suld3DI64Zero:
498 case NVPTXISD::Suld3DV2I8Zero:
499 case NVPTXISD::Suld3DV2I16Zero:
500 case NVPTXISD::Suld3DV2I32Zero:
501 case NVPTXISD::Suld3DV2I64Zero:
502 case NVPTXISD::Suld3DV4I8Zero:
503 case NVPTXISD::Suld3DV4I16Zero:
504 case NVPTXISD::Suld3DV4I32Zero:
505 if (trySurfaceIntrinsic(N))
// Address space casts are selected unconditionally by SelectAddrSpaceCast.
515 case ISD::ADDRSPACECAST:
516 SelectAddrSpaceCast(N);
/// Handles chained intrinsic nodes. The intrinsic ID is read from operand 1
/// of \p N and the nvvm ldg/ldu global intrinsics are singled out.
/// NOTE(review): the switch header, the action taken for these cases and the
/// default result are not visible in this listing (presumably they forward
/// to tryLDGLDU -- confirm).
524 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
525 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
529 case Intrinsic::nvvm_ldg_global_f:
530 case Intrinsic::nvvm_ldg_global_i:
531 case Intrinsic::nvvm_ldg_global_p:
532 case Intrinsic::nvvm_ldu_global_f:
533 case Intrinsic::nvvm_ldu_global_i:
534 case Intrinsic::nvvm_ldu_global_p:
/// Maps the address space of the IR pointer behind \p N's memory operand to
/// the corresponding NVPTX::PTXLdStInstCode address-space code. GENERIC is
/// returned when no more specific answer is available.
539 static unsigned int getCodeAddrSpace(MemSDNode *N) {
540 const Value *Src = N->getMemOperand()->getValue();
// NOTE(review): the guard for this early return is not visible in this
// listing; presumably it fires when Src is null -- confirm.
543 return NVPTX::PTXLdStInstCode::GENERIC;
// Only pointer-typed sources carry an address space to translate.
545 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
546 switch (PT->getAddressSpace()) {
547 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
548 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
549 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
550 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
551 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
552 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
// Fallback for anything not matched above.
556 return NVPTX::PTXLdStInstCode::GENERIC;
/// Decides whether the memory access \p N may be emitted as ldg.
/// \param N             the memory node being selected.
/// \param Subtarget     queried for ldg support via hasLDG().
/// \param CodeAddrSpace PTXLdStInstCode address-space code of the access.
/// \param F             the enclosing machine function.
/// NOTE(review): the return statements are not visible in this listing; the
/// existing comments indicate the intended outcome of each check.
559 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
560 unsigned CodeAddrSpace, MachineFunction *F) {
561 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
564 // We have two ways of identifying invariant loads: Loads may be explicitly
565 // marked as invariant, or we may infer them to be invariant.
567 // We currently infer invariance only for kernel function pointer params that
568 // are noalias (i.e. __restrict) and never written to.
570 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
571 // not during the SelectionDAG phase).
573 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
574 // explicitly invariant loads because these are how clang tells us to use ldg
575 // when the user uses a builtin.
// Both ldg support and a global-space access are prerequisites.
576 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
// Explicitly invariant loads need no further analysis.
579 if (N->isInvariant())
582 // Load wasn't explicitly invariant. Attempt to infer invariance.
583 if (!isKernelFunction(*F->getFunction()))
586 // We use GetUnderlyingObjects() here instead of
587 // GetUnderlyingObject() mainly because the former looks through phi
588 // nodes while the latter does not. We need to look through phi
589 // nodes to handle pointer induction variables.
590 SmallVector<Value *, 8> Objs;
591 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
592 Objs, F->getDataLayout());
// Every underlying object must be a read-only, noalias function argument;
// a single counterexample rejects the access.
593 for (Value *Obj : Objs) {
594 auto *A = dyn_cast<const Argument>(Obj);
595 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
/// Handles chainless intrinsic nodes. The intrinsic ID is read from operand 0
/// of \p N; nvvm_texsurf_handle_internal is routed to SelectTexSurfHandle.
/// NOTE(review): the switch header, default case and return values are not
/// visible in this listing.
601 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
602 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
606 case Intrinsic::nvvm_texsurf_handle_internal:
607 SelectTexSurfHandle(N);
/// Replaces \p N with an NVPTX::texsurf_handles machine node of type i64
/// whose only operand is the value found inside the wrapper at operand 1.
612 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
613 // Op 0 is the intrinsic ID
614 SDValue Wrapper = N->getOperand(1);
// Peel the wrapper to reach the wrapped value.
615 SDValue GlobalVal = Wrapper.getOperand(0);
616 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
617 MVT::i64, GlobalVal));
/// Selects a machine node for an address space cast. Casts into the generic
/// space pick a cvta_* opcode by source space; casts out of the generic space
/// pick a cvta_to_* (or nvvm_ptr_gen_to_param) opcode by destination space.
/// The _64 opcode variants are used when TM.is64Bit(). Any other combination
/// is reported as a fatal error.
/// NOTE(review): the declaration of Opc, the break statements and the tail of
/// each getMachineNode call are not visible in this listing.
620 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
621 SDValue Src = N->getOperand(0);
622 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
623 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
624 unsigned DstAddrSpace = CastN->getDestAddressSpace();
626 assert(SrcAddrSpace != DstAddrSpace &&
627 "addrspacecast must be between different address spaces");
629 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
630 // Specific to generic
632 switch (SrcAddrSpace) {
633 default: report_fatal_error("Bad address space in addrspacecast");
634 case ADDRESS_SPACE_GLOBAL:
635 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
637 case ADDRESS_SPACE_SHARED:
638 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
640 case ADDRESS_SPACE_CONST:
641 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
643 case ADDRESS_SPACE_LOCAL:
644 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
647 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
651 // Generic to specific
// NOTE(review): the literal 0 presumably stands for ADDRESS_SPACE_GENERIC --
// confirm against the address space enum.
652 if (SrcAddrSpace != 0)
653 report_fatal_error("Cannot cast between two non-generic address spaces");
655 switch (DstAddrSpace) {
656 default: report_fatal_error("Bad address space in addrspacecast");
657 case ADDRESS_SPACE_GLOBAL:
658 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
659 : NVPTX::cvta_to_global_yes;
661 case ADDRESS_SPACE_SHARED:
662 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
663 : NVPTX::cvta_to_shared_yes;
665 case ADDRESS_SPACE_CONST:
667 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
669 case ADDRESS_SPACE_LOCAL:
671 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
// The param space has no cvta_to_* opcode here; a dedicated one is used.
673 case ADDRESS_SPACE_PARAM:
674 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
675 : NVPTX::nvvm_ptr_gen_to_param;
678 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
/// Selects a machine load for the LoadSDNode \p N. It gathers the immediate
/// operands the LD_* instructions take (volatility, address-space code,
/// vector kind, source type and source width), then tries the address
/// matchers in order: SelectDirectAddr (LD_*_avar), SelectADDRsi (LD_*_asi),
/// SelectADDRri (LD_*_ari), and finally the raw address operand (LD_*_areg).
/// The original memory operand is copied to the new node before \p N is
/// replaced.
/// NOTE(review): the per-type case labels of the opcode switches, the early
/// returns and the declarations of dl/Addr/Opcode are not visible in this
/// listing.
684 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
686 LoadSDNode *LD = cast<LoadSDNode>(N);
687 EVT LoadedVT = LD->getMemoryVT();
688 SDNode *NVPTXLD = nullptr;
690 // do not support pre/post inc/dec
// Only simple memory value types are handled.
694 if (!LoadedVT.isSimple())
697 // Address Space Setting
698 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
// NOTE(review): the body of this branch is not shown; presumably the load is
// handed to the LDG/LDU path -- confirm.
700 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
705 // - .volatile is only available for .global and .shared
706 bool isVolatile = LD->isVolatile();
// NOTE(review): the statement guarded by this condition is not shown;
// presumably the volatile flag is dropped for other address spaces.
707 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
708 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
709 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector kind: Scalar unless the memory type is a vector.
713 MVT SimpleVT = LoadedVT.getSimpleVT();
714 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
715 if (SimpleVT.isVector()) {
716 unsigned num = SimpleVT.getVectorNumElements();
718 vecType = NVPTX::PTXLdStInstCode::V2;
720 vecType = NVPTX::PTXLdStInstCode::V4;
725 // Type Setting: fromType + fromTypeWidth
727 // Sign : ISD::SEXTLOAD
728 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
730 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
731 MVT ScalarVT = SimpleVT.getScalarType();
732 // Read at least 8 bits (predicates are stored as 8-bit values)
733 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
734 unsigned int fromType;
735 if ((LD->getExtensionType() == ISD::SEXTLOAD))
736 fromType = NVPTX::PTXLdStInstCode::Signed;
737 else if (ScalarVT.isFloatingPoint())
738 fromType = NVPTX::PTXLdStInstCode::Float;
740 fromType = NVPTX::PTXLdStInstCode::Unsigned;
742 // Create the machine instruction DAG
743 SDValue Chain = N->getOperand(0);
744 SDValue N1 = N->getOperand(1);
746 SDValue Offset, Base;
// The opcode is chosen by the type of the loaded result value.
748 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
// Address matched by SelectDirectAddr: LD_*_avar opcodes.
750 if (SelectDirectAddr(N1, Addr)) {
753 Opcode = NVPTX::LD_i8_avar;
756 Opcode = NVPTX::LD_i16_avar;
759 Opcode = NVPTX::LD_i32_avar;
762 Opcode = NVPTX::LD_i64_avar;
765 Opcode = NVPTX::LD_f32_avar;
768 Opcode = NVPTX::LD_f64_avar;
773 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
774 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
775 getI32Imm(fromTypeWidth, dl), Addr, Chain };
776 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Address matched by SelectADDRsi/SelectADDRsi64: LD_*_asi opcodes.
777 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
778 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
781 Opcode = NVPTX::LD_i8_asi;
784 Opcode = NVPTX::LD_i16_asi;
787 Opcode = NVPTX::LD_i32_asi;
790 Opcode = NVPTX::LD_i64_asi;
793 Opcode = NVPTX::LD_f32_asi;
796 Opcode = NVPTX::LD_f64_asi;
801 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
802 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
803 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
804 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Address matched by SelectADDRri/SelectADDRri64: LD_*_ari opcodes, with
// separate _64 variants.
805 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
806 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
810 Opcode = NVPTX::LD_i8_ari_64;
813 Opcode = NVPTX::LD_i16_ari_64;
816 Opcode = NVPTX::LD_i32_ari_64;
819 Opcode = NVPTX::LD_i64_ari_64;
822 Opcode = NVPTX::LD_f32_ari_64;
825 Opcode = NVPTX::LD_f64_ari_64;
833 Opcode = NVPTX::LD_i8_ari;
836 Opcode = NVPTX::LD_i16_ari;
839 Opcode = NVPTX::LD_i32_ari;
842 Opcode = NVPTX::LD_i64_ari;
845 Opcode = NVPTX::LD_f32_ari;
848 Opcode = NVPTX::LD_f64_ari;
854 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
855 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
856 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
857 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Remaining case: the address operand N1 is passed as is, using the
// LD_*_areg opcodes with separate _64 variants.
862 Opcode = NVPTX::LD_i8_areg_64;
865 Opcode = NVPTX::LD_i16_areg_64;
868 Opcode = NVPTX::LD_i32_areg_64;
871 Opcode = NVPTX::LD_i64_areg_64;
874 Opcode = NVPTX::LD_f32_areg_64;
877 Opcode = NVPTX::LD_f64_areg_64;
885 Opcode = NVPTX::LD_i8_areg;
888 Opcode = NVPTX::LD_i16_areg;
891 Opcode = NVPTX::LD_i32_areg;
894 Opcode = NVPTX::LD_i64_areg;
897 Opcode = NVPTX::LD_f32_areg;
900 Opcode = NVPTX::LD_f64_areg;
906 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
907 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
908 getI32Imm(fromTypeWidth, dl), N1, Chain };
909 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
// Carry the original memory operand over to the machine node.
915 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
916 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
917 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
919 ReplaceNode(N, NVPTXLD);
/// Selects a machine vector load for an NVPTXISD::LoadV2/LoadV4 node \p N.
/// It mirrors tryLoad: the immediate operands are computed first, then the
/// address matchers are tried in order (SelectDirectAddr, SelectADDRsi,
/// SelectADDRri, raw address operand), choosing among the LDV_*_v2_* and
/// LDV_*_v4_* opcodes by element type. The memory operand of \p N is copied
/// to the new node.
/// NOTE(review): the per-type case labels, default cases, early returns, the
/// declarations of DL/Opcode/VecType/LD and the final node replacement are
/// not visible in this listing.
923 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
925 SDValue Chain = N->getOperand(0);
926 SDValue Op1 = N->getOperand(1);
927 SDValue Addr, Offset, Base;
931 MemSDNode *MemSD = cast<MemSDNode>(N);
932 EVT LoadedVT = MemSD->getMemoryVT();
// Only simple memory value types are handled.
934 if (!LoadedVT.isSimple())
937 // Address Space Setting
938 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
// NOTE(review): the body of this branch is not shown; presumably the load is
// handed to the LDG/LDU path -- confirm.
940 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
945 // - .volatile is only available for .global and .shared
946 bool IsVolatile = MemSD->isVolatile();
// NOTE(review): the statement guarded by this condition is not shown;
// presumably the volatile flag is dropped for other address spaces.
947 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
948 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
949 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
953 MVT SimpleVT = LoadedVT.getSimpleVT();
955 // Type Setting: fromType + fromTypeWidth
957 // Sign : ISD::SEXTLOAD
958 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
960 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
961 MVT ScalarVT = SimpleVT.getScalarType();
962 // Read at least 8 bits (predicates are stored as 8-bit values)
963 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
964 unsigned int FromType;
965 // The last operand holds the original LoadSDNode::getExtensionType() value
966 unsigned ExtensionType = cast<ConstantSDNode>(
967 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
968 if (ExtensionType == ISD::SEXTLOAD)
969 FromType = NVPTX::PTXLdStInstCode::Signed;
970 else if (ScalarVT.isFloatingPoint())
971 FromType = NVPTX::PTXLdStInstCode::Float;
973 FromType = NVPTX::PTXLdStInstCode::Unsigned;
// The vector kind follows directly from the node opcode.
977 switch (N->getOpcode()) {
978 case NVPTXISD::LoadV2:
979 VecType = NVPTX::PTXLdStInstCode::V2;
981 case NVPTXISD::LoadV4:
982 VecType = NVPTX::PTXLdStInstCode::V4;
// Opcodes are chosen by the type of the node's first result.
988 EVT EltVT = N->getValueType(0);
// Address matched by SelectDirectAddr: LDV_*_avar opcodes.
990 if (SelectDirectAddr(Op1, Addr)) {
991 switch (N->getOpcode()) {
994 case NVPTXISD::LoadV2:
995 switch (EltVT.getSimpleVT().SimpleTy) {
999 Opcode = NVPTX::LDV_i8_v2_avar;
1002 Opcode = NVPTX::LDV_i16_v2_avar;
1005 Opcode = NVPTX::LDV_i32_v2_avar;
1008 Opcode = NVPTX::LDV_i64_v2_avar;
1011 Opcode = NVPTX::LDV_f32_v2_avar;
1014 Opcode = NVPTX::LDV_f64_v2_avar;
// The v4 lists here and below contain no i64/f64 opcodes.
1018 case NVPTXISD::LoadV4:
1019 switch (EltVT.getSimpleVT().SimpleTy) {
1023 Opcode = NVPTX::LDV_i8_v4_avar;
1026 Opcode = NVPTX::LDV_i16_v4_avar;
1029 Opcode = NVPTX::LDV_i32_v4_avar;
1032 Opcode = NVPTX::LDV_f32_v4_avar;
1038 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1039 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1040 getI32Imm(FromTypeWidth, DL), Addr, Chain };
1041 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Address matched by SelectADDRsi/SelectADDRsi64: LDV_*_asi opcodes.
1042 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1043 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1044 switch (N->getOpcode()) {
1047 case NVPTXISD::LoadV2:
1048 switch (EltVT.getSimpleVT().SimpleTy) {
1052 Opcode = NVPTX::LDV_i8_v2_asi;
1055 Opcode = NVPTX::LDV_i16_v2_asi;
1058 Opcode = NVPTX::LDV_i32_v2_asi;
1061 Opcode = NVPTX::LDV_i64_v2_asi;
1064 Opcode = NVPTX::LDV_f32_v2_asi;
1067 Opcode = NVPTX::LDV_f64_v2_asi;
1071 case NVPTXISD::LoadV4:
1072 switch (EltVT.getSimpleVT().SimpleTy) {
1076 Opcode = NVPTX::LDV_i8_v4_asi;
1079 Opcode = NVPTX::LDV_i16_v4_asi;
1082 Opcode = NVPTX::LDV_i32_v4_asi;
1085 Opcode = NVPTX::LDV_f32_v4_asi;
1091 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1092 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1093 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1094 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Address matched by SelectADDRri/SelectADDRri64: LDV_*_ari opcodes, with
// separate _64 variants.
1095 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1096 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1098 switch (N->getOpcode()) {
1101 case NVPTXISD::LoadV2:
1102 switch (EltVT.getSimpleVT().SimpleTy) {
1106 Opcode = NVPTX::LDV_i8_v2_ari_64;
1109 Opcode = NVPTX::LDV_i16_v2_ari_64;
1112 Opcode = NVPTX::LDV_i32_v2_ari_64;
1115 Opcode = NVPTX::LDV_i64_v2_ari_64;
1118 Opcode = NVPTX::LDV_f32_v2_ari_64;
1121 Opcode = NVPTX::LDV_f64_v2_ari_64;
1125 case NVPTXISD::LoadV4:
1126 switch (EltVT.getSimpleVT().SimpleTy) {
1130 Opcode = NVPTX::LDV_i8_v4_ari_64;
1133 Opcode = NVPTX::LDV_i16_v4_ari_64;
1136 Opcode = NVPTX::LDV_i32_v4_ari_64;
1139 Opcode = NVPTX::LDV_f32_v4_ari_64;
1145 switch (N->getOpcode()) {
1148 case NVPTXISD::LoadV2:
1149 switch (EltVT.getSimpleVT().SimpleTy) {
1153 Opcode = NVPTX::LDV_i8_v2_ari;
1156 Opcode = NVPTX::LDV_i16_v2_ari;
1159 Opcode = NVPTX::LDV_i32_v2_ari;
1162 Opcode = NVPTX::LDV_i64_v2_ari;
1165 Opcode = NVPTX::LDV_f32_v2_ari;
1168 Opcode = NVPTX::LDV_f64_v2_ari;
1172 case NVPTXISD::LoadV4:
1173 switch (EltVT.getSimpleVT().SimpleTy) {
1177 Opcode = NVPTX::LDV_i8_v4_ari;
1180 Opcode = NVPTX::LDV_i16_v4_ari;
1183 Opcode = NVPTX::LDV_i32_v4_ari;
1186 Opcode = NVPTX::LDV_f32_v4_ari;
1193 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1194 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1195 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1197 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Remaining case: the address operand Op1 is passed as is, using the
// LDV_*_areg opcodes with separate _64 variants.
1200 switch (N->getOpcode()) {
1203 case NVPTXISD::LoadV2:
1204 switch (EltVT.getSimpleVT().SimpleTy) {
1208 Opcode = NVPTX::LDV_i8_v2_areg_64;
1211 Opcode = NVPTX::LDV_i16_v2_areg_64;
1214 Opcode = NVPTX::LDV_i32_v2_areg_64;
1217 Opcode = NVPTX::LDV_i64_v2_areg_64;
1220 Opcode = NVPTX::LDV_f32_v2_areg_64;
1223 Opcode = NVPTX::LDV_f64_v2_areg_64;
1227 case NVPTXISD::LoadV4:
1228 switch (EltVT.getSimpleVT().SimpleTy) {
1232 Opcode = NVPTX::LDV_i8_v4_areg_64;
1235 Opcode = NVPTX::LDV_i16_v4_areg_64;
1238 Opcode = NVPTX::LDV_i32_v4_areg_64;
1241 Opcode = NVPTX::LDV_f32_v4_areg_64;
1247 switch (N->getOpcode()) {
1250 case NVPTXISD::LoadV2:
1251 switch (EltVT.getSimpleVT().SimpleTy) {
1255 Opcode = NVPTX::LDV_i8_v2_areg;
1258 Opcode = NVPTX::LDV_i16_v2_areg;
1261 Opcode = NVPTX::LDV_i32_v2_areg;
1264 Opcode = NVPTX::LDV_i64_v2_areg;
1267 Opcode = NVPTX::LDV_f32_v2_areg;
1270 Opcode = NVPTX::LDV_f64_v2_areg;
1274 case NVPTXISD::LoadV4:
1275 switch (EltVT.getSimpleVT().SimpleTy) {
1279 Opcode = NVPTX::LDV_i8_v4_areg;
1282 Opcode = NVPTX::LDV_i16_v4_areg;
1285 Opcode = NVPTX::LDV_i32_v4_areg;
1288 Opcode = NVPTX::LDV_f32_v4_areg;
1295 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1296 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1297 getI32Imm(FromTypeWidth, DL), Op1, Chain };
1298 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
// Carry the original memory operand over to the machine node.
1301 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1302 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1303 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Try to select an NVPTX LDG/LDU machine node for N.
///
/// N is either an ISD::INTRINSIC_W_CHAIN node carrying one of the
/// nvvm_ldg_global_* / nvvm_ldu_global_* intrinsic IDs (address in operand 2)
/// or another memory node such as LDGV2/LDUV2/LDGV4/LDUV4 (address in
/// operand 1). The opcode is picked from the addressing form that matches the
/// address operand (direct address, base + offset, or plain register) and from
/// the element type of the memory VT. The original memory operand is attached
/// to the new node, and extending LoadSDNodes get an explicit CVT per result.
1309 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1311 SDValue Chain = N->getOperand(0);
1316 // If this is an LDG intrinsic, the address is the third operand. If it's an
1317 // LDG/LDU SD node (from custom vector handling), then it's the second operand
1318 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1319 Op1 = N->getOperand(2);
1320 Mem = cast<MemIntrinsicSDNode>(N);
1321 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1325 case Intrinsic::nvvm_ldg_global_f:
1326 case Intrinsic::nvvm_ldg_global_i:
1327 case Intrinsic::nvvm_ldg_global_p:
1330 case Intrinsic::nvvm_ldu_global_f:
1331 case Intrinsic::nvvm_ldu_global_i:
1332 case Intrinsic::nvvm_ldu_global_p:
1337 Op1 = N->getOperand(1);
1338 Mem = cast<MemSDNode>(N);
// Outputs of the address-matching helpers used below.
1344 SDValue Base, Offset, Addr;
// Split a vector memory type into its element type and element count; the
// opcode tables below are keyed on the element type.
1346 EVT EltVT = Mem->getMemoryVT();
1347 unsigned NumElts = 1;
1348 if (EltVT.isVector()) {
1349 NumElts = EltVT.getVectorNumElements();
1350 EltVT = EltVT.getVectorElementType();
1353 // Build the "promoted" result VTList for the load. If we are really loading
1354 // i8s, then the return type will be promoted to i16 since we do not expose
1355 // 8-bit registers in NVPTX.
1356 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1357 SmallVector<EVT, 5> InstVTs;
1358 for (unsigned i = 0; i != NumElts; ++i) {
1359 InstVTs.push_back(NodeVT);
1361 InstVTs.push_back(MVT::Other);
1362 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1364 if (SelectDirectAddr(Op1, Addr)) {
// NOTE(review): unlike the base+offset and register paths further down, no
// NVPTXISD::LoadV2 / LoadV4 case label is visible in this switch - confirm
// whether that omission is intentional.
1365 switch (N->getOpcode()) {
1368 case ISD::INTRINSIC_W_CHAIN:
1370 switch (EltVT.getSimpleVT().SimpleTy) {
1374 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1377 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1380 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1383 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1386 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1389 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1393 switch (EltVT.getSimpleVT().SimpleTy) {
1397 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1400 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1403 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1406 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1409 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1412 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1417 case NVPTXISD::LDGV2:
1418 switch (EltVT.getSimpleVT().SimpleTy) {
1422 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1425 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1428 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1431 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1434 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1437 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1441 case NVPTXISD::LDUV2:
1442 switch (EltVT.getSimpleVT().SimpleTy) {
1446 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1449 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1452 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1455 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1458 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1461 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1465 case NVPTXISD::LDGV4:
1466 switch (EltVT.getSimpleVT().SimpleTy) {
1470 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1473 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1476 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1479 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1483 case NVPTXISD::LDUV4:
1484 switch (EltVT.getSimpleVT().SimpleTy) {
1488 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1491 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1494 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1497 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
// Direct-address form: the matched address and the chain are the operands.
1503 SDValue Ops[] = { Addr, Chain };
1504 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1505 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1506 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
// The *ari64 opcode tables come first, followed by the *ari / *ari32 ones
// (presumably selected on TM.is64Bit(); the test is not visible here).
1508 switch (N->getOpcode()) {
1512 case ISD::INTRINSIC_W_CHAIN:
1514 switch (EltVT.getSimpleVT().SimpleTy) {
1518 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1521 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1524 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1527 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1530 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1533 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1537 switch (EltVT.getSimpleVT().SimpleTy) {
1541 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1544 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1547 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1550 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1553 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1556 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1561 case NVPTXISD::LoadV2:
1562 case NVPTXISD::LDGV2:
1563 switch (EltVT.getSimpleVT().SimpleTy) {
1567 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1570 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1573 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1576 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1579 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1582 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1586 case NVPTXISD::LDUV2:
1587 switch (EltVT.getSimpleVT().SimpleTy) {
1591 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1594 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1597 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1600 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1603 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1606 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1610 case NVPTXISD::LoadV4:
1611 case NVPTXISD::LDGV4:
1612 switch (EltVT.getSimpleVT().SimpleTy) {
1616 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1619 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1622 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1625 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1629 case NVPTXISD::LDUV4:
1630 switch (EltVT.getSimpleVT().SimpleTy) {
1634 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1637 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1640 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1643 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1649 switch (N->getOpcode()) {
1653 case ISD::INTRINSIC_W_CHAIN:
1655 switch (EltVT.getSimpleVT().SimpleTy) {
1659 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1662 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1665 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1668 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1671 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1674 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1678 switch (EltVT.getSimpleVT().SimpleTy) {
1682 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1685 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1688 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1691 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1694 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1697 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1702 case NVPTXISD::LoadV2:
1703 case NVPTXISD::LDGV2:
1704 switch (EltVT.getSimpleVT().SimpleTy) {
1708 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1711 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1714 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1717 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1720 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1723 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1727 case NVPTXISD::LDUV2:
1728 switch (EltVT.getSimpleVT().SimpleTy) {
1732 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1735 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1738 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1741 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1744 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1747 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1751 case NVPTXISD::LoadV4:
1752 case NVPTXISD::LDGV4:
1753 switch (EltVT.getSimpleVT().SimpleTy) {
1757 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1760 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1763 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1766 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1770 case NVPTXISD::LDUV4:
1771 switch (EltVT.getSimpleVT().SimpleTy) {
1775 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1778 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1781 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1784 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
// Base + offset form: the two halves of the matched address, then the chain.
1791 SDValue Ops[] = { Base, Offset, Chain };
1793 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1796 switch (N->getOpcode()) {
1800 case ISD::INTRINSIC_W_CHAIN:
1802 switch (EltVT.getSimpleVT().SimpleTy) {
1806 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1809 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1812 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1815 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1818 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1821 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1825 switch (EltVT.getSimpleVT().SimpleTy) {
1829 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1832 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1835 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1838 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1841 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1844 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1849 case NVPTXISD::LoadV2:
1850 case NVPTXISD::LDGV2:
1851 switch (EltVT.getSimpleVT().SimpleTy) {
1855 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1858 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1861 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1864 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1867 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1870 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1874 case NVPTXISD::LDUV2:
1875 switch (EltVT.getSimpleVT().SimpleTy) {
1879 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1882 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1885 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1888 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1891 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1894 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1898 case NVPTXISD::LoadV4:
1899 case NVPTXISD::LDGV4:
1900 switch (EltVT.getSimpleVT().SimpleTy) {
1904 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1907 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1910 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1913 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1917 case NVPTXISD::LDUV4:
1918 switch (EltVT.getSimpleVT().SimpleTy) {
1922 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1925 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1928 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1931 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1937 switch (N->getOpcode()) {
1941 case ISD::INTRINSIC_W_CHAIN:
1943 switch (EltVT.getSimpleVT().SimpleTy) {
1947 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1950 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1953 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1956 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1959 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1962 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1966 switch (EltVT.getSimpleVT().SimpleTy) {
1970 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1973 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1976 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1979 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1982 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1985 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1990 case NVPTXISD::LoadV2:
1991 case NVPTXISD::LDGV2:
1992 switch (EltVT.getSimpleVT().SimpleTy) {
1996 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1999 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
2002 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
2005 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
2008 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
2011 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
2015 case NVPTXISD::LDUV2:
2016 switch (EltVT.getSimpleVT().SimpleTy) {
2020 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
2023 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
2026 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2029 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2032 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2035 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2039 case NVPTXISD::LoadV4:
2040 case NVPTXISD::LDGV4:
2041 switch (EltVT.getSimpleVT().SimpleTy) {
2045 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2048 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2051 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2054 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2058 case NVPTXISD::LDUV4:
2059 switch (EltVT.getSimpleVT().SimpleTy) {
2063 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2066 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2069 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2072 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
// Register form: the address operand is passed through unchanged.
2079 SDValue Ops[] = { Op1, Chain };
2080 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
// Carry the original node's memory operand over to the selected machine node.
2083 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2084 MemRefs0[0] = Mem->getMemOperand();
2085 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2087 // For automatic generation of LDG (through SelectLoad[Vector], not the
2088 // intrinsics), we may have an extending load like:
2090 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2092 // In this case, the matching logic above will select a load for the original
2093 // memory type (in this case, i8) and our types will not match (the node needs
2094 // to return an i32 in this case). Our LDG/LDU nodes do not support the
2095 // concept of sign-/zero-extension, so emulate it here by adding an explicit
2096 // CVT instruction. Ptxas should clean up any redundancies here.
2098 EVT OrigType = N->getValueType(0);
2099 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
2101 if (OrigType != EltVT && LdNode) {
2102 // We have an extending-load. The instruction we selected operates on the
2103 // smaller type, but the SDNode we are replacing has the larger type. We
2104 // need to emit a CVT to make the types match.
2105 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2106 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2107 EltVT.getSimpleVT(), IsSigned);
2109 // For each output value, apply the manual sign/zero-extension and make sure
2110 // all users of the load go through that CVT.
2111 for (unsigned i = 0; i != NumElts; ++i) {
2113 SDValue OrigVal(N, i);
2116 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
2117 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2119 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
/// Try to select an NVPTX ST_* machine node for the scalar store N.
///
/// Gathers the immediates the ST_* instructions take (volatile flag, address
/// space code, vector-type code, destination type and destination width),
/// picks the opcode for the addressing form matched on the address operand,
/// attaches the store's memory operand and replaces N with the new node.
2127 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
2129 StoreSDNode *ST = cast<StoreSDNode>(N);
2130 EVT StoreVT = ST->getMemoryVT();
2131 SDNode *NVPTXST = nullptr;
2133 // do not support pre/post inc/dec
2134 if (ST->isIndexed())
2137 if (!StoreVT.isSimple())
2140 // Address Space Setting
2141 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2144 // - .volatile is only available for .global and .shared
// The condition below tests GLOBAL, SHARED and GENERIC.
2145 bool isVolatile = ST->isVolatile();
2146 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2147 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2148 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
// Vector-type code; starts as Scalar and is updated for vector memory types.
2152 MVT SimpleVT = StoreVT.getSimpleVT();
2153 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2154 if (SimpleVT.isVector()) {
2155 unsigned num = SimpleVT.getVectorNumElements();
2157 vecType = NVPTX::PTXLdStInstCode::V2;
2159 vecType = NVPTX::PTXLdStInstCode::V4;
2164 // Type Setting: toType + toTypeWidth
2165 // - for integer type, always use 'u'
2167 MVT ScalarVT = SimpleVT.getScalarType();
2168 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2169 unsigned int toType;
2170 if (ScalarVT.isFloatingPoint())
2171 toType = NVPTX::PTXLdStInstCode::Float;
2173 toType = NVPTX::PTXLdStInstCode::Unsigned;
2175 // Create the machine instruction DAG
// Operand 0 is the chain, operand 1 the value being stored (N1) and
// operand 2 the address (N2).
2176 SDValue Chain = N->getOperand(0);
2177 SDValue N1 = N->getOperand(1);
2178 SDValue N2 = N->getOperand(2);
2180 SDValue Offset, Base;
// Value type of the stored operand itself (not the memory VT).
2182 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2184 if (SelectDirectAddr(N2, Addr)) {
2187 Opcode = NVPTX::ST_i8_avar;
2190 Opcode = NVPTX::ST_i16_avar;
2193 Opcode = NVPTX::ST_i32_avar;
2196 Opcode = NVPTX::ST_i64_avar;
2199 Opcode = NVPTX::ST_f32_avar;
2202 Opcode = NVPTX::ST_f64_avar;
2207 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2208 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2209 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2211 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2212 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2213 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2216 Opcode = NVPTX::ST_i8_asi;
2219 Opcode = NVPTX::ST_i16_asi;
2222 Opcode = NVPTX::ST_i32_asi;
2225 Opcode = NVPTX::ST_i64_asi;
2228 Opcode = NVPTX::ST_f32_asi;
2231 Opcode = NVPTX::ST_f64_asi;
2236 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2237 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2238 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2240 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2241 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2242 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2246 Opcode = NVPTX::ST_i8_ari_64;
2249 Opcode = NVPTX::ST_i16_ari_64;
2252 Opcode = NVPTX::ST_i32_ari_64;
2255 Opcode = NVPTX::ST_i64_ari_64;
2258 Opcode = NVPTX::ST_f32_ari_64;
2261 Opcode = NVPTX::ST_f64_ari_64;
2269 Opcode = NVPTX::ST_i8_ari;
2272 Opcode = NVPTX::ST_i16_ari;
2275 Opcode = NVPTX::ST_i32_ari;
2278 Opcode = NVPTX::ST_i64_ari;
2281 Opcode = NVPTX::ST_f32_ari;
2284 Opcode = NVPTX::ST_f64_ari;
2290 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2291 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2292 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2294 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2299 Opcode = NVPTX::ST_i8_areg_64;
2302 Opcode = NVPTX::ST_i16_areg_64;
2305 Opcode = NVPTX::ST_i32_areg_64;
2308 Opcode = NVPTX::ST_i64_areg_64;
2311 Opcode = NVPTX::ST_f32_areg_64;
2314 Opcode = NVPTX::ST_f64_areg_64;
2322 Opcode = NVPTX::ST_i8_areg;
2325 Opcode = NVPTX::ST_i16_areg;
2328 Opcode = NVPTX::ST_i32_areg;
2331 Opcode = NVPTX::ST_i64_areg;
2334 Opcode = NVPTX::ST_f32_areg;
2337 Opcode = NVPTX::ST_f64_areg;
// Register form: the address operand N2 is passed through unchanged.
2343 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2344 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2345 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2347 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Carry the store's memory operand over to the machine node, then swap it in.
2353 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2354 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2355 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2356 ReplaceNode(N, NVPTXST);
/// Try to select an NVPTX STV_* machine node for a StoreV2/StoreV4 node.
///
/// The machine node's operands are accumulated in StOps in this order: the two
/// or four values to store, five i32 immediates (volatile flag, address space,
/// vector type, destination type, destination width), the address operand(s)
/// of the matched addressing form, and finally the chain.
2360 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
2361 SDValue Chain = N->getOperand(0);
2362 SDValue Op1 = N->getOperand(1);
2363 SDValue Addr, Offset, Base;
// The opcode tables below are keyed on the type of the first stored value.
2367 EVT EltVT = Op1.getValueType();
2368 MemSDNode *MemSD = cast<MemSDNode>(N);
2369 EVT StoreVT = MemSD->getMemoryVT();
2371 // Address Space Setting
2372 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
// Stores into the constant address space are rejected with a fatal error.
2374 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2375 report_fatal_error("Cannot store to pointer that points to constant "
2380 // - .volatile is only available for .global and .shared
// The condition below tests GLOBAL, SHARED and GENERIC.
2381 bool IsVolatile = MemSD->isVolatile();
2382 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2383 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2384 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2387 // Type Setting: toType + toTypeWidth
2388 // - for integer type, always use 'u'
2389 assert(StoreVT.isSimple() && "Store value is not simple");
2390 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2391 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2393 if (ScalarVT.isFloatingPoint())
2394 ToType = NVPTX::PTXLdStInstCode::Float;
2396 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2398 SmallVector<SDValue, 12> StOps;
// Collect the value operands; the address (N2) is the operand right after
// them in N's operand list.
2402 switch (N->getOpcode()) {
2403 case NVPTXISD::StoreV2:
2404 VecType = NVPTX::PTXLdStInstCode::V2;
2405 StOps.push_back(N->getOperand(1));
2406 StOps.push_back(N->getOperand(2));
2407 N2 = N->getOperand(3);
2409 case NVPTXISD::StoreV4:
2410 VecType = NVPTX::PTXLdStInstCode::V4;
2411 StOps.push_back(N->getOperand(1));
2412 StOps.push_back(N->getOperand(2));
2413 StOps.push_back(N->getOperand(3));
2414 StOps.push_back(N->getOperand(4));
2415 N2 = N->getOperand(5);
// Immediates appended for every addressing form.
2421 StOps.push_back(getI32Imm(IsVolatile, DL));
2422 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2423 StOps.push_back(getI32Imm(VecType, DL));
2424 StOps.push_back(getI32Imm(ToType, DL));
2425 StOps.push_back(getI32Imm(ToTypeWidth, DL));
2427 if (SelectDirectAddr(N2, Addr)) {
2428 switch (N->getOpcode()) {
2431 case NVPTXISD::StoreV2:
2432 switch (EltVT.getSimpleVT().SimpleTy) {
2436 Opcode = NVPTX::STV_i8_v2_avar;
2439 Opcode = NVPTX::STV_i16_v2_avar;
2442 Opcode = NVPTX::STV_i32_v2_avar;
2445 Opcode = NVPTX::STV_i64_v2_avar;
2448 Opcode = NVPTX::STV_f32_v2_avar;
2451 Opcode = NVPTX::STV_f64_v2_avar;
2455 case NVPTXISD::StoreV4:
2456 switch (EltVT.getSimpleVT().SimpleTy) {
2460 Opcode = NVPTX::STV_i8_v4_avar;
2463 Opcode = NVPTX::STV_i16_v4_avar;
2466 Opcode = NVPTX::STV_i32_v4_avar;
2469 Opcode = NVPTX::STV_f32_v4_avar;
2474 StOps.push_back(Addr);
2475 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2476 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2477 switch (N->getOpcode()) {
2480 case NVPTXISD::StoreV2:
2481 switch (EltVT.getSimpleVT().SimpleTy) {
2485 Opcode = NVPTX::STV_i8_v2_asi;
2488 Opcode = NVPTX::STV_i16_v2_asi;
2491 Opcode = NVPTX::STV_i32_v2_asi;
2494 Opcode = NVPTX::STV_i64_v2_asi;
2497 Opcode = NVPTX::STV_f32_v2_asi;
2500 Opcode = NVPTX::STV_f64_v2_asi;
2504 case NVPTXISD::StoreV4:
2505 switch (EltVT.getSimpleVT().SimpleTy) {
2509 Opcode = NVPTX::STV_i8_v4_asi;
2512 Opcode = NVPTX::STV_i16_v4_asi;
2515 Opcode = NVPTX::STV_i32_v4_asi;
2518 Opcode = NVPTX::STV_f32_v4_asi;
2523 StOps.push_back(Base);
2524 StOps.push_back(Offset);
2525 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2526 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2528 switch (N->getOpcode()) {
2531 case NVPTXISD::StoreV2:
2532 switch (EltVT.getSimpleVT().SimpleTy) {
2536 Opcode = NVPTX::STV_i8_v2_ari_64;
2539 Opcode = NVPTX::STV_i16_v2_ari_64;
2542 Opcode = NVPTX::STV_i32_v2_ari_64;
2545 Opcode = NVPTX::STV_i64_v2_ari_64;
2548 Opcode = NVPTX::STV_f32_v2_ari_64;
2551 Opcode = NVPTX::STV_f64_v2_ari_64;
2555 case NVPTXISD::StoreV4:
2556 switch (EltVT.getSimpleVT().SimpleTy) {
2560 Opcode = NVPTX::STV_i8_v4_ari_64;
2563 Opcode = NVPTX::STV_i16_v4_ari_64;
2566 Opcode = NVPTX::STV_i32_v4_ari_64;
2569 Opcode = NVPTX::STV_f32_v4_ari_64;
2575 switch (N->getOpcode()) {
2578 case NVPTXISD::StoreV2:
2579 switch (EltVT.getSimpleVT().SimpleTy) {
2583 Opcode = NVPTX::STV_i8_v2_ari;
2586 Opcode = NVPTX::STV_i16_v2_ari;
2589 Opcode = NVPTX::STV_i32_v2_ari;
2592 Opcode = NVPTX::STV_i64_v2_ari;
2595 Opcode = NVPTX::STV_f32_v2_ari;
2598 Opcode = NVPTX::STV_f64_v2_ari;
2602 case NVPTXISD::StoreV4:
2603 switch (EltVT.getSimpleVT().SimpleTy) {
2607 Opcode = NVPTX::STV_i8_v4_ari;
2610 Opcode = NVPTX::STV_i16_v4_ari;
2613 Opcode = NVPTX::STV_i32_v4_ari;
2616 Opcode = NVPTX::STV_f32_v4_ari;
2622 StOps.push_back(Base);
2623 StOps.push_back(Offset);
2626 switch (N->getOpcode()) {
2629 case NVPTXISD::StoreV2:
2630 switch (EltVT.getSimpleVT().SimpleTy) {
2634 Opcode = NVPTX::STV_i8_v2_areg_64;
2637 Opcode = NVPTX::STV_i16_v2_areg_64;
2640 Opcode = NVPTX::STV_i32_v2_areg_64;
2643 Opcode = NVPTX::STV_i64_v2_areg_64;
2646 Opcode = NVPTX::STV_f32_v2_areg_64;
2649 Opcode = NVPTX::STV_f64_v2_areg_64;
2653 case NVPTXISD::StoreV4:
2654 switch (EltVT.getSimpleVT().SimpleTy) {
2658 Opcode = NVPTX::STV_i8_v4_areg_64;
2661 Opcode = NVPTX::STV_i16_v4_areg_64;
2664 Opcode = NVPTX::STV_i32_v4_areg_64;
2667 Opcode = NVPTX::STV_f32_v4_areg_64;
2673 switch (N->getOpcode()) {
2676 case NVPTXISD::StoreV2:
2677 switch (EltVT.getSimpleVT().SimpleTy) {
2681 Opcode = NVPTX::STV_i8_v2_areg;
2684 Opcode = NVPTX::STV_i16_v2_areg;
2687 Opcode = NVPTX::STV_i32_v2_areg;
2690 Opcode = NVPTX::STV_i64_v2_areg;
2693 Opcode = NVPTX::STV_f32_v2_areg;
2696 Opcode = NVPTX::STV_f64_v2_areg;
2700 case NVPTXISD::StoreV4:
2701 switch (EltVT.getSimpleVT().SimpleTy) {
2705 Opcode = NVPTX::STV_i8_v4_areg;
2708 Opcode = NVPTX::STV_i16_v4_areg;
2711 Opcode = NVPTX::STV_i32_v4_areg;
2714 Opcode = NVPTX::STV_f32_v4_areg;
// Register form: the address operand N2 is passed through unchanged.
2720 StOps.push_back(N2);
// The chain is always the last operand.
2723 StOps.push_back(Chain);
2725 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
// Carry the original node's memory operand over to the machine node.
2727 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2728 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2729 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
/// Select a LoadParamMem* machine node for a LoadParam, LoadParamV2 or
/// LoadParamV4 node.
///
/// Operand 0 is the chain, operand 2 the constant offset, and operand 3 is
/// forwarded as the trailing Flag operand. The new node produces one, two or
/// four EltVT results followed by a chain (MVT::Other) and an MVT::Glue result.
2735 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2736 SDValue Chain = Node->getOperand(0);
2737 SDValue Offset = Node->getOperand(2);
2738 SDValue Flag = Node->getOperand(3);
2740 MemSDNode *Mem = cast<MemSDNode>(Node);
2743 switch (Node->getOpcode()) {
2746 case NVPTXISD::LoadParam:
2749 case NVPTXISD::LoadParamV2:
2752 case NVPTXISD::LoadParamV4:
// EltVT is the type of the node's first result; MemVT is the type in memory
// and is what the opcode tables below are keyed on.
2757 EVT EltVT = Node->getValueType(0);
2758 EVT MemVT = Mem->getMemoryVT();
// NOTE(review): the *I8 opcode appears twice in each group below, presumably
// once for i1 and once for i8 - the case labels are not visible here; confirm.
2766 switch (MemVT.getSimpleVT().SimpleTy) {
2770 Opc = NVPTX::LoadParamMemI8;
2773 Opc = NVPTX::LoadParamMemI8;
2776 Opc = NVPTX::LoadParamMemI16;
2779 Opc = NVPTX::LoadParamMemI32;
2782 Opc = NVPTX::LoadParamMemI64;
2785 Opc = NVPTX::LoadParamMemF32;
2788 Opc = NVPTX::LoadParamMemF64;
2793 switch (MemVT.getSimpleVT().SimpleTy) {
2797 Opc = NVPTX::LoadParamMemV2I8;
2800 Opc = NVPTX::LoadParamMemV2I8;
2803 Opc = NVPTX::LoadParamMemV2I16;
2806 Opc = NVPTX::LoadParamMemV2I32;
2809 Opc = NVPTX::LoadParamMemV2I64;
2812 Opc = NVPTX::LoadParamMemV2F32;
2815 Opc = NVPTX::LoadParamMemV2F64;
2820 switch (MemVT.getSimpleVT().SimpleTy) {
2824 Opc = NVPTX::LoadParamMemV4I8;
2827 Opc = NVPTX::LoadParamMemV4I8;
2830 Opc = NVPTX::LoadParamMemV4I16;
2833 Opc = NVPTX::LoadParamMemV4I32;
2836 Opc = NVPTX::LoadParamMemV4F32;
// Result list: VecSize copies of EltVT, then the chain and glue results.
2844 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2845 } else if (VecSize == 2) {
2846 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2848 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2849 VTs = CurDAG->getVTList(EVTs);
// The offset operand must be a constant; it is re-emitted as an i32 target
// constant on the machine node.
2852 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2854 SmallVector<SDValue, 2> Ops;
2855 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2856 Ops.push_back(Chain);
2857 Ops.push_back(Flag);
2859 ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
/// Select a StoreRetval* machine node for a StoreRetval, StoreRetvalV2 or
/// StoreRetvalV4 node.
///
/// Operand 0 is the chain, operand 1 the constant offset, and the values to
/// store start at operand 2. The machine node takes the values first, then the
/// offset as an i32 target constant, then the chain; the original memory
/// operand is attached before N is replaced.
2863 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2865 SDValue Chain = N->getOperand(0);
2866 SDValue Offset = N->getOperand(1);
2867 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2868 MemSDNode *Mem = cast<MemSDNode>(N);
2870 // How many elements do we have?
2871 unsigned NumElts = 1;
2872 switch (N->getOpcode()) {
2875 case NVPTXISD::StoreRetval:
2878 case NVPTXISD::StoreRetvalV2:
2881 case NVPTXISD::StoreRetvalV4:
2886 // Build vector of operands
2887 SmallVector<SDValue, 6> Ops;
2888 for (unsigned i = 0; i < NumElts; ++i)
2889 Ops.push_back(N->getOperand(i + 2));
2890 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2891 Ops.push_back(Chain);
2893 // Determine target opcode
2894 // If we have an i1, use an 8-bit store. The lowering code in
2895 // NVPTXISelLowering will have already emitted an upcast.
2896 unsigned Opcode = 0;
// Each group below is keyed on the memory VT; the *I8 opcode is listed twice
// per group, matching the i1 note above.
2901 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2905 Opcode = NVPTX::StoreRetvalI8;
2908 Opcode = NVPTX::StoreRetvalI8;
2911 Opcode = NVPTX::StoreRetvalI16;
2914 Opcode = NVPTX::StoreRetvalI32;
2917 Opcode = NVPTX::StoreRetvalI64;
2920 Opcode = NVPTX::StoreRetvalF32;
2923 Opcode = NVPTX::StoreRetvalF64;
2928 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2932 Opcode = NVPTX::StoreRetvalV2I8;
2935 Opcode = NVPTX::StoreRetvalV2I8;
2938 Opcode = NVPTX::StoreRetvalV2I16;
2941 Opcode = NVPTX::StoreRetvalV2I32;
2944 Opcode = NVPTX::StoreRetvalV2I64;
2947 Opcode = NVPTX::StoreRetvalV2F32;
2950 Opcode = NVPTX::StoreRetvalV2F64;
2955 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2959 Opcode = NVPTX::StoreRetvalV4I8;
2962 Opcode = NVPTX::StoreRetvalV4I8;
2965 Opcode = NVPTX::StoreRetvalV4I16;
2968 Opcode = NVPTX::StoreRetvalV4I32;
2971 Opcode = NVPTX::StoreRetvalV4F32;
2978 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
// Carry the original node's memory operand over to the machine node.
2979 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2980 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2981 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2983 ReplaceNode(N, Ret);
/// Select a StoreParam* machine node for a StoreParam, StoreParamV2,
/// StoreParamV4, StoreParamU32 or StoreParamS32 node.
///
/// Operand 0 is the chain, operand 1 the constant Param value, operand 2 the
/// constant offset, the values to store start at operand 3, and the last
/// operand is forwarded as Flag. The machine node takes the values, then Param
/// and offset as i32 target constants, then chain and Flag, and produces a
/// chain and a glue result.
2987 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2989 SDValue Chain = N->getOperand(0);
2990 SDValue Param = N->getOperand(1);
2991 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2992 SDValue Offset = N->getOperand(2);
2993 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2994 MemSDNode *Mem = cast<MemSDNode>(N);
2995 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2997 // How many elements do we have?
2998 unsigned NumElts = 1;
2999 switch (N->getOpcode()) {
3002 case NVPTXISD::StoreParamU32:
3003 case NVPTXISD::StoreParamS32:
3004 case NVPTXISD::StoreParam:
3007 case NVPTXISD::StoreParamV2:
3010 case NVPTXISD::StoreParamV4:
3015 // Build vector of operands
3016 SmallVector<SDValue, 8> Ops;
3017 for (unsigned i = 0; i < NumElts; ++i)
3018 Ops.push_back(N->getOperand(i + 3));
3019 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3020 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
3021 Ops.push_back(Chain);
3022 Ops.push_back(Flag);
3024 // Determine target opcode
3025 // If we have an i1, use an 8-bit store. The lowering code in
3026 // NVPTXISelLowering will have already emitted an upcast.
3027 unsigned Opcode = 0;
3028 switch (N->getOpcode()) {
// Each group below is keyed on the memory VT; the *I8 opcode is listed twice
// per group, matching the i1 note above.
3034 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3038 Opcode = NVPTX::StoreParamI8;
3041 Opcode = NVPTX::StoreParamI8;
3044 Opcode = NVPTX::StoreParamI16;
3047 Opcode = NVPTX::StoreParamI32;
3050 Opcode = NVPTX::StoreParamI64;
3053 Opcode = NVPTX::StoreParamF32;
3056 Opcode = NVPTX::StoreParamF64;
3061 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3065 Opcode = NVPTX::StoreParamV2I8;
3068 Opcode = NVPTX::StoreParamV2I8;
3071 Opcode = NVPTX::StoreParamV2I16;
3074 Opcode = NVPTX::StoreParamV2I32;
3077 Opcode = NVPTX::StoreParamV2I64;
3080 Opcode = NVPTX::StoreParamV2F32;
3083 Opcode = NVPTX::StoreParamV2F64;
3088 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3092 Opcode = NVPTX::StoreParamV4I8;
3095 Opcode = NVPTX::StoreParamV4I8;
3098 Opcode = NVPTX::StoreParamV4I16;
3101 Opcode = NVPTX::StoreParamV4I32;
3104 Opcode = NVPTX::StoreParamV4F32;
3110 // Special case: if we have a sign-extend/zero-extend node, insert the
3111 // conversion instruction first, and use that as the value operand to
3112 // the selected StoreParam node.
3113 case NVPTXISD::StoreParamU32: {
3114 Opcode = NVPTX::StoreParamI32;
3115 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3117 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3118 MVT::i32, Ops[0], CvtNone);
3119 Ops[0] = SDValue(Cvt, 0);
3122 case NVPTXISD::StoreParamS32: {
3123 Opcode = NVPTX::StoreParamI32;
3124 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3126 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3127 MVT::i32, Ops[0], CvtNone);
3128 Ops[0] = SDValue(Cvt, 0);
// The selected node yields a chain and a glue result.
3133 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3135 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
// Carry the original node's memory operand over to the machine node.
3136 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3137 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3138 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3140 ReplaceNode(N, Ret);
/// Select a machine texture instruction for the NVPTXISD::Tex* / Tld4* node
/// \p N.
///
/// The switch below is a pure one-to-one mapping from the NVPTXISD node
/// opcode to the NVPTX machine opcode; returns false when \p N's opcode is
/// not one of the listed nodes. After the mapping, operands 1..end of \p N
/// are copied in order, the chain (operand 0) is appended last, and \p N is
/// replaced by a machine node with the same value-type list.
3144 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
3145 SDValue Chain = N->getOperand(0);
3147 SmallVector<SDValue, 8> Ops;
3149 switch (N->getOpcode()) {
3150 default: return false;
// Tex1D* nodes -> TEX_1D_* opcodes.
3151 case NVPTXISD::Tex1DFloatS32:
3152 Opc = NVPTX::TEX_1D_F32_S32;
3154 case NVPTXISD::Tex1DFloatFloat:
3155 Opc = NVPTX::TEX_1D_F32_F32;
3157 case NVPTXISD::Tex1DFloatFloatLevel:
3158 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3160 case NVPTXISD::Tex1DFloatFloatGrad:
3161 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3163 case NVPTXISD::Tex1DS32S32:
3164 Opc = NVPTX::TEX_1D_S32_S32;
3166 case NVPTXISD::Tex1DS32Float:
3167 Opc = NVPTX::TEX_1D_S32_F32;
3169 case NVPTXISD::Tex1DS32FloatLevel:
3170 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3172 case NVPTXISD::Tex1DS32FloatGrad:
3173 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3175 case NVPTXISD::Tex1DU32S32:
3176 Opc = NVPTX::TEX_1D_U32_S32;
3178 case NVPTXISD::Tex1DU32Float:
3179 Opc = NVPTX::TEX_1D_U32_F32;
3181 case NVPTXISD::Tex1DU32FloatLevel:
3182 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3184 case NVPTXISD::Tex1DU32FloatGrad:
3185 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
// Tex1DArray* nodes -> TEX_1D_ARRAY_* opcodes.
3187 case NVPTXISD::Tex1DArrayFloatS32:
3188 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3190 case NVPTXISD::Tex1DArrayFloatFloat:
3191 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3193 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3194 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3196 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3197 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3199 case NVPTXISD::Tex1DArrayS32S32:
3200 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3202 case NVPTXISD::Tex1DArrayS32Float:
3203 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3205 case NVPTXISD::Tex1DArrayS32FloatLevel:
3206 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3208 case NVPTXISD::Tex1DArrayS32FloatGrad:
3209 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3211 case NVPTXISD::Tex1DArrayU32S32:
3212 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3214 case NVPTXISD::Tex1DArrayU32Float:
3215 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3217 case NVPTXISD::Tex1DArrayU32FloatLevel:
3218 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3220 case NVPTXISD::Tex1DArrayU32FloatGrad:
3221 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
// Tex2D* nodes -> TEX_2D_* opcodes.
3223 case NVPTXISD::Tex2DFloatS32:
3224 Opc = NVPTX::TEX_2D_F32_S32;
3226 case NVPTXISD::Tex2DFloatFloat:
3227 Opc = NVPTX::TEX_2D_F32_F32;
3229 case NVPTXISD::Tex2DFloatFloatLevel:
3230 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3232 case NVPTXISD::Tex2DFloatFloatGrad:
3233 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3235 case NVPTXISD::Tex2DS32S32:
3236 Opc = NVPTX::TEX_2D_S32_S32;
3238 case NVPTXISD::Tex2DS32Float:
3239 Opc = NVPTX::TEX_2D_S32_F32;
3241 case NVPTXISD::Tex2DS32FloatLevel:
3242 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3244 case NVPTXISD::Tex2DS32FloatGrad:
3245 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3247 case NVPTXISD::Tex2DU32S32:
3248 Opc = NVPTX::TEX_2D_U32_S32;
3250 case NVPTXISD::Tex2DU32Float:
3251 Opc = NVPTX::TEX_2D_U32_F32;
3253 case NVPTXISD::Tex2DU32FloatLevel:
3254 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3256 case NVPTXISD::Tex2DU32FloatGrad:
3257 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
// Tex2DArray* nodes -> TEX_2D_ARRAY_* opcodes.
3259 case NVPTXISD::Tex2DArrayFloatS32:
3260 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3262 case NVPTXISD::Tex2DArrayFloatFloat:
3263 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3265 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3266 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3268 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3269 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3271 case NVPTXISD::Tex2DArrayS32S32:
3272 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3274 case NVPTXISD::Tex2DArrayS32Float:
3275 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3277 case NVPTXISD::Tex2DArrayS32FloatLevel:
3278 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3280 case NVPTXISD::Tex2DArrayS32FloatGrad:
3281 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3283 case NVPTXISD::Tex2DArrayU32S32:
3284 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3286 case NVPTXISD::Tex2DArrayU32Float:
3287 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3289 case NVPTXISD::Tex2DArrayU32FloatLevel:
3290 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3292 case NVPTXISD::Tex2DArrayU32FloatGrad:
3293 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
// Tex3D* nodes -> TEX_3D_* opcodes.
3295 case NVPTXISD::Tex3DFloatS32:
3296 Opc = NVPTX::TEX_3D_F32_S32;
3298 case NVPTXISD::Tex3DFloatFloat:
3299 Opc = NVPTX::TEX_3D_F32_F32;
3301 case NVPTXISD::Tex3DFloatFloatLevel:
3302 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3304 case NVPTXISD::Tex3DFloatFloatGrad:
3305 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3307 case NVPTXISD::Tex3DS32S32:
3308 Opc = NVPTX::TEX_3D_S32_S32;
3310 case NVPTXISD::Tex3DS32Float:
3311 Opc = NVPTX::TEX_3D_S32_F32;
3313 case NVPTXISD::Tex3DS32FloatLevel:
3314 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3316 case NVPTXISD::Tex3DS32FloatGrad:
3317 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3319 case NVPTXISD::Tex3DU32S32:
3320 Opc = NVPTX::TEX_3D_U32_S32;
3322 case NVPTXISD::Tex3DU32Float:
3323 Opc = NVPTX::TEX_3D_U32_F32;
3325 case NVPTXISD::Tex3DU32FloatLevel:
3326 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3328 case NVPTXISD::Tex3DU32FloatGrad:
3329 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
// TexCube* nodes -> TEX_CUBE_* opcodes (only Float and FloatLevel forms are
// listed).
3331 case NVPTXISD::TexCubeFloatFloat:
3332 Opc = NVPTX::TEX_CUBE_F32_F32;
3334 case NVPTXISD::TexCubeFloatFloatLevel:
3335 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3337 case NVPTXISD::TexCubeS32Float:
3338 Opc = NVPTX::TEX_CUBE_S32_F32;
3340 case NVPTXISD::TexCubeS32FloatLevel:
3341 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3343 case NVPTXISD::TexCubeU32Float:
3344 Opc = NVPTX::TEX_CUBE_U32_F32;
3346 case NVPTXISD::TexCubeU32FloatLevel:
3347 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
// TexCubeArray* nodes -> TEX_CUBE_ARRAY_* opcodes.
3349 case NVPTXISD::TexCubeArrayFloatFloat:
3350 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3352 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3353 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3355 case NVPTXISD::TexCubeArrayS32Float:
3356 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3358 case NVPTXISD::TexCubeArrayS32FloatLevel:
3359 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3361 case NVPTXISD::TexCubeArrayU32Float:
3362 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3364 case NVPTXISD::TexCubeArrayU32FloatLevel:
3365 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
// Tld4{R,G,B,A}2D* nodes -> TLD4_{R,G,B,A}_2D_* opcodes.
// The node names spelled S64/U64 select the *_S32_F32 / *_U32_F32 opcodes.
// NOTE(review): looks like a naming quirk of the NVPTXISD enum -- confirm.
3367 case NVPTXISD::Tld4R2DFloatFloat:
3368 Opc = NVPTX::TLD4_R_2D_F32_F32;
3370 case NVPTXISD::Tld4G2DFloatFloat:
3371 Opc = NVPTX::TLD4_G_2D_F32_F32;
3373 case NVPTXISD::Tld4B2DFloatFloat:
3374 Opc = NVPTX::TLD4_B_2D_F32_F32;
3376 case NVPTXISD::Tld4A2DFloatFloat:
3377 Opc = NVPTX::TLD4_A_2D_F32_F32;
3379 case NVPTXISD::Tld4R2DS64Float:
3380 Opc = NVPTX::TLD4_R_2D_S32_F32;
3382 case NVPTXISD::Tld4G2DS64Float:
3383 Opc = NVPTX::TLD4_G_2D_S32_F32;
3385 case NVPTXISD::Tld4B2DS64Float:
3386 Opc = NVPTX::TLD4_B_2D_S32_F32;
3388 case NVPTXISD::Tld4A2DS64Float:
3389 Opc = NVPTX::TLD4_A_2D_S32_F32;
3391 case NVPTXISD::Tld4R2DU64Float:
3392 Opc = NVPTX::TLD4_R_2D_U32_F32;
3394 case NVPTXISD::Tld4G2DU64Float:
3395 Opc = NVPTX::TLD4_G_2D_U32_F32;
3397 case NVPTXISD::Tld4B2DU64Float:
3398 Opc = NVPTX::TLD4_B_2D_U32_F32;
3400 case NVPTXISD::Tld4A2DU64Float:
3401 Opc = NVPTX::TLD4_A_2D_U32_F32;
// TexUnified1D* nodes -> TEX_UNIFIED_1D_* opcodes.
3403 case NVPTXISD::TexUnified1DFloatS32:
3404 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3406 case NVPTXISD::TexUnified1DFloatFloat:
3407 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3409 case NVPTXISD::TexUnified1DFloatFloatLevel:
3410 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3412 case NVPTXISD::TexUnified1DFloatFloatGrad:
3413 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3415 case NVPTXISD::TexUnified1DS32S32:
3416 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3418 case NVPTXISD::TexUnified1DS32Float:
3419 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3421 case NVPTXISD::TexUnified1DS32FloatLevel:
3422 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3424 case NVPTXISD::TexUnified1DS32FloatGrad:
3425 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3427 case NVPTXISD::TexUnified1DU32S32:
3428 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3430 case NVPTXISD::TexUnified1DU32Float:
3431 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3433 case NVPTXISD::TexUnified1DU32FloatLevel:
3434 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3436 case NVPTXISD::TexUnified1DU32FloatGrad:
3437 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
// TexUnified1DArray* nodes -> TEX_UNIFIED_1D_ARRAY_* opcodes.
3439 case NVPTXISD::TexUnified1DArrayFloatS32:
3440 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3442 case NVPTXISD::TexUnified1DArrayFloatFloat:
3443 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3445 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3446 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3448 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3449 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3451 case NVPTXISD::TexUnified1DArrayS32S32:
3452 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3454 case NVPTXISD::TexUnified1DArrayS32Float:
3455 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3457 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3458 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3460 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3461 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3463 case NVPTXISD::TexUnified1DArrayU32S32:
3464 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3466 case NVPTXISD::TexUnified1DArrayU32Float:
3467 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3469 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3470 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3472 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3473 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
// TexUnified2D* nodes -> TEX_UNIFIED_2D_* opcodes.
3475 case NVPTXISD::TexUnified2DFloatS32:
3476 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3478 case NVPTXISD::TexUnified2DFloatFloat:
3479 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3481 case NVPTXISD::TexUnified2DFloatFloatLevel:
3482 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3484 case NVPTXISD::TexUnified2DFloatFloatGrad:
3485 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3487 case NVPTXISD::TexUnified2DS32S32:
3488 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3490 case NVPTXISD::TexUnified2DS32Float:
3491 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3493 case NVPTXISD::TexUnified2DS32FloatLevel:
3494 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3496 case NVPTXISD::TexUnified2DS32FloatGrad:
3497 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3499 case NVPTXISD::TexUnified2DU32S32:
3500 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3502 case NVPTXISD::TexUnified2DU32Float:
3503 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3505 case NVPTXISD::TexUnified2DU32FloatLevel:
3506 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3508 case NVPTXISD::TexUnified2DU32FloatGrad:
3509 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
// TexUnified2DArray* nodes -> TEX_UNIFIED_2D_ARRAY_* opcodes.
3511 case NVPTXISD::TexUnified2DArrayFloatS32:
3512 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3514 case NVPTXISD::TexUnified2DArrayFloatFloat:
3515 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3517 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3518 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3520 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3521 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3523 case NVPTXISD::TexUnified2DArrayS32S32:
3524 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3526 case NVPTXISD::TexUnified2DArrayS32Float:
3527 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3529 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3530 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3532 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3533 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3535 case NVPTXISD::TexUnified2DArrayU32S32:
3536 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3538 case NVPTXISD::TexUnified2DArrayU32Float:
3539 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3541 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3542 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3544 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3545 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
// TexUnified3D* nodes -> TEX_UNIFIED_3D_* opcodes.
3547 case NVPTXISD::TexUnified3DFloatS32:
3548 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3550 case NVPTXISD::TexUnified3DFloatFloat:
3551 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3553 case NVPTXISD::TexUnified3DFloatFloatLevel:
3554 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3556 case NVPTXISD::TexUnified3DFloatFloatGrad:
3557 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3559 case NVPTXISD::TexUnified3DS32S32:
3560 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3562 case NVPTXISD::TexUnified3DS32Float:
3563 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3565 case NVPTXISD::TexUnified3DS32FloatLevel:
3566 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3568 case NVPTXISD::TexUnified3DS32FloatGrad:
3569 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3571 case NVPTXISD::TexUnified3DU32S32:
3572 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3574 case NVPTXISD::TexUnified3DU32Float:
3575 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3577 case NVPTXISD::TexUnified3DU32FloatLevel:
3578 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3580 case NVPTXISD::TexUnified3DU32FloatGrad:
3581 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
// TexUnifiedCube* nodes -> TEX_UNIFIED_CUBE_* opcodes.
3583 case NVPTXISD::TexUnifiedCubeFloatFloat:
3584 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3586 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3587 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3589 case NVPTXISD::TexUnifiedCubeS32Float:
3590 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3592 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3593 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3595 case NVPTXISD::TexUnifiedCubeU32Float:
3596 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3598 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3599 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
// TexUnifiedCubeArray* nodes -> TEX_UNIFIED_CUBE_ARRAY_* opcodes.
3601 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3602 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3604 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3605 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3607 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3608 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3610 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3611 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3613 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3614 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3616 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3617 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
// Tld4Unified{R,G,B,A}2D* nodes -> TLD4_UNIFIED_{R,G,B,A}_2D_* opcodes.
// As with the non-unified Tld4 nodes, the S64/U64 node names select the
// *_S32_F32 / *_U32_F32 opcodes.
3619 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3620 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3622 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3623 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3625 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3626 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3628 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3629 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3631 case NVPTXISD::Tld4UnifiedR2DS64Float:
3632 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3634 case NVPTXISD::Tld4UnifiedG2DS64Float:
3635 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3637 case NVPTXISD::Tld4UnifiedB2DS64Float:
3638 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3640 case NVPTXISD::Tld4UnifiedA2DS64Float:
3641 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3643 case NVPTXISD::Tld4UnifiedR2DU64Float:
3644 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3646 case NVPTXISD::Tld4UnifiedG2DU64Float:
3647 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3649 case NVPTXISD::Tld4UnifiedB2DU64Float:
3650 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3652 case NVPTXISD::Tld4UnifiedA2DU64Float:
3653 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3657 // Copy over operands
// Operand 0 (the chain) is skipped by the loop and appended last instead.
3658 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3659 Ops.push_back(N->getOperand(i));
3662 Ops.push_back(Chain);
// The replacement machine node reuses N's value-type list.
3663 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3667 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
3668 SDValue Chain = N->getOperand(0);
3669 SDValue TexHandle = N->getOperand(1);
3671 SmallVector<SDValue, 8> Ops;
3672 switch (N->getOpcode()) {
3673 default: return false;
3674 case NVPTXISD::Suld1DI8Clamp:
3675 Opc = NVPTX::SULD_1D_I8_CLAMP;
3676 Ops.push_back(TexHandle);
3677 Ops.push_back(N->getOperand(2));
3678 Ops.push_back(Chain);
3680 case NVPTXISD::Suld1DI16Clamp:
3681 Opc = NVPTX::SULD_1D_I16_CLAMP;
3682 Ops.push_back(TexHandle);
3683 Ops.push_back(N->getOperand(2));
3684 Ops.push_back(Chain);
3686 case NVPTXISD::Suld1DI32Clamp:
3687 Opc = NVPTX::SULD_1D_I32_CLAMP;
3688 Ops.push_back(TexHandle);
3689 Ops.push_back(N->getOperand(2));
3690 Ops.push_back(Chain);
3692 case NVPTXISD::Suld1DI64Clamp:
3693 Opc = NVPTX::SULD_1D_I64_CLAMP;
3694 Ops.push_back(TexHandle);
3695 Ops.push_back(N->getOperand(2));
3696 Ops.push_back(Chain);
3698 case NVPTXISD::Suld1DV2I8Clamp:
3699 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3700 Ops.push_back(TexHandle);
3701 Ops.push_back(N->getOperand(2));
3702 Ops.push_back(Chain);
3704 case NVPTXISD::Suld1DV2I16Clamp:
3705 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3706 Ops.push_back(TexHandle);
3707 Ops.push_back(N->getOperand(2));
3708 Ops.push_back(Chain);
3710 case NVPTXISD::Suld1DV2I32Clamp:
3711 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3712 Ops.push_back(TexHandle);
3713 Ops.push_back(N->getOperand(2));
3714 Ops.push_back(Chain);
3716 case NVPTXISD::Suld1DV2I64Clamp:
3717 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3718 Ops.push_back(TexHandle);
3719 Ops.push_back(N->getOperand(2));
3720 Ops.push_back(Chain);
3722 case NVPTXISD::Suld1DV4I8Clamp:
3723 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3724 Ops.push_back(TexHandle);
3725 Ops.push_back(N->getOperand(2));
3726 Ops.push_back(Chain);
3728 case NVPTXISD::Suld1DV4I16Clamp:
3729 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3730 Ops.push_back(TexHandle);
3731 Ops.push_back(N->getOperand(2));
3732 Ops.push_back(Chain);
3734 case NVPTXISD::Suld1DV4I32Clamp:
3735 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3736 Ops.push_back(TexHandle);
3737 Ops.push_back(N->getOperand(2));
3738 Ops.push_back(Chain);
3740 case NVPTXISD::Suld1DArrayI8Clamp:
3741 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3742 Ops.push_back(TexHandle);
3743 Ops.push_back(N->getOperand(2));
3744 Ops.push_back(N->getOperand(3));
3745 Ops.push_back(Chain);
3747 case NVPTXISD::Suld1DArrayI16Clamp:
3748 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3749 Ops.push_back(TexHandle);
3750 Ops.push_back(N->getOperand(2));
3751 Ops.push_back(N->getOperand(3));
3752 Ops.push_back(Chain);
3754 case NVPTXISD::Suld1DArrayI32Clamp:
3755 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3756 Ops.push_back(TexHandle);
3757 Ops.push_back(N->getOperand(2));
3758 Ops.push_back(N->getOperand(3));
3759 Ops.push_back(Chain);
3761 case NVPTXISD::Suld1DArrayI64Clamp:
3762 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3763 Ops.push_back(TexHandle);
3764 Ops.push_back(N->getOperand(2));
3765 Ops.push_back(N->getOperand(3));
3766 Ops.push_back(Chain);
3768 case NVPTXISD::Suld1DArrayV2I8Clamp:
3769 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3770 Ops.push_back(TexHandle);
3771 Ops.push_back(N->getOperand(2));
3772 Ops.push_back(N->getOperand(3));
3773 Ops.push_back(Chain);
3775 case NVPTXISD::Suld1DArrayV2I16Clamp:
3776 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3777 Ops.push_back(TexHandle);
3778 Ops.push_back(N->getOperand(2));
3779 Ops.push_back(N->getOperand(3));
3780 Ops.push_back(Chain);
3782 case NVPTXISD::Suld1DArrayV2I32Clamp:
3783 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3784 Ops.push_back(TexHandle);
3785 Ops.push_back(N->getOperand(2));
3786 Ops.push_back(N->getOperand(3));
3787 Ops.push_back(Chain);
3789 case NVPTXISD::Suld1DArrayV2I64Clamp:
3790 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3791 Ops.push_back(TexHandle);
3792 Ops.push_back(N->getOperand(2));
3793 Ops.push_back(N->getOperand(3));
3794 Ops.push_back(Chain);
3796 case NVPTXISD::Suld1DArrayV4I8Clamp:
3797 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3798 Ops.push_back(TexHandle);
3799 Ops.push_back(N->getOperand(2));
3800 Ops.push_back(N->getOperand(3));
3801 Ops.push_back(Chain);
3803 case NVPTXISD::Suld1DArrayV4I16Clamp:
3804 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3805 Ops.push_back(TexHandle);
3806 Ops.push_back(N->getOperand(2));
3807 Ops.push_back(N->getOperand(3));
3808 Ops.push_back(Chain);
3810 case NVPTXISD::Suld1DArrayV4I32Clamp:
3811 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3812 Ops.push_back(TexHandle);
3813 Ops.push_back(N->getOperand(2));
3814 Ops.push_back(N->getOperand(3));
3815 Ops.push_back(Chain);
3817 case NVPTXISD::Suld2DI8Clamp:
3818 Opc = NVPTX::SULD_2D_I8_CLAMP;
3819 Ops.push_back(TexHandle);
3820 Ops.push_back(N->getOperand(2));
3821 Ops.push_back(N->getOperand(3));
3822 Ops.push_back(Chain);
3824 case NVPTXISD::Suld2DI16Clamp:
3825 Opc = NVPTX::SULD_2D_I16_CLAMP;
3826 Ops.push_back(TexHandle);
3827 Ops.push_back(N->getOperand(2));
3828 Ops.push_back(N->getOperand(3));
3829 Ops.push_back(Chain);
3831 case NVPTXISD::Suld2DI32Clamp:
3832 Opc = NVPTX::SULD_2D_I32_CLAMP;
3833 Ops.push_back(TexHandle);
3834 Ops.push_back(N->getOperand(2));
3835 Ops.push_back(N->getOperand(3));
3836 Ops.push_back(Chain);
3838 case NVPTXISD::Suld2DI64Clamp:
3839 Opc = NVPTX::SULD_2D_I64_CLAMP;
3840 Ops.push_back(TexHandle);
3841 Ops.push_back(N->getOperand(2));
3842 Ops.push_back(N->getOperand(3));
3843 Ops.push_back(Chain);
3845 case NVPTXISD::Suld2DV2I8Clamp:
3846 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3847 Ops.push_back(TexHandle);
3848 Ops.push_back(N->getOperand(2));
3849 Ops.push_back(N->getOperand(3));
3850 Ops.push_back(Chain);
3852 case NVPTXISD::Suld2DV2I16Clamp:
3853 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3854 Ops.push_back(TexHandle);
3855 Ops.push_back(N->getOperand(2));
3856 Ops.push_back(N->getOperand(3));
3857 Ops.push_back(Chain);
3859 case NVPTXISD::Suld2DV2I32Clamp:
3860 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3861 Ops.push_back(TexHandle);
3862 Ops.push_back(N->getOperand(2));
3863 Ops.push_back(N->getOperand(3));
3864 Ops.push_back(Chain);
3866 case NVPTXISD::Suld2DV2I64Clamp:
3867 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3868 Ops.push_back(TexHandle);
3869 Ops.push_back(N->getOperand(2));
3870 Ops.push_back(N->getOperand(3));
3871 Ops.push_back(Chain);
3873 case NVPTXISD::Suld2DV4I8Clamp:
3874 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3875 Ops.push_back(TexHandle);
3876 Ops.push_back(N->getOperand(2));
3877 Ops.push_back(N->getOperand(3));
3878 Ops.push_back(Chain);
3880 case NVPTXISD::Suld2DV4I16Clamp:
3881 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3882 Ops.push_back(TexHandle);
3883 Ops.push_back(N->getOperand(2));
3884 Ops.push_back(N->getOperand(3));
3885 Ops.push_back(Chain);
3887 case NVPTXISD::Suld2DV4I32Clamp:
3888 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3889 Ops.push_back(TexHandle);
3890 Ops.push_back(N->getOperand(2));
3891 Ops.push_back(N->getOperand(3));
3892 Ops.push_back(Chain);
3894 case NVPTXISD::Suld2DArrayI8Clamp:
3895 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3896 Ops.push_back(TexHandle);
3897 Ops.push_back(N->getOperand(2));
3898 Ops.push_back(N->getOperand(3));
3899 Ops.push_back(N->getOperand(4));
3900 Ops.push_back(Chain);
3902 case NVPTXISD::Suld2DArrayI16Clamp:
3903 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3904 Ops.push_back(TexHandle);
3905 Ops.push_back(N->getOperand(2));
3906 Ops.push_back(N->getOperand(3));
3907 Ops.push_back(N->getOperand(4));
3908 Ops.push_back(Chain);
3910 case NVPTXISD::Suld2DArrayI32Clamp:
3911 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3912 Ops.push_back(TexHandle);
3913 Ops.push_back(N->getOperand(2));
3914 Ops.push_back(N->getOperand(3));
3915 Ops.push_back(N->getOperand(4));
3916 Ops.push_back(Chain);
3918 case NVPTXISD::Suld2DArrayI64Clamp:
3919 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3920 Ops.push_back(TexHandle);
3921 Ops.push_back(N->getOperand(2));
3922 Ops.push_back(N->getOperand(3));
3923 Ops.push_back(N->getOperand(4));
3924 Ops.push_back(Chain);
3926 case NVPTXISD::Suld2DArrayV2I8Clamp:
3927 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3928 Ops.push_back(TexHandle);
3929 Ops.push_back(N->getOperand(2));
3930 Ops.push_back(N->getOperand(3));
3931 Ops.push_back(N->getOperand(4));
3932 Ops.push_back(Chain);
3934 case NVPTXISD::Suld2DArrayV2I16Clamp:
3935 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3936 Ops.push_back(TexHandle);
3937 Ops.push_back(N->getOperand(2));
3938 Ops.push_back(N->getOperand(3));
3939 Ops.push_back(N->getOperand(4));
3940 Ops.push_back(Chain);
3942 case NVPTXISD::Suld2DArrayV2I32Clamp:
3943 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3944 Ops.push_back(TexHandle);
3945 Ops.push_back(N->getOperand(2));
3946 Ops.push_back(N->getOperand(3));
3947 Ops.push_back(N->getOperand(4));
3948 Ops.push_back(Chain);
3950 case NVPTXISD::Suld2DArrayV2I64Clamp:
3951 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3952 Ops.push_back(TexHandle);
3953 Ops.push_back(N->getOperand(2));
3954 Ops.push_back(N->getOperand(3));
3955 Ops.push_back(N->getOperand(4));
3956 Ops.push_back(Chain);
3958 case NVPTXISD::Suld2DArrayV4I8Clamp:
3959 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3960 Ops.push_back(TexHandle);
3961 Ops.push_back(N->getOperand(2));
3962 Ops.push_back(N->getOperand(3));
3963 Ops.push_back(N->getOperand(4));
3964 Ops.push_back(Chain);
3966 case NVPTXISD::Suld2DArrayV4I16Clamp:
3967 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3968 Ops.push_back(TexHandle);
3969 Ops.push_back(N->getOperand(2));
3970 Ops.push_back(N->getOperand(3));
3971 Ops.push_back(N->getOperand(4));
3972 Ops.push_back(Chain);
3974 case NVPTXISD::Suld2DArrayV4I32Clamp:
3975 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3976 Ops.push_back(TexHandle);
3977 Ops.push_back(N->getOperand(2));
3978 Ops.push_back(N->getOperand(3));
3979 Ops.push_back(N->getOperand(4));
3980 Ops.push_back(Chain);
3982 case NVPTXISD::Suld3DI8Clamp:
3983 Opc = NVPTX::SULD_3D_I8_CLAMP;
3984 Ops.push_back(TexHandle);
3985 Ops.push_back(N->getOperand(2));
3986 Ops.push_back(N->getOperand(3));
3987 Ops.push_back(N->getOperand(4));
3988 Ops.push_back(Chain);
3990 case NVPTXISD::Suld3DI16Clamp:
3991 Opc = NVPTX::SULD_3D_I16_CLAMP;
3992 Ops.push_back(TexHandle);
3993 Ops.push_back(N->getOperand(2));
3994 Ops.push_back(N->getOperand(3));
3995 Ops.push_back(N->getOperand(4));
3996 Ops.push_back(Chain);
3998 case NVPTXISD::Suld3DI32Clamp:
3999 Opc = NVPTX::SULD_3D_I32_CLAMP;
4000 Ops.push_back(TexHandle);
4001 Ops.push_back(N->getOperand(2));
4002 Ops.push_back(N->getOperand(3));
4003 Ops.push_back(N->getOperand(4));
4004 Ops.push_back(Chain);
4006 case NVPTXISD::Suld3DI64Clamp:
4007 Opc = NVPTX::SULD_3D_I64_CLAMP;
4008 Ops.push_back(TexHandle);
4009 Ops.push_back(N->getOperand(2));
4010 Ops.push_back(N->getOperand(3));
4011 Ops.push_back(N->getOperand(4));
4012 Ops.push_back(Chain);
4014 case NVPTXISD::Suld3DV2I8Clamp:
4015 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4016 Ops.push_back(TexHandle);
4017 Ops.push_back(N->getOperand(2));
4018 Ops.push_back(N->getOperand(3));
4019 Ops.push_back(N->getOperand(4));
4020 Ops.push_back(Chain);
4022 case NVPTXISD::Suld3DV2I16Clamp:
4023 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4024 Ops.push_back(TexHandle);
4025 Ops.push_back(N->getOperand(2));
4026 Ops.push_back(N->getOperand(3));
4027 Ops.push_back(N->getOperand(4));
4028 Ops.push_back(Chain);
4030 case NVPTXISD::Suld3DV2I32Clamp:
4031 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4032 Ops.push_back(TexHandle);
4033 Ops.push_back(N->getOperand(2));
4034 Ops.push_back(N->getOperand(3));
4035 Ops.push_back(N->getOperand(4));
4036 Ops.push_back(Chain);
4038 case NVPTXISD::Suld3DV2I64Clamp:
4039 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4040 Ops.push_back(TexHandle);
4041 Ops.push_back(N->getOperand(2));
4042 Ops.push_back(N->getOperand(3));
4043 Ops.push_back(N->getOperand(4));
4044 Ops.push_back(Chain);
4046 case NVPTXISD::Suld3DV4I8Clamp:
4047 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4048 Ops.push_back(TexHandle);
4049 Ops.push_back(N->getOperand(2));
4050 Ops.push_back(N->getOperand(3));
4051 Ops.push_back(N->getOperand(4));
4052 Ops.push_back(Chain);
4054 case NVPTXISD::Suld3DV4I16Clamp:
4055 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4056 Ops.push_back(TexHandle);
4057 Ops.push_back(N->getOperand(2));
4058 Ops.push_back(N->getOperand(3));
4059 Ops.push_back(N->getOperand(4));
4060 Ops.push_back(Chain);
4062 case NVPTXISD::Suld3DV4I32Clamp:
4063 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4064 Ops.push_back(TexHandle);
4065 Ops.push_back(N->getOperand(2));
4066 Ops.push_back(N->getOperand(3));
4067 Ops.push_back(N->getOperand(4));
4068 Ops.push_back(Chain);
4070 case NVPTXISD::Suld1DI8Trap:
4071 Opc = NVPTX::SULD_1D_I8_TRAP;
4072 Ops.push_back(TexHandle);
4073 Ops.push_back(N->getOperand(2));
4074 Ops.push_back(Chain);
4076 case NVPTXISD::Suld1DI16Trap:
4077 Opc = NVPTX::SULD_1D_I16_TRAP;
4078 Ops.push_back(TexHandle);
4079 Ops.push_back(N->getOperand(2));
4080 Ops.push_back(Chain);
4082 case NVPTXISD::Suld1DI32Trap:
4083 Opc = NVPTX::SULD_1D_I32_TRAP;
4084 Ops.push_back(TexHandle);
4085 Ops.push_back(N->getOperand(2));
4086 Ops.push_back(Chain);
4088 case NVPTXISD::Suld1DI64Trap:
4089 Opc = NVPTX::SULD_1D_I64_TRAP;
4090 Ops.push_back(TexHandle);
4091 Ops.push_back(N->getOperand(2));
4092 Ops.push_back(Chain);
4094 case NVPTXISD::Suld1DV2I8Trap:
4095 Opc = NVPTX::SULD_1D_V2I8_TRAP;
4096 Ops.push_back(TexHandle);
4097 Ops.push_back(N->getOperand(2));
4098 Ops.push_back(Chain);
4100 case NVPTXISD::Suld1DV2I16Trap:
4101 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4102 Ops.push_back(TexHandle);
4103 Ops.push_back(N->getOperand(2));
4104 Ops.push_back(Chain);
4106 case NVPTXISD::Suld1DV2I32Trap:
4107 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4108 Ops.push_back(TexHandle);
4109 Ops.push_back(N->getOperand(2));
4110 Ops.push_back(Chain);
4112 case NVPTXISD::Suld1DV2I64Trap:
4113 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4114 Ops.push_back(TexHandle);
4115 Ops.push_back(N->getOperand(2));
4116 Ops.push_back(Chain);
4118 case NVPTXISD::Suld1DV4I8Trap:
4119 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4120 Ops.push_back(TexHandle);
4121 Ops.push_back(N->getOperand(2));
4122 Ops.push_back(Chain);
4124 case NVPTXISD::Suld1DV4I16Trap:
4125 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4126 Ops.push_back(TexHandle);
4127 Ops.push_back(N->getOperand(2));
4128 Ops.push_back(Chain);
4130 case NVPTXISD::Suld1DV4I32Trap:
4131 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4132 Ops.push_back(TexHandle);
4133 Ops.push_back(N->getOperand(2));
4134 Ops.push_back(Chain);
4136 case NVPTXISD::Suld1DArrayI8Trap:
4137 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4138 Ops.push_back(TexHandle);
4139 Ops.push_back(N->getOperand(2));
4140 Ops.push_back(N->getOperand(3));
4141 Ops.push_back(Chain);
4143 case NVPTXISD::Suld1DArrayI16Trap:
4144 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4145 Ops.push_back(TexHandle);
4146 Ops.push_back(N->getOperand(2));
4147 Ops.push_back(N->getOperand(3));
4148 Ops.push_back(Chain);
4150 case NVPTXISD::Suld1DArrayI32Trap:
4151 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4152 Ops.push_back(TexHandle);
4153 Ops.push_back(N->getOperand(2));
4154 Ops.push_back(N->getOperand(3));
4155 Ops.push_back(Chain);
4157 case NVPTXISD::Suld1DArrayI64Trap:
4158 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4159 Ops.push_back(TexHandle);
4160 Ops.push_back(N->getOperand(2));
4161 Ops.push_back(N->getOperand(3));
4162 Ops.push_back(Chain);
4164 case NVPTXISD::Suld1DArrayV2I8Trap:
4165 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4166 Ops.push_back(TexHandle);
4167 Ops.push_back(N->getOperand(2));
4168 Ops.push_back(N->getOperand(3));
4169 Ops.push_back(Chain);
4171 case NVPTXISD::Suld1DArrayV2I16Trap:
4172 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4173 Ops.push_back(TexHandle);
4174 Ops.push_back(N->getOperand(2));
4175 Ops.push_back(N->getOperand(3));
4176 Ops.push_back(Chain);
4178 case NVPTXISD::Suld1DArrayV2I32Trap:
4179 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4180 Ops.push_back(TexHandle);
4181 Ops.push_back(N->getOperand(2));
4182 Ops.push_back(N->getOperand(3));
4183 Ops.push_back(Chain);
4185 case NVPTXISD::Suld1DArrayV2I64Trap:
4186 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4187 Ops.push_back(TexHandle);
4188 Ops.push_back(N->getOperand(2));
4189 Ops.push_back(N->getOperand(3));
4190 Ops.push_back(Chain);
4192 case NVPTXISD::Suld1DArrayV4I8Trap:
4193 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4194 Ops.push_back(TexHandle);
4195 Ops.push_back(N->getOperand(2));
4196 Ops.push_back(N->getOperand(3));
4197 Ops.push_back(Chain);
4199 case NVPTXISD::Suld1DArrayV4I16Trap:
4200 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4201 Ops.push_back(TexHandle);
4202 Ops.push_back(N->getOperand(2));
4203 Ops.push_back(N->getOperand(3));
4204 Ops.push_back(Chain);
4206 case NVPTXISD::Suld1DArrayV4I32Trap:
4207 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4208 Ops.push_back(TexHandle);
4209 Ops.push_back(N->getOperand(2));
4210 Ops.push_back(N->getOperand(3));
4211 Ops.push_back(Chain);
4213 case NVPTXISD::Suld2DI8Trap:
4214 Opc = NVPTX::SULD_2D_I8_TRAP;
4215 Ops.push_back(TexHandle);
4216 Ops.push_back(N->getOperand(2));
4217 Ops.push_back(N->getOperand(3));
4218 Ops.push_back(Chain);
4220 case NVPTXISD::Suld2DI16Trap:
4221 Opc = NVPTX::SULD_2D_I16_TRAP;
4222 Ops.push_back(TexHandle);
4223 Ops.push_back(N->getOperand(2));
4224 Ops.push_back(N->getOperand(3));
4225 Ops.push_back(Chain);
4227 case NVPTXISD::Suld2DI32Trap:
4228 Opc = NVPTX::SULD_2D_I32_TRAP;
4229 Ops.push_back(TexHandle);
4230 Ops.push_back(N->getOperand(2));
4231 Ops.push_back(N->getOperand(3));
4232 Ops.push_back(Chain);
4234 case NVPTXISD::Suld2DI64Trap:
4235 Opc = NVPTX::SULD_2D_I64_TRAP;
4236 Ops.push_back(TexHandle);
4237 Ops.push_back(N->getOperand(2));
4238 Ops.push_back(N->getOperand(3));
4239 Ops.push_back(Chain);
4241 case NVPTXISD::Suld2DV2I8Trap:
4242 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4243 Ops.push_back(TexHandle);
4244 Ops.push_back(N->getOperand(2));
4245 Ops.push_back(N->getOperand(3));
4246 Ops.push_back(Chain);
4248 case NVPTXISD::Suld2DV2I16Trap:
4249 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4250 Ops.push_back(TexHandle);
4251 Ops.push_back(N->getOperand(2));
4252 Ops.push_back(N->getOperand(3));
4253 Ops.push_back(Chain);
4255 case NVPTXISD::Suld2DV2I32Trap:
4256 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4257 Ops.push_back(TexHandle);
4258 Ops.push_back(N->getOperand(2));
4259 Ops.push_back(N->getOperand(3));
4260 Ops.push_back(Chain);
4262 case NVPTXISD::Suld2DV2I64Trap:
4263 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4264 Ops.push_back(TexHandle);
4265 Ops.push_back(N->getOperand(2));
4266 Ops.push_back(N->getOperand(3));
4267 Ops.push_back(Chain);
4269 case NVPTXISD::Suld2DV4I8Trap:
4270 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4271 Ops.push_back(TexHandle);
4272 Ops.push_back(N->getOperand(2));
4273 Ops.push_back(N->getOperand(3));
4274 Ops.push_back(Chain);
4276 case NVPTXISD::Suld2DV4I16Trap:
4277 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4278 Ops.push_back(TexHandle);
4279 Ops.push_back(N->getOperand(2));
4280 Ops.push_back(N->getOperand(3));
4281 Ops.push_back(Chain);
4283 case NVPTXISD::Suld2DV4I32Trap:
4284 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4285 Ops.push_back(TexHandle);
4286 Ops.push_back(N->getOperand(2));
4287 Ops.push_back(N->getOperand(3));
4288 Ops.push_back(Chain);
4290 case NVPTXISD::Suld2DArrayI8Trap:
4291 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4292 Ops.push_back(TexHandle);
4293 Ops.push_back(N->getOperand(2));
4294 Ops.push_back(N->getOperand(3));
4295 Ops.push_back(N->getOperand(4));
4296 Ops.push_back(Chain);
4298 case NVPTXISD::Suld2DArrayI16Trap:
4299 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4300 Ops.push_back(TexHandle);
4301 Ops.push_back(N->getOperand(2));
4302 Ops.push_back(N->getOperand(3));
4303 Ops.push_back(N->getOperand(4));
4304 Ops.push_back(Chain);
4306 case NVPTXISD::Suld2DArrayI32Trap:
4307 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4308 Ops.push_back(TexHandle);
4309 Ops.push_back(N->getOperand(2));
4310 Ops.push_back(N->getOperand(3));
4311 Ops.push_back(N->getOperand(4));
4312 Ops.push_back(Chain);
4314 case NVPTXISD::Suld2DArrayI64Trap:
4315 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4316 Ops.push_back(TexHandle);
4317 Ops.push_back(N->getOperand(2));
4318 Ops.push_back(N->getOperand(3));
4319 Ops.push_back(N->getOperand(4));
4320 Ops.push_back(Chain);
4322 case NVPTXISD::Suld2DArrayV2I8Trap:
4323 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4324 Ops.push_back(TexHandle);
4325 Ops.push_back(N->getOperand(2));
4326 Ops.push_back(N->getOperand(3));
4327 Ops.push_back(N->getOperand(4));
4328 Ops.push_back(Chain);
4330 case NVPTXISD::Suld2DArrayV2I16Trap:
4331 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4332 Ops.push_back(TexHandle);
4333 Ops.push_back(N->getOperand(2));
4334 Ops.push_back(N->getOperand(3));
4335 Ops.push_back(N->getOperand(4));
4336 Ops.push_back(Chain);
4338 case NVPTXISD::Suld2DArrayV2I32Trap:
4339 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4340 Ops.push_back(TexHandle);
4341 Ops.push_back(N->getOperand(2));
4342 Ops.push_back(N->getOperand(3));
4343 Ops.push_back(N->getOperand(4));
4344 Ops.push_back(Chain);
4346 case NVPTXISD::Suld2DArrayV2I64Trap:
4347 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4348 Ops.push_back(TexHandle);
4349 Ops.push_back(N->getOperand(2));
4350 Ops.push_back(N->getOperand(3));
4351 Ops.push_back(N->getOperand(4));
4352 Ops.push_back(Chain);
4354 case NVPTXISD::Suld2DArrayV4I8Trap:
4355 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4356 Ops.push_back(TexHandle);
4357 Ops.push_back(N->getOperand(2));
4358 Ops.push_back(N->getOperand(3));
4359 Ops.push_back(N->getOperand(4));
4360 Ops.push_back(Chain);
4362 case NVPTXISD::Suld2DArrayV4I16Trap:
4363 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4364 Ops.push_back(TexHandle);
4365 Ops.push_back(N->getOperand(2));
4366 Ops.push_back(N->getOperand(3));
4367 Ops.push_back(N->getOperand(4));
4368 Ops.push_back(Chain);
4370 case NVPTXISD::Suld2DArrayV4I32Trap:
4371 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4372 Ops.push_back(TexHandle);
4373 Ops.push_back(N->getOperand(2));
4374 Ops.push_back(N->getOperand(3));
4375 Ops.push_back(N->getOperand(4));
4376 Ops.push_back(Chain);
4378 case NVPTXISD::Suld3DI8Trap:
4379 Opc = NVPTX::SULD_3D_I8_TRAP;
4380 Ops.push_back(TexHandle);
4381 Ops.push_back(N->getOperand(2));
4382 Ops.push_back(N->getOperand(3));
4383 Ops.push_back(N->getOperand(4));
4384 Ops.push_back(Chain);
4386 case NVPTXISD::Suld3DI16Trap:
4387 Opc = NVPTX::SULD_3D_I16_TRAP;
4388 Ops.push_back(TexHandle);
4389 Ops.push_back(N->getOperand(2));
4390 Ops.push_back(N->getOperand(3));
4391 Ops.push_back(N->getOperand(4));
4392 Ops.push_back(Chain);
4394 case NVPTXISD::Suld3DI32Trap:
4395 Opc = NVPTX::SULD_3D_I32_TRAP;
4396 Ops.push_back(TexHandle);
4397 Ops.push_back(N->getOperand(2));
4398 Ops.push_back(N->getOperand(3));
4399 Ops.push_back(N->getOperand(4));
4400 Ops.push_back(Chain);
4402 case NVPTXISD::Suld3DI64Trap:
4403 Opc = NVPTX::SULD_3D_I64_TRAP;
4404 Ops.push_back(TexHandle);
4405 Ops.push_back(N->getOperand(2));
4406 Ops.push_back(N->getOperand(3));
4407 Ops.push_back(N->getOperand(4));
4408 Ops.push_back(Chain);
4410 case NVPTXISD::Suld3DV2I8Trap:
4411 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4412 Ops.push_back(TexHandle);
4413 Ops.push_back(N->getOperand(2));
4414 Ops.push_back(N->getOperand(3));
4415 Ops.push_back(N->getOperand(4));
4416 Ops.push_back(Chain);
4418 case NVPTXISD::Suld3DV2I16Trap:
4419 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4420 Ops.push_back(TexHandle);
4421 Ops.push_back(N->getOperand(2));
4422 Ops.push_back(N->getOperand(3));
4423 Ops.push_back(N->getOperand(4));
4424 Ops.push_back(Chain);
4426 case NVPTXISD::Suld3DV2I32Trap:
4427 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4428 Ops.push_back(TexHandle);
4429 Ops.push_back(N->getOperand(2));
4430 Ops.push_back(N->getOperand(3));
4431 Ops.push_back(N->getOperand(4));
4432 Ops.push_back(Chain);
4434 case NVPTXISD::Suld3DV2I64Trap:
4435 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4436 Ops.push_back(TexHandle);
4437 Ops.push_back(N->getOperand(2));
4438 Ops.push_back(N->getOperand(3));
4439 Ops.push_back(N->getOperand(4));
4440 Ops.push_back(Chain);
4442 case NVPTXISD::Suld3DV4I8Trap:
4443 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4444 Ops.push_back(TexHandle);
4445 Ops.push_back(N->getOperand(2));
4446 Ops.push_back(N->getOperand(3));
4447 Ops.push_back(N->getOperand(4));
4448 Ops.push_back(Chain);
4450 case NVPTXISD::Suld3DV4I16Trap:
4451 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4452 Ops.push_back(TexHandle);
4453 Ops.push_back(N->getOperand(2));
4454 Ops.push_back(N->getOperand(3));
4455 Ops.push_back(N->getOperand(4));
4456 Ops.push_back(Chain);
4458 case NVPTXISD::Suld3DV4I32Trap:
4459 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4460 Ops.push_back(TexHandle);
4461 Ops.push_back(N->getOperand(2));
4462 Ops.push_back(N->getOperand(3));
4463 Ops.push_back(N->getOperand(4));
4464 Ops.push_back(Chain);
4466 case NVPTXISD::Suld1DI8Zero:
4467 Opc = NVPTX::SULD_1D_I8_ZERO;
4468 Ops.push_back(TexHandle);
4469 Ops.push_back(N->getOperand(2));
4470 Ops.push_back(Chain);
4472 case NVPTXISD::Suld1DI16Zero:
4473 Opc = NVPTX::SULD_1D_I16_ZERO;
4474 Ops.push_back(TexHandle);
4475 Ops.push_back(N->getOperand(2));
4476 Ops.push_back(Chain);
4478 case NVPTXISD::Suld1DI32Zero:
4479 Opc = NVPTX::SULD_1D_I32_ZERO;
4480 Ops.push_back(TexHandle);
4481 Ops.push_back(N->getOperand(2));
4482 Ops.push_back(Chain);
4484 case NVPTXISD::Suld1DI64Zero:
4485 Opc = NVPTX::SULD_1D_I64_ZERO;
4486 Ops.push_back(TexHandle);
4487 Ops.push_back(N->getOperand(2));
4488 Ops.push_back(Chain);
4490 case NVPTXISD::Suld1DV2I8Zero:
4491 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4492 Ops.push_back(TexHandle);
4493 Ops.push_back(N->getOperand(2));
4494 Ops.push_back(Chain);
4496 case NVPTXISD::Suld1DV2I16Zero:
4497 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4498 Ops.push_back(TexHandle);
4499 Ops.push_back(N->getOperand(2));
4500 Ops.push_back(Chain);
4502 case NVPTXISD::Suld1DV2I32Zero:
4503 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4504 Ops.push_back(TexHandle);
4505 Ops.push_back(N->getOperand(2));
4506 Ops.push_back(Chain);
4508 case NVPTXISD::Suld1DV2I64Zero:
4509 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4510 Ops.push_back(TexHandle);
4511 Ops.push_back(N->getOperand(2));
4512 Ops.push_back(Chain);
4514 case NVPTXISD::Suld1DV4I8Zero:
4515 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4516 Ops.push_back(TexHandle);
4517 Ops.push_back(N->getOperand(2));
4518 Ops.push_back(Chain);
4520 case NVPTXISD::Suld1DV4I16Zero:
4521 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4522 Ops.push_back(TexHandle);
4523 Ops.push_back(N->getOperand(2));
4524 Ops.push_back(Chain);
4526 case NVPTXISD::Suld1DV4I32Zero:
4527 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4528 Ops.push_back(TexHandle);
4529 Ops.push_back(N->getOperand(2));
4530 Ops.push_back(Chain);
4532 case NVPTXISD::Suld1DArrayI8Zero:
4533 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4534 Ops.push_back(TexHandle);
4535 Ops.push_back(N->getOperand(2));
4536 Ops.push_back(N->getOperand(3));
4537 Ops.push_back(Chain);
4539 case NVPTXISD::Suld1DArrayI16Zero:
4540 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4541 Ops.push_back(TexHandle);
4542 Ops.push_back(N->getOperand(2));
4543 Ops.push_back(N->getOperand(3));
4544 Ops.push_back(Chain);
4546 case NVPTXISD::Suld1DArrayI32Zero:
4547 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4548 Ops.push_back(TexHandle);
4549 Ops.push_back(N->getOperand(2));
4550 Ops.push_back(N->getOperand(3));
4551 Ops.push_back(Chain);
4553 case NVPTXISD::Suld1DArrayI64Zero:
4554 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4555 Ops.push_back(TexHandle);
4556 Ops.push_back(N->getOperand(2));
4557 Ops.push_back(N->getOperand(3));
4558 Ops.push_back(Chain);
4560 case NVPTXISD::Suld1DArrayV2I8Zero:
4561 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4562 Ops.push_back(TexHandle);
4563 Ops.push_back(N->getOperand(2));
4564 Ops.push_back(N->getOperand(3));
4565 Ops.push_back(Chain);
4567 case NVPTXISD::Suld1DArrayV2I16Zero:
4568 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4569 Ops.push_back(TexHandle);
4570 Ops.push_back(N->getOperand(2));
4571 Ops.push_back(N->getOperand(3));
4572 Ops.push_back(Chain);
4574 case NVPTXISD::Suld1DArrayV2I32Zero:
4575 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4576 Ops.push_back(TexHandle);
4577 Ops.push_back(N->getOperand(2));
4578 Ops.push_back(N->getOperand(3));
4579 Ops.push_back(Chain);
4581 case NVPTXISD::Suld1DArrayV2I64Zero:
4582 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4583 Ops.push_back(TexHandle);
4584 Ops.push_back(N->getOperand(2));
4585 Ops.push_back(N->getOperand(3));
4586 Ops.push_back(Chain);
4588 case NVPTXISD::Suld1DArrayV4I8Zero:
4589 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4590 Ops.push_back(TexHandle);
4591 Ops.push_back(N->getOperand(2));
4592 Ops.push_back(N->getOperand(3));
4593 Ops.push_back(Chain);
4595 case NVPTXISD::Suld1DArrayV4I16Zero:
4596 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4597 Ops.push_back(TexHandle);
4598 Ops.push_back(N->getOperand(2));
4599 Ops.push_back(N->getOperand(3));
4600 Ops.push_back(Chain);
4602 case NVPTXISD::Suld1DArrayV4I32Zero:
4603 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4604 Ops.push_back(TexHandle);
4605 Ops.push_back(N->getOperand(2));
4606 Ops.push_back(N->getOperand(3));
4607 Ops.push_back(Chain);
4609 case NVPTXISD::Suld2DI8Zero:
4610 Opc = NVPTX::SULD_2D_I8_ZERO;
4611 Ops.push_back(TexHandle);
4612 Ops.push_back(N->getOperand(2));
4613 Ops.push_back(N->getOperand(3));
4614 Ops.push_back(Chain);
4616 case NVPTXISD::Suld2DI16Zero:
4617 Opc = NVPTX::SULD_2D_I16_ZERO;
4618 Ops.push_back(TexHandle);
4619 Ops.push_back(N->getOperand(2));
4620 Ops.push_back(N->getOperand(3));
4621 Ops.push_back(Chain);
4623 case NVPTXISD::Suld2DI32Zero:
4624 Opc = NVPTX::SULD_2D_I32_ZERO;
4625 Ops.push_back(TexHandle);
4626 Ops.push_back(N->getOperand(2));
4627 Ops.push_back(N->getOperand(3));
4628 Ops.push_back(Chain);
4630 case NVPTXISD::Suld2DI64Zero:
4631 Opc = NVPTX::SULD_2D_I64_ZERO;
4632 Ops.push_back(TexHandle);
4633 Ops.push_back(N->getOperand(2));
4634 Ops.push_back(N->getOperand(3));
4635 Ops.push_back(Chain);
4637 case NVPTXISD::Suld2DV2I8Zero:
4638 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4639 Ops.push_back(TexHandle);
4640 Ops.push_back(N->getOperand(2));
4641 Ops.push_back(N->getOperand(3));
4642 Ops.push_back(Chain);
4644 case NVPTXISD::Suld2DV2I16Zero:
4645 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4646 Ops.push_back(TexHandle);
4647 Ops.push_back(N->getOperand(2));
4648 Ops.push_back(N->getOperand(3));
4649 Ops.push_back(Chain);
4651 case NVPTXISD::Suld2DV2I32Zero:
4652 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4653 Ops.push_back(TexHandle);
4654 Ops.push_back(N->getOperand(2));
4655 Ops.push_back(N->getOperand(3));
4656 Ops.push_back(Chain);
4658 case NVPTXISD::Suld2DV2I64Zero:
4659 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4660 Ops.push_back(TexHandle);
4661 Ops.push_back(N->getOperand(2));
4662 Ops.push_back(N->getOperand(3));
4663 Ops.push_back(Chain);
4665 case NVPTXISD::Suld2DV4I8Zero:
4666 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4667 Ops.push_back(TexHandle);
4668 Ops.push_back(N->getOperand(2));
4669 Ops.push_back(N->getOperand(3));
4670 Ops.push_back(Chain);
4672 case NVPTXISD::Suld2DV4I16Zero:
4673 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4674 Ops.push_back(TexHandle);
4675 Ops.push_back(N->getOperand(2));
4676 Ops.push_back(N->getOperand(3));
4677 Ops.push_back(Chain);
4679 case NVPTXISD::Suld2DV4I32Zero:
4680 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4681 Ops.push_back(TexHandle);
4682 Ops.push_back(N->getOperand(2));
4683 Ops.push_back(N->getOperand(3));
4684 Ops.push_back(Chain);
4686 case NVPTXISD::Suld2DArrayI8Zero:
4687 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4688 Ops.push_back(TexHandle);
4689 Ops.push_back(N->getOperand(2));
4690 Ops.push_back(N->getOperand(3));
4691 Ops.push_back(N->getOperand(4));
4692 Ops.push_back(Chain);
4694 case NVPTXISD::Suld2DArrayI16Zero:
4695 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4696 Ops.push_back(TexHandle);
4697 Ops.push_back(N->getOperand(2));
4698 Ops.push_back(N->getOperand(3));
4699 Ops.push_back(N->getOperand(4));
4700 Ops.push_back(Chain);
4702 case NVPTXISD::Suld2DArrayI32Zero:
4703 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4704 Ops.push_back(TexHandle);
4705 Ops.push_back(N->getOperand(2));
4706 Ops.push_back(N->getOperand(3));
4707 Ops.push_back(N->getOperand(4));
4708 Ops.push_back(Chain);
4710 case NVPTXISD::Suld2DArrayI64Zero:
4711 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4712 Ops.push_back(TexHandle);
4713 Ops.push_back(N->getOperand(2));
4714 Ops.push_back(N->getOperand(3));
4715 Ops.push_back(N->getOperand(4));
4716 Ops.push_back(Chain);
4718 case NVPTXISD::Suld2DArrayV2I8Zero:
4719 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4720 Ops.push_back(TexHandle);
4721 Ops.push_back(N->getOperand(2));
4722 Ops.push_back(N->getOperand(3));
4723 Ops.push_back(N->getOperand(4));
4724 Ops.push_back(Chain);
4726 case NVPTXISD::Suld2DArrayV2I16Zero:
4727 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4728 Ops.push_back(TexHandle);
4729 Ops.push_back(N->getOperand(2));
4730 Ops.push_back(N->getOperand(3));
4731 Ops.push_back(N->getOperand(4));
4732 Ops.push_back(Chain);
4734 case NVPTXISD::Suld2DArrayV2I32Zero:
4735 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4736 Ops.push_back(TexHandle);
4737 Ops.push_back(N->getOperand(2));
4738 Ops.push_back(N->getOperand(3));
4739 Ops.push_back(N->getOperand(4));
4740 Ops.push_back(Chain);
4742 case NVPTXISD::Suld2DArrayV2I64Zero:
4743 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4744 Ops.push_back(TexHandle);
4745 Ops.push_back(N->getOperand(2));
4746 Ops.push_back(N->getOperand(3));
4747 Ops.push_back(N->getOperand(4));
4748 Ops.push_back(Chain);
4750 case NVPTXISD::Suld2DArrayV4I8Zero:
4751 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4752 Ops.push_back(TexHandle);
4753 Ops.push_back(N->getOperand(2));
4754 Ops.push_back(N->getOperand(3));
4755 Ops.push_back(N->getOperand(4));
4756 Ops.push_back(Chain);
4758 case NVPTXISD::Suld2DArrayV4I16Zero:
4759 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4760 Ops.push_back(TexHandle);
4761 Ops.push_back(N->getOperand(2));
4762 Ops.push_back(N->getOperand(3));
4763 Ops.push_back(N->getOperand(4));
4764 Ops.push_back(Chain);
4766 case NVPTXISD::Suld2DArrayV4I32Zero:
4767 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4768 Ops.push_back(TexHandle);
4769 Ops.push_back(N->getOperand(2));
4770 Ops.push_back(N->getOperand(3));
4771 Ops.push_back(N->getOperand(4));
4772 Ops.push_back(Chain);
4774 case NVPTXISD::Suld3DI8Zero:
4775 Opc = NVPTX::SULD_3D_I8_ZERO;
4776 Ops.push_back(TexHandle);
4777 Ops.push_back(N->getOperand(2));
4778 Ops.push_back(N->getOperand(3));
4779 Ops.push_back(N->getOperand(4));
4780 Ops.push_back(Chain);
4782 case NVPTXISD::Suld3DI16Zero:
4783 Opc = NVPTX::SULD_3D_I16_ZERO;
4784 Ops.push_back(TexHandle);
4785 Ops.push_back(N->getOperand(2));
4786 Ops.push_back(N->getOperand(3));
4787 Ops.push_back(N->getOperand(4));
4788 Ops.push_back(Chain);
4790 case NVPTXISD::Suld3DI32Zero:
4791 Opc = NVPTX::SULD_3D_I32_ZERO;
4792 Ops.push_back(TexHandle);
4793 Ops.push_back(N->getOperand(2));
4794 Ops.push_back(N->getOperand(3));
4795 Ops.push_back(N->getOperand(4));
4796 Ops.push_back(Chain);
4798 case NVPTXISD::Suld3DI64Zero:
4799 Opc = NVPTX::SULD_3D_I64_ZERO;
4800 Ops.push_back(TexHandle);
4801 Ops.push_back(N->getOperand(2));
4802 Ops.push_back(N->getOperand(3));
4803 Ops.push_back(N->getOperand(4));
4804 Ops.push_back(Chain);
4806 case NVPTXISD::Suld3DV2I8Zero:
4807 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4808 Ops.push_back(TexHandle);
4809 Ops.push_back(N->getOperand(2));
4810 Ops.push_back(N->getOperand(3));
4811 Ops.push_back(N->getOperand(4));
4812 Ops.push_back(Chain);
4814 case NVPTXISD::Suld3DV2I16Zero:
4815 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4816 Ops.push_back(TexHandle);
4817 Ops.push_back(N->getOperand(2));
4818 Ops.push_back(N->getOperand(3));
4819 Ops.push_back(N->getOperand(4));
4820 Ops.push_back(Chain);
4822 case NVPTXISD::Suld3DV2I32Zero:
4823 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4824 Ops.push_back(TexHandle);
4825 Ops.push_back(N->getOperand(2));
4826 Ops.push_back(N->getOperand(3));
4827 Ops.push_back(N->getOperand(4));
4828 Ops.push_back(Chain);
4830 case NVPTXISD::Suld3DV2I64Zero:
4831 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4832 Ops.push_back(TexHandle);
4833 Ops.push_back(N->getOperand(2));
4834 Ops.push_back(N->getOperand(3));
4835 Ops.push_back(N->getOperand(4));
4836 Ops.push_back(Chain);
4838 case NVPTXISD::Suld3DV4I8Zero:
4839 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4840 Ops.push_back(TexHandle);
4841 Ops.push_back(N->getOperand(2));
4842 Ops.push_back(N->getOperand(3));
4843 Ops.push_back(N->getOperand(4));
4844 Ops.push_back(Chain);
4846 case NVPTXISD::Suld3DV4I16Zero:
4847 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4848 Ops.push_back(TexHandle);
4849 Ops.push_back(N->getOperand(2));
4850 Ops.push_back(N->getOperand(3));
4851 Ops.push_back(N->getOperand(4));
4852 Ops.push_back(Chain);
4854 case NVPTXISD::Suld3DV4I32Zero:
4855 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4856 Ops.push_back(TexHandle);
4857 Ops.push_back(N->getOperand(2));
4858 Ops.push_back(N->getOperand(3));
4859 Ops.push_back(N->getOperand(4));
4860 Ops.push_back(Chain);
4863 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
4868 /// tryBFE - Look for instruction sequences that can be made more efficient
4869 /// by using the 'bfe' (bit-field extract) PTX instruction
///
/// Shapes considered for \p N (as seen in the branches below):
///   * (and (srl/sra val, C), mask)  -- mask must be a low-bit mask
///   * (srl/sra (and val, mask), C)  -- mask is a low-bit or a shifted mask
///   * (srl/sra (shl val, NN), MM)   -- requires MM >= NN
/// When a shape matches, N is replaced with a machine node using one of the
/// BFE_{S,U}{32,64}rii opcodes, chosen from the value type of the extracted
/// source value.
/// NOTE(review): presumably returns true only when N was replaced -- confirm
/// against the return statements.
4870 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
4872 SDValue LHS = N->getOperand(0);
4873 SDValue RHS = N->getOperand(1);
4877 bool IsSigned = false;
4879 if (N->getOpcode() == ISD::AND) {
4880 // Canonicalize the operands
4881 // We want 'and %val, %mask'
4882 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4883 std::swap(LHS, RHS);
4886 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4888 // We need a constant mask on the RHS of the AND
4892 // Extract the mask bits
4893 uint64_t MaskVal = Mask->getZExtValue();
4894 if (!isMask_64(MaskVal)) {
4895 // We *could* handle shifted masks here, but doing so would require an
4896 // 'and' operation to fix up the low-order bits so we would trade
4897 // shr+and for bfe+and, which has the same throughput
4901 // How many bits are in our mask?
4902 uint64_t NumBits = countTrailingOnes(MaskVal);
4903 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4905 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4906 // We have a 'srl/and' pair, extract the effective start bit and length
4907 Val = LHS.getNode()->getOperand(0);
4908 Start = LHS.getNode()->getOperand(1);
4909 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4911 uint64_t StartVal = StartConst->getZExtValue();
4912 // How many "good" bits do we have left? "good" is defined here as bits
4913 // that exist in the original value, not shifted in.
// NOTE(review): the width is read from Start (the shift-amount operand), not
// from Val, and the subtraction is unsigned, so a StartVal larger than that
// width wraps to a huge GoodBits and passes the check below -- confirm this
// is intended when Val is wider than the shift-amount type.
4914 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
4915 if (NumBits > GoodBits) {
4916 // Do not handle the case where bits have been shifted in. In theory
4917 // we could handle this, but the cost is likely higher than just
4918 // emitting the srl/and pair.
// The shift amount becomes the bit-field start, re-materialized as an i32
// target constant.
4921 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4923 // Do not handle the case where the shift amount (can be zero if no srl
4924 // was found) is not constant. We could handle this case, but it would
4925 // require run-time logic that would be more expensive than just
4926 // emitting the srl/and pair.
4930 // Do not handle the case where the LHS of the and is not a shift. While
4931 // it would be trivial to handle this case, it would just transform
4932 // 'and' -> 'bfe', but 'and' has higher-throughput.
4935 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
// Shift-rooted patterns: the shifted operand is either an AND with a constant
// mask or a SHL by a constant.
4936 if (LHS->getOpcode() == ISD::AND) {
4937 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4939 // Shift amount must be constant
4943 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4945 SDValue AndLHS = LHS->getOperand(0);
4946 SDValue AndRHS = LHS->getOperand(1);
4948 // Canonicalize the AND to have the mask on the RHS
4949 if (isa<ConstantSDNode>(AndLHS)) {
4950 std::swap(AndLHS, AndRHS);
4953 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4955 // Mask must be constant
4959 uint64_t MaskVal = MaskCnst->getZExtValue();
4962 if (isMask_64(MaskVal)) {
4964 // The number of bits in the result bitfield will be the number of
4965 // trailing ones (the AND) minus the number of bits we shift off
// NOTE(review): no guard is visible here ensuring ShiftAmt does not exceed the
// number of trailing ones; verify the difference cannot wrap/go negative.
4966 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4967 } else if (isShiftedMask_64(MaskVal)) {
4968 NumZeros = countTrailingZeros(MaskVal);
// Count the contiguous run of ones that sits above the trailing zeros.
4969 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4970 // The number of bits in the result bitfield will be the number of
4971 // trailing zeros plus the number of set bits in the mask minus the
4972 // number of bits we shift off
4973 NumBits = NumZeros + NumOnes - ShiftAmt;
4975 // This is not a mask we can handle
4979 if (ShiftAmt < NumZeros) {
4980 // Handling this case would require extra logic that would make this
4981 // transformation non-profitable
// The extraction starts at the shift amount and spans NumBits bits.
4986 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4987 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4988 } else if (LHS->getOpcode() == ISD::SHL) {
4989 // Here, we have a pattern like:
4991 // (sra (shl val, NN), MM)
4993 // (srl (shl val, NN), MM)
4995 // If MM >= NN, we can efficiently optimize this with bfe
4996 Val = LHS->getOperand(0);
4998 SDValue ShlRHS = LHS->getOperand(1);
4999 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
5001 // Shift amount must be constant
5004 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
5006 SDValue ShrRHS = RHS;
5007 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
5009 // Shift amount must be constant
5012 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5014 // To avoid extra codegen and be profitable, we need Outer >= Inner
5015 if (OuterShiftAmt < InnerShiftAmt) {
5019 // If the outer shift is more than the type size, we have no bitfield to
5020 // extract (since we also check that the inner shift is <= the outer shift
5021 // then this also implies that the inner shift is < the type size)
// (The check is >=, so an outer shift equal to the type size is rejected too.)
5022 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
// Start is the net right shift (outer minus inner); the length is the number
// of bits of Val that remain after the outer shift.
5026 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
5028 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
5031 if (N->getOpcode() == ISD::SRA) {
5032 // If we have a arithmetic right shift, we need to use the signed bfe
5047 // For the BFE operations we form here from "and" and "srl", always use the
5048 // unsigned variants.
// NOTE(review): signed opcodes are assigned below as well; presumably they are
// guarded by IsSigned, which is only relevant to the sra-of-shl pattern noted
// above -- confirm, since the comment above mentions only the unsigned case.
5049 if (Val.getValueType() == MVT::i32) {
5051 Opc = NVPTX::BFE_S32rii;
5053 Opc = NVPTX::BFE_U32rii;
5055 } else if (Val.getValueType() == MVT::i64) {
5057 Opc = NVPTX::BFE_S64rii;
5059 Opc = NVPTX::BFE_U64rii;
5062 // We cannot handle this type
// Swap N for the selected BFE machine node, keeping N's result types.
5070 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
5074 // SelectDirectAddr - Match a direct address for DAG.
5075 // A direct address could be a globaladdress or externalsymbol.
// Besides plain TargetGlobalAddress / TargetExternalSymbol nodes, two wrapped
// forms are looked through: an NVPTXISD::Wrapper node (its operand 0 becomes
// the address) and a generic->param addrspacecast of an NVPTXISD::MoveParam.
5076 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5077 // Return true if TGA or ES.
5078 if (N.getOpcode() == ISD::TargetGlobalAddress ||
5079 N.getOpcode() == ISD::TargetExternalSymbol) {
// Wrapper node: the wrapped operand is reported as the address.
5083 if (N.getOpcode() == NVPTXISD::Wrapper) {
5084 Address = N.getOperand(0);
5087 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
5088 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
5089 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
5090 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
5091 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
// Recurse on the MoveParam's operand (the argument symbol itself).
5092 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
// SelectADDRsi_imp - Shared matcher behind SelectADDRsi / SelectADDRsi64.
// Looks for Addr of the form (add base, constant) where base is accepted by
// SelectDirectAddr. In that case Base receives the direct address and Offset
// is built as a target constant from the constant addend.
5098 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5099 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5100 if (Addr.getOpcode() == ISD::ADD) {
// Only a constant second operand can be folded into the offset.
5101 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5102 SDValue base = Addr.getOperand(0);
// SelectDirectAddr writes the matched address into Base on success.
5103 if (SelectDirectAddr(base, Base)) {
5104 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
// SelectADDRsi - Forwards to SelectADDRsi_imp with MVT::i32 as the value type.
5114 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5115 SDValue &Base, SDValue &Offset) {
5116 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRsi64 - Forwards to SelectADDRsi_imp with MVT::i64 as the value
// type.
5120 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5121 SDValue &Base, SDValue &Offset) {
5122 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
// SelectADDRri_imp - Shared matcher behind SelectADDRri / SelectADDRri64.
// Handles, in order:
//   * a bare frame index: Base is the target frame index, Offset is 0;
//   * a TargetExternalSymbol / TargetGlobalAddress: rejected (direct calls);
//   * (add x, constant): Base is x (converted to a target frame index when x
//     is a frame index) and Offset is the constant.
// Base and Offset are created with value type mvt where a type is visible.
5126 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5127 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5128 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5129 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5130 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5133 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5134 Addr.getOpcode() == ISD::TargetGlobalAddress)
5135 return false; // direct calls.
5137 if (Addr.getOpcode() == ISD::ADD) {
// NOTE(review): Addr (a by-value parameter) is passed as the out-parameter,
// so a successful match overwrites the local Addr; presumably this branch
// bails out immediately afterwards -- confirm.
5138 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5141 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5142 if (FrameIndexSDNode *FIN =
5143 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5144 // Constant offset from frame ref.
5145 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
// Otherwise the first ADD operand is used as the base unchanged.
5147 Base = Addr.getOperand(0);
5148 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
// SelectADDRri - Forwards to SelectADDRri_imp with MVT::i32 as the value type.
5157 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5158 SDValue &Base, SDValue &Offset) {
5159 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
// SelectADDRri64 - Forwards to SelectADDRri_imp with MVT::i64 as the value
// type.
5163 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5164 SDValue &Base, SDValue &Offset) {
5165 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
// ChkMemSDNodeAddressSpace - Test whether the IR value behind a memory node's
// memory operand is a pointer in address space spN. Only MemSDNodes supply a
// value to inspect; the comparison uses the address space of that value's
// pointer type.
5168 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5169 unsigned int spN) const {
5170 const Value *Src = nullptr;
5171 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
// Special case: address space 0 combined with a pseudo source value on the
// memory operand. NOTE(review): presumably treated as a match -- confirm.
5172 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5174 Src = mN->getMemOperand()->getValue();
// Compare the pointer type's address space with the requested one.
5178 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5179 return (PT->getAddressSpace() == spN);
5183 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5184 /// inline asm expressions.
///
/// For the 'm' constraint the operand is first tried as a direct address
/// (pushed together with a zero i32 offset) and otherwise as a
/// register+immediate address via SelectADDRri; the selected operands are
/// appended to \p OutOps.
/// NOTE(review): the usual SelectionDAGISel convention is that this hook
/// returns false on success -- confirm against the return statements.
5185 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5186 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5188 switch (ConstraintID) {
5191 case InlineAsm::Constraint_m: // memory
// Direct address: emit the address plus an explicit zero offset.
5192 if (SelectDirectAddr(Op, Op0)) {
5193 OutOps.push_back(Op0);
5194 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
// Otherwise try base + immediate offset (32-bit variant).
5197 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5198 OutOps.push_back(Op0);
5199 OutOps.push_back(Op1);
5207 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5208 /// conversion from \p SrcTy to \p DestTy.
5209 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5211 switch (SrcTy.SimpleTy) {
5213 llvm_unreachable("Unhandled source type");
5215 switch (DestTy.SimpleTy) {
5217 llvm_unreachable("Unhandled dest type");
5219 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5221 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5223 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5226 switch (DestTy.SimpleTy) {
5228 llvm_unreachable("Unhandled dest type");
5230 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5232 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5234 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5237 switch (DestTy.SimpleTy) {
5239 llvm_unreachable("Unhandled dest type");
5241 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5243 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5245 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5248 switch (DestTy.SimpleTy) {
5250 llvm_unreachable("Unhandled dest type");
5252 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5254 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5256 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;