1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 def immFloat0 : PatLeaf<(fpimm), [{
11 float f = (float)N->getValueAPF().convertToFloat();
15 def immFloat1 : PatLeaf<(fpimm), [{
16 float f = (float)N->getValueAPF().convertToFloat();
20 def immDouble0 : PatLeaf<(fpimm), [{
21 double d = (double)N->getValueAPF().convertToDouble();
25 def immDouble1 : PatLeaf<(fpimm), [{
26 double d = (double)N->getValueAPF().convertToDouble();
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
35 let isConvergent = 1 in {
36 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
38 [(int_nvvm_barrier0)]>;
39 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
41 !strconcat(".reg .pred \t%p1; \n\t",
42 !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
43 !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
44 !strconcat("}}", ""))))),
45 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
46 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
48 !strconcat(".reg .pred \t%p1; \n\t",
49 !strconcat(".reg .pred \t%p2; \n\t",
50 !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
51 !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
52 !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
53 !strconcat("}}", ""))))))),
54 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
55 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
57 !strconcat(".reg .pred \t%p1; \n\t",
58 !strconcat(".reg .pred \t%p2; \n\t",
59 !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
60 !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
61 !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
62 !strconcat("}}", ""))))))),
63 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Selected for int_nvvm_bar_sync; prints `bar.sync` followed by its single
// immediate operand.
def INT_BAR_SYNC : NVPTXInst<
    (outs), (ins i32imm:$i),
    "bar.sync\t$i;",
    [(int_nvvm_bar_sync imm:$i)]>;
68 // shfl.{up,down,bfly,idx}.b32
69 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// The last two parameters to shfl can be regs or imms. ptxas is smart
// enough to inline constant registers, so strictly speaking we don't need to
// handle immediates here. But it's easy enough, and it makes our ptx more
// readable.
76 (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
77 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
78 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
82 (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
83 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
84 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
88 (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
89 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
90 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
94 (ins regclass:$src, i32imm:$offset, i32imm:$mask),
95 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
96 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
// One SHFL instantiation per (mode, element type) pair. The mode string is
// spliced into the "shfl.<mode>.b32" opcode text by the multiclass.

// shfl.down
defm INT_SHFL_DOWN_I32
    : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
    : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;

// shfl.up
defm INT_SHFL_UP_I32
    : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
    : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;

// shfl.bfly
defm INT_SHFL_BFLY_I32
    : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
    : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;

// shfl.idx
defm INT_SHFL_IDX_I32
    : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
    : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
108 } // isConvergent = 1
111 //-----------------------------------
112 // Explicit Memory Fence Functions
113 //-----------------------------------
114 class MEMBAR<string StrOp, Intrinsic IntOP> :
115 NVPTXInst<(outs), (ins),
// One MEMBAR record per membar suffix used here: .cta, .gl and .sys.
def INT_MEMBAR_CTA
    : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
    : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
    : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
123 //-----------------------------------
125 //-----------------------------------
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
// f32: fmin(1.0, fmax(0.0, a)) in each of the four operand orders is selected
// as CVT_f32_f32 with the CvtSAT mode.

// Constant first in both fmin and fmax.
def : Pat<
    (int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Constant first in fmin, last in fmax.
def : Pat<
    (int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Constant last in fmin, first in fmax.
def : Pat<
    (int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// Constant last in both fmin and fmax.
def : Pat<
    (int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: fmin(1.0, fmax(0.0, a)) in each of the four operand orders is selected
// as CVT_f64_f64 with the CvtSAT mode.

// Constant first in both fmin and fmax.
def : Pat<
    (int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Constant first in fmin, last in fmax.
def : Pat<
    (int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Constant last in fmin, first in fmax.
def : Pat<
    (int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Constant last in both fmin and fmax.
def : Pat<
    (int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// We need a full string for OpcStr here because we need to deal with cases
// where the complete assembly text differs from one instruction to the next.
162 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
163 NVPTXRegClass src_regclass, Intrinsic IntOP>
164 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
166 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
168 // We need a full string for OpcStr here because we need to deal with the case
169 // like INT_PTX_NATIVE_POWR_F.
170 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
171 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
172 : NVPTXInst<(outs t_regclass:$dst),
173 (ins s0_regclass:$src0, s1_regclass:$src1),
175 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
177 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
178 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
179 NVPTXRegClass s2_regclass, Intrinsic IntOP>
180 : NVPTXInst<(outs t_regclass:$dst),
181 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
183 [(set t_regclass:$dst,
184 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
190 def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
192 def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
195 def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
197 def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
// int_nvvm_prmt: three 32-bit integer sources, one 32-bit integer result,
// printed as prmt.b32.
def INT_NVVM_PRMT : F_MATH_3<
    "prmt.b32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
// Two-operand min/max. Each record pairs one NVVM intrinsic with the PTX text
// it is printed as; result and both sources share one register class.

// Integer min: .s32/.u32 on Int32Regs, .s64/.u64 on Int64Regs.
def INT_NVVM_MIN_I : F_MATH_2<
    "min.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI : F_MATH_2<
    "min.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_ui>;
def INT_NVVM_MIN_LL : F_MATH_2<
    "min.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL : F_MATH_2<
    "min.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ull>;

// Integer max: .s32/.u32 on Int32Regs, .s64/.u64 on Int64Regs.
def INT_NVVM_MAX_I : F_MATH_2<
    "max.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI : F_MATH_2<
    "max.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_ui>;
def INT_NVVM_MAX_LL : F_MATH_2<
    "max.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL : F_MATH_2<
    "max.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ull>;

// f32 min/max, plain and .ftz spellings.
def INT_NVVM_FMIN_F : F_MATH_2<
    "min.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<
    "min.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMAX_F : F_MATH_2<
    "max.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<
    "max.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min/max.
def INT_NVVM_FMIN_D : F_MATH_2<
    "min.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<
    "max.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
// Multiply family. Each record pairs one NVVM intrinsic with its PTX text.

// mul.hi on 32- and 64-bit signed/unsigned integers.
def INT_NVVM_MULHI_I : F_MATH_2<
    "mul.hi.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<
    "mul.hi.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
def INT_NVVM_MULHI_LL : F_MATH_2<
    "mul.hi.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<
    "mul.hi.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<
    "mul.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<
    "mul.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<
    "mul.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<
    "mul.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<
    "mul.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<
    "mul.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<
    "mul.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<
    "mul.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply: rn/rz/rm/rp.
def INT_NVVM_MUL_RN_D : F_MATH_2<
    "mul.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<
    "mul.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<
    "mul.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<
    "mul.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo on signed/unsigned 32-bit registers.
def INT_NVVM_MUL24_I : F_MATH_2<
    "mul24.lo.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<
    "mul24.lo.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// Floating-point divide. Each record pairs one NVVM intrinsic with its PTX
// text.

// f32 approximate divide, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F : F_MATH_2<
    "div.approx.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<
    "div.approx.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<
    "div.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<
    "div.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<
    "div.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<
    "div.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<
    "div.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<
    "div.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<
    "div.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<
    "div.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide: rn/rz/rm/rp.
def INT_NVVM_DIV_RN_D : F_MATH_2<
    "div.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<
    "div.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<
    "div.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<
    "div.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
327 def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
329 def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
// int_nvvm_sad_i / int_nvvm_sad_ui: three 32-bit integer sources, printed as
// sad.s32 and sad.u32 respectively.
def INT_NVVM_SAD_I : F_MATH_3<
    "sad.s32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<
    "sad.u32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
// floor intrinsics are selected as a same-type CVT with the CvtRMI mode
// (CvtRMI_FTZ for the .ftz intrinsic).
def : Pat<
    (int_nvvm_floor_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
    (int_nvvm_floor_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_floor_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil intrinsics are selected as a same-type CVT with the CvtRPI mode
// (CvtRPI_FTZ for the .ftz intrinsic).
def : Pat<
    (int_nvvm_ceil_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
    (int_nvvm_ceil_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<
    (int_nvvm_ceil_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
363 def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
365 def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
// Floating-point abs: f32 (with and without .ftz) and f64.
def INT_NVVM_FABS_FTZ_F : F_MATH_1<
    "abs.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<
    "abs.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_fabs_f>;
def INT_NVVM_FABS_D : F_MATH_1<
    "abs.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_fabs_d>;
// round intrinsics -> same-type CVT with CvtRNI (CvtRNI_FTZ for .ftz).
def : Pat<
    (int_nvvm_round_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
    (int_nvvm_round_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_round_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

// trunc intrinsics -> same-type CVT with CvtRZI (CvtRZI_FTZ for .ftz).
def : Pat<
    (int_nvvm_trunc_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
    (int_nvvm_trunc_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_trunc_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

// saturate intrinsics -> same-type CVT with CvtSAT (CvtSAT_FTZ for .ftz).
def : Pat<
    (int_nvvm_saturate_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<
    (int_nvvm_saturate_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_saturate_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// Approximate transcendental intrinsics, one record per intrinsic.
// NOTE(review): the PTX ISA documents ex2.approx and lg2.approx for .f32;
// confirm that the .f64 spellings used by the *_D records below are accepted
// by ptxas.

// ex2.approx
def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<
    "ex2.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<
    "ex2.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<
    "ex2.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<
    "lg2.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<
    "lg2.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<
    "lg2.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

// sin.approx (f32 only)
def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<
    "sin.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<
    "sin.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos.approx (f32 only)
def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<
    "cos.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<
    "cos.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// Fused multiply-add: three sources and a result in the same register class.

// f32 fma: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F : F_MATH_3<
    "fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F : F_MATH_3<
    "fma.rn.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F : F_MATH_3<
    "fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F : F_MATH_3<
    "fma.rz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F : F_MATH_3<
    "fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F : F_MATH_3<
    "fma.rm.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F : F_MATH_3<
    "fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F : F_MATH_3<
    "fma.rp.f32 \t$dst, $src0, $src1, $src2;",
    Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;

// f64 fma: rn/rz/rm/rp.
def INT_NVVM_FMA_RN_D : F_MATH_3<
    "fma.rn.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D : F_MATH_3<
    "fma.rz.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D : F_MATH_3<
    "fma.rm.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D : F_MATH_3<
    "fma.rp.f64 \t$dst, $src0, $src1, $src2;",
    Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
// Reciprocal. Each record pairs one NVVM intrinsic with its PTX text.

// f32 rcp: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<
    "rcp.rn.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F : F_MATH_1<
    "rcp.rn.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<
    "rcp.rz.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F : F_MATH_1<
    "rcp.rz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<
    "rcp.rm.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F : F_MATH_1<
    "rcp.rm.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<
    "rcp.rp.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F : F_MATH_1<
    "rcp.rp.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp: rn/rz/rm/rp.
def INT_NVVM_RCP_RN_D : F_MATH_1<
    "rcp.rn.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D : F_MATH_1<
    "rcp.rz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D : F_MATH_1<
    "rcp.rm.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D : F_MATH_1<
    "rcp.rp.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// f64 approximate rcp with .ftz.
def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<
    "rcp.approx.ftz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// Square root. Each record pairs one NVVM intrinsic with its PTX text.

// f32 sqrt: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<
    "sqrt.rn.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F : F_MATH_1<
    "sqrt.rn.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<
    "sqrt.rz.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F : F_MATH_1<
    "sqrt.rz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<
    "sqrt.rm.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<
    "sqrt.rm.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<
    "sqrt.rp.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<
    "sqrt.rp.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;

// f32 approximate sqrt, with and without .ftz.
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<
    "sqrt.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<
    "sqrt.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt: rn/rz/rm/rp.
def INT_NVVM_SQRT_RN_D : F_MATH_1<
    "sqrt.rn.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<
    "sqrt.rz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<
    "sqrt.rm.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<
    "sqrt.rp.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
// nvvm_sqrt intrinsic: int_nvvm_sqrt_f maps to one of four sqrt records. The
// patterns are listed from the most constrained predicate set to the
// unpredicated fallback; keep this order.

// doF32FTZ and do_SQRTF32_RN -> sqrt.rn.ftz.f32
def : Pat<
    (int_nvvm_sqrt_f Float32Regs:$a),
    (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ, do_SQRTF32_RN]>;
// do_SQRTF32_RN only -> sqrt.rn.f32
def : Pat<
    (int_nvvm_sqrt_f Float32Regs:$a),
    (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
  Requires<[do_SQRTF32_RN]>;
// doF32FTZ only -> sqrt.approx.ftz.f32
def : Pat<
    (int_nvvm_sqrt_f Float32Regs:$a),
    (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ]>;
// No predicate -> sqrt.approx.f32
def : Pat<
    (int_nvvm_sqrt_f Float32Regs:$a),
    (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// Approximate reciprocal square root: f32 (with and without .ftz) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F : F_MATH_1<
    "rsqrt.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<
    "rsqrt.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<
    "rsqrt.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// Floating-point add. Each record pairs one NVVM intrinsic with its PTX text.

// f32 add: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<
    "add.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<
    "add.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<
    "add.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<
    "add.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<
    "add.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<
    "add.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<
    "add.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<
    "add.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add: rn/rz/rm/rp.
def INT_NVVM_ADD_RN_D : F_MATH_2<
    "add.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<
    "add.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<
    "add.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<
    "add.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// f64 -> f32 conversion intrinsics, selected as CVT_f32_f64 with the mode
// named by the intrinsic suffix (rn/rz/rm/rp, optionally _ftz).
def : Pat<
    (int_nvvm_d2f_rn_ftz Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<
    (int_nvvm_d2f_rn Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<
    (int_nvvm_d2f_rz_ftz Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<
    (int_nvvm_d2f_rz Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<
    (int_nvvm_d2f_rm_ftz Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<
    (int_nvvm_d2f_rm Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<
    (int_nvvm_d2f_rp_ftz Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<
    (int_nvvm_d2f_rp Float64Regs:$a),
    (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> s32 conversion intrinsics: CVT_s32_f64 with CvtRNI/RZI/RMI/RPI.
def : Pat<
    (int_nvvm_d2i_rn Float64Regs:$a),
    (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_d2i_rz Float64Regs:$a),
    (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_d2i_rm Float64Regs:$a),
    (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_d2i_rp Float64Regs:$a),
    (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32 conversion intrinsics: CVT_u32_f64 with CvtRNI/RZI/RMI/RPI.
def : Pat<
    (int_nvvm_d2ui_rn Float64Regs:$a),
    (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_d2ui_rz Float64Regs:$a),
    (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_d2ui_rm Float64Regs:$a),
    (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_d2ui_rp Float64Regs:$a),
    (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// s32 -> f64 conversion intrinsics: CVT_f64_s32 with CvtRN/RZ/RM/RP.
def : Pat<
    (int_nvvm_i2d_rn Int32Regs:$a),
    (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
    (int_nvvm_i2d_rz Int32Regs:$a),
    (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
    (int_nvvm_i2d_rm Int32Regs:$a),
    (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
    (int_nvvm_i2d_rp Int32Regs:$a),
    (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64 conversion intrinsics: CVT_f64_u32 with CvtRN/RZ/RM/RP.
def : Pat<
    (int_nvvm_ui2d_rn Int32Regs:$a),
    (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
    (int_nvvm_ui2d_rz Int32Regs:$a),
    (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
    (int_nvvm_ui2d_rm Int32Regs:$a),
    (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
    (int_nvvm_ui2d_rp Int32Regs:$a),
    (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> s32 conversion intrinsics: CVT_s32_f32 with CvtRNI/RZI/RMI/RPI, and
// the matching *_FTZ mode for the _ftz intrinsics.
def : Pat<
    (int_nvvm_f2i_rn_ftz Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
    (int_nvvm_f2i_rn Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_f2i_rz_ftz Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
    (int_nvvm_f2i_rz Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_f2i_rm_ftz Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
    (int_nvvm_f2i_rm Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_f2i_rp_ftz Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
    (int_nvvm_f2i_rp Float32Regs:$a),
    (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32 conversion intrinsics: CVT_u32_f32 with the same set of modes.
def : Pat<
    (int_nvvm_f2ui_rn_ftz Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
    (int_nvvm_f2ui_rn Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_f2ui_rz_ftz Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
    (int_nvvm_f2ui_rz Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_f2ui_rm_ftz Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
    (int_nvvm_f2ui_rm Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_f2ui_rp_ftz Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
    (int_nvvm_f2ui_rp Float32Regs:$a),
    (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// s32 -> f32 conversion intrinsics: CVT_f32_s32 with CvtRN/RZ/RM/RP.
def : Pat<
    (int_nvvm_i2f_rn Int32Regs:$a),
    (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
    (int_nvvm_i2f_rz Int32Regs:$a),
    (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
    (int_nvvm_i2f_rm Int32Regs:$a),
    (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
    (int_nvvm_i2f_rp Int32Regs:$a),
    (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32 conversion intrinsics: CVT_f32_u32 with CvtRN/RZ/RM/RP.
def : Pat<
    (int_nvvm_ui2f_rn Int32Regs:$a),
    (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
    (int_nvvm_ui2f_rz Int32Regs:$a),
    (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
    (int_nvvm_ui2f_rm Int32Regs:$a),
    (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
    (int_nvvm_ui2f_rp Int32Regs:$a),
    (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// int_nvvm_lohi_i2d: two Int32Regs sources packed into one Float64Regs result
// with a single mov.b64 from a brace-enclosed register pair.
def INT_NVVM_LOHI_I2D : F_MATH_2<
    "mov.b64 \t$dst, {{$src0, $src1}};",
    Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
706 def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
707 !strconcat(".reg .b32 %temp; \n\t",
708 !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
710 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
711 def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
712 !strconcat(".reg .b32 %temp; \n\t",
713 !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
715 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> s64 conversion intrinsics: CVT_s64_f32 with CvtRNI/RZI/RMI/RPI, and
// the matching *_FTZ mode for the _ftz intrinsics.
def : Pat<
    (int_nvvm_f2ll_rn_ftz Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
    (int_nvvm_f2ll_rn Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_f2ll_rz_ftz Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
    (int_nvvm_f2ll_rz Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_f2ll_rm_ftz Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
    (int_nvvm_f2ll_rm Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_f2ll_rp_ftz Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
    (int_nvvm_f2ll_rp Float32Regs:$a),
    (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64 conversion intrinsics: CVT_u64_f32 with the same set of modes.
def : Pat<
    (int_nvvm_f2ull_rn_ftz Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
    (int_nvvm_f2ull_rn Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_f2ull_rz_ftz Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
    (int_nvvm_f2ull_rz Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_f2ull_rm_ftz Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
    (int_nvvm_f2ull_rm Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_f2ull_rp_ftz Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
    (int_nvvm_f2ull_rp Float32Regs:$a),
    (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
751 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
752 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
753 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
754 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
755 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
756 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
757 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
758 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// f64 -> u64 conversion intrinsics: int_nvvm_d2ull_<rn|rz|rm|rp> is selected
// to CVT_u64_f64 with CvtRNI/CvtRZI/CvtRMI/CvtRPI respectively.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRPI)>;
// s64 -> f32 conversion intrinsics: int_nvvm_ll2f_<rn|rz|rm|rp> is selected
// to CVT_f32_s64 with CvtRN/CvtRZ/CvtRM/CvtRP respectively.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRP)>;
// u64 -> f32 conversion intrinsics: int_nvvm_ull2f_<rn|rz|rm|rp> is selected
// to CVT_f32_u64 with CvtRN/CvtRZ/CvtRM/CvtRP respectively.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRP)>;
// s64 -> f64 conversion intrinsics: int_nvvm_ll2d_<rn|rz|rm|rp> is selected
// to CVT_f64_s64 with CvtRN/CvtRZ/CvtRM/CvtRP respectively.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRP)>;
// u64 -> f64 conversion intrinsics: int_nvvm_ull2d_<rn|rz|rm|rp> is selected
// to CVT_f64_u64 with CvtRN/CvtRZ/CvtRM/CvtRP respectively.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRP)>;
806 // FIXME: Ideally, we could use these patterns instead of the scope-creating
807 // patterns, but ptxas does not like these since .s16 is not compatible with
808 // .f16. The solution is to use .bXX for all integer register types, but we
809 // are not there yet.
810 //def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
811 // (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
812 //def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
813 // (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
815 //def : Pat<(int_nvvm_h2f Int16Regs:$a),
816 // (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
// Instruction selected for int_nvvm_f2h_rn_ftz (Float32Regs source, Int16Regs
// result). The asm opens a PTX scope, declares a .b16 temporary, converts
// $src0 into it with cvt.rn.ftz.f16.f32, and then copies the temporary into
// $dst with mov.b16. The FIXME above these definitions explains why the
// temporary is used instead of a plain CVT pattern.
818 def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
819 !strconcat(".reg .b16 %temp;\n\t",
820 !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
821 !strconcat("mov.b16 \t$dst, %temp;\n",
823 Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
// Instruction selected for int_nvvm_f2h_rn (Float32Regs source, Int16Regs
// result). Same shape as the _FTZ form, but the conversion emitted is
// cvt.rn.f16.f32 (no .ftz modifier): convert into a scoped .b16 temporary,
// then mov.b16 the temporary into $dst.
824 def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
825 !strconcat(".reg .b16 %temp;\n\t",
826 !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
827 !strconcat("mov.b16 \t$dst, %temp;\n",
829 Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
// Instruction selected for int_nvvm_h2f (Int16Regs source, Float32Regs
// result). The asm copies $src0 into a scoped .b16 temporary with mov.b16 and
// then converts that temporary to f32 with cvt.f32.f16, writing $dst.
831 def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
832 !strconcat(".reg .b16 %temp;\n\t",
833 !strconcat("mov.b16 \t%temp, $src0;\n\t",
834 !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
836 Float32Regs, Int16Regs, int_nvvm_h2f>;
// Generic half-precision conversion nodes (half values are carried in
// Int16Regs here).
//   f16_to_fp -> f32 : CVT_f32_f16 with CvtNONE
//   fp_to_f16 <- f32 : CVT_f16_f32 with CvtRN_FTZ when doF32FTZ holds,
//                      otherwise CVT_f16_f32 with CvtRN
//   f16_to_fp -> f64 : CVT_f64_f16 with CvtNONE
//   fp_to_f16 <- f64 : CVT_f16_f64 with CvtRN
// The predicated FTZ pattern is kept ahead of the unpredicated one.
def : Pat<(f32 (f16_to_fp Int16Regs:$src)),
          (CVT_f32_f16 Int16Regs:$src, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float32Regs:$src)),
          (CVT_f16_f32 Float32Regs:$src, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_f16 Float32Regs:$src)),
          (CVT_f16_f32 Float32Regs:$src, CvtRN)>;

def : Pat<(f64 (f16_to_fp Int16Regs:$src)),
          (CVT_f64_f16 Int16Regs:$src, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float64Regs:$src)),
          (CVT_f16_f64 Float64Regs:$src, CvtRN)>;
// Bit-pattern reinterpretation intrinsics. Each one is a single mov.bNN
// between an integer and a floating-point register class of the same width:
// mov.b32 for f2i / i2f, mov.b64 for ll2d / d2ll.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
864 //-----------------------------------
866 //-----------------------------------
// PatFrag over (ops, frag) whose predicate code returns
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL), i.e. the fragment
// only matches when that check succeeds for the node N being selected.
868 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
869 : PatFrag<ops, frag, [{
870 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
// PatFrag over (ops, frag) whose predicate code returns
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED), i.e. the fragment
// only matches when that check succeeds for the node N being selected.
872 class ATOMIC_SHARED_CHK <dag ops, dag frag>
873 : PatFrag<ops, frag, [{
874 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
// PatFrag over (ops, frag) whose predicate code returns
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC), i.e. the fragment
// only matches when that check succeeds for the node N being selected.
876 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
877 : PatFrag<ops, frag, [{
878 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
// Two-operand atomic (address + one value) for a single pointer register
// class `ptrclass`. Produces two instructions, both returning regclass:$dst
// and printing their operands as "$dst, [$addr], $b":
//   reg - the value operand $b is a register of `regclass`
//   imm - the value operand $b is an `IMMType` immediate, matched through
//         the `IMM` node
// The selection pattern is `IntOp` applied to (ptrclass:$addr, $b).
// NOTE(review): SpaceStr/TypeStr/OpcStr presumably form the mnemonic and Pred
// presumably gates the instructions - confirm against the full definition.
881 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
882 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
883 Operand IMMType, SDNode IMM, Predicate Pred> {
884 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
889 !strconcat(" \t$dst, [$addr], $b;", ""))))),
890 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
892 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
897 !strconcat(" \t$dst, [$addr], $b;", ""))))),
898 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Instantiates F_ATOMIC_2_imp once per pointer width: `p32` with Int32Regs
// addresses and `p64` with Int64Regs addresses. Every other parameter is
// forwarded unchanged.
901 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
902 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
903 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
904 IntOp, IMMType, IMM, Pred>;
905 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
906 IntOp, IMMType, IMM, Pred>;
909 // has 2 operands, neg the second one
// Register-only variant for one pointer register class `ptrclass`. The asm
// opens a PTX scope, declares a signed (".s" + type suffix) register named
// `temp`, writes a value computed from $b into `temp`, and then issues the
// atomic with `temp` as its value operand ("$dst, [$addr], temp"). The
// selection pattern is `IntOp` applied to (ptrclass:$addr, regclass:$b).
// NOTE(review): IMMType is accepted but no immediate form is visible here -
// confirm whether it is used.
910 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
911 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
912 Operand IMMType, Predicate Pred> {
913 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
914 !strconcat("{{ \n\t",
915 !strconcat(".reg \t.s",
917 !strconcat(" temp; \n\t",
920 !strconcat(" \ttemp, $b; \n\t",
926 !strconcat(" \t$dst, [$addr], temp; \n\t",
927 !strconcat("}}", "")))))))))))))),
928 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Instantiates F_ATOMIC_2_NEG_imp once per pointer width: `p32` with
// Int32Regs addresses and `p64` with Int64Regs addresses. Every other
// parameter is forwarded unchanged.
931 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
932 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
934 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
935 IntOp, IMMType, Pred> ;
936 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
937 IntOp, IMMType, Pred> ;
// Three-operand atomic (address + two values) for a single pointer register
// class `ptrclass`. All four instructions return regclass:$dst, print their
// operands as "$dst, [$addr], $b, $c", and match `IntOp` applied to
// (ptrclass:$addr, $b, $c). They differ only in which value operands are
// immediates:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are declared as `IMMType` and matched with `imm`.
// NOTE(review): SpaceStr/TypeStr/OpcStr presumably form the mnemonic and Pred
// presumably gates the instructions - confirm against the full definition.
941 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
942 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
943 Operand IMMType, Predicate Pred> {
944 def reg : NVPTXInst<(outs regclass:$dst),
945 (ins ptrclass:$addr, regclass:$b, regclass:$c),
950 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
952 (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
954 def imm1 : NVPTXInst<(outs regclass:$dst),
955 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
960 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
961 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
963 def imm2 : NVPTXInst<(outs regclass:$dst),
964 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
969 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
970 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
972 def imm3 : NVPTXInst<(outs regclass:$dst),
973 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
978 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
979 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp once per pointer width: `p32` with Int32Regs
// addresses and `p64` with Int64Regs addresses. Every other parameter is
// forwarded unchanged.
982 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
983 string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
984 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
985 IntOp, IMMType, Pred>;
986 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
987 IntOp, IMMType, Pred>;
// Atomic add.
//
// Address-space-qualified fragments: _g / _s / _gen wrap the same node in the
// global / shared / generic address-space check. The 32- and 64-bit integer
// forms wrap atomic_load_add_{32,64}; the f32 form wraps the
// int_nvvm_atomic_load_add_f32 intrinsic.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_f32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_add_f32 node:$ptr, node:$val)>;
def atomic_load_add_f32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_add_f32 node:$ptr, node:$val)>;
def atomic_load_add_f32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_add_f32 node:$ptr, node:$val)>;

// 32-bit integer add (.u32). The _USE_G form matches the generic-space
// fragment but is instantiated with the ".global" space string under
// useAtomRedG32forGen32.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// 64-bit integer add (.u64).
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// f32 add (.f32); immediates are f32imm matched with fpimm, and all three
// forms share the hasAtomAddF32 predicate.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
// Atomic subtract.
//
// Address-space-qualified fragments over atomic_load_sub_{32,64}.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_64 node:$ptr, node:$val)>;

// Instructions are built with F_ATOMIC_2_NEG and the ".add" opcode string;
// the type string is the bare width ("32" / "64"). Definition order is kept
// exactly as in the original.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
// Atomic exchange.
//
// Address-space-qualified fragments over atomic_swap_{32,64}.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_64 node:$ptr, node:$val)>;

// Instructions use the ".exch" opcode string with untyped .b32 / .b64.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic max (signed and unsigned).
//
// Address-space-qualified fragments over atomic_load_max_{32,64} and
// atomic_load_umax_{32,64}.
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_64 node:$ptr, node:$val)>;

// Signed max: ".max" with .s32 / .s64.
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// Unsigned max: ".max" with .u32 / .u64.
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic min (signed and unsigned).
//
// Address-space-qualified fragments over atomic_load_min_{32,64} and
// atomic_load_umin_{32,64}.
def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_64 node:$ptr, node:$val)>;

// Signed min: ".min" with .s32 / .s64.
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// Unsigned min: ".min" with .u32 / .u64.
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1220 // atom_inc atom_dec
// Address-space-qualified fragments over the int_nvvm_atomic_load_inc_32 and
// int_nvvm_atomic_load_dec_32 intrinsics (32-bit only).
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;

// Increment: ".inc" with .u32.
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// Decrement: ".dec" with .u32.
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// Atomic bitwise AND.
//
// Address-space-qualified fragments over atomic_load_and_{32,64}.
def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_64 node:$ptr, node:$val)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_64 node:$ptr, node:$val)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_and_64 node:$ptr, node:$val)>;

// Instructions use the ".and" opcode string with untyped .b32 / .b64.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic bitwise OR.
//
// Address-space-qualified fragments over atomic_load_or_{32,64}.
def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_32 node:$ptr, node:$val)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_32 node:$ptr, node:$val)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_or_32 node:$ptr, node:$val)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_64 node:$ptr, node:$val)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_or_64 node:$ptr, node:$val)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_or_64 node:$ptr, node:$val)>;

// Instructions use the ".or" opcode string with untyped .b32 / .b64.
// Definition order (G, GEN, GEN_USE_G, S) is kept exactly as in the original.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
// Atomic bitwise XOR.
//
// Address-space-qualified fragments over atomic_load_xor_{32,64}.
def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_32 node:$ptr, node:$val)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_32 node:$ptr, node:$val)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_xor_32 node:$ptr, node:$val)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_64 node:$ptr, node:$val)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_xor_64 node:$ptr, node:$val)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_xor_64 node:$ptr, node:$val)>;

// Instructions use the ".xor" opcode string with untyped .b32 / .b64.
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic compare-and-swap.
//
// Address-space-qualified three-operand fragments over
// atomic_cmp_swap_{32,64}.
def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_32 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_32 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                       (atomic_cmp_swap_32 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_64 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                      (atomic_cmp_swap_64 node:$ptr, node:$cmp, node:$swap)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$cmp, node:$swap),
                       (atomic_cmp_swap_64 node:$ptr, node:$cmp, node:$swap)>;

// Instructions are built with F_ATOMIC_3 and the ".cas" opcode string, using
// untyped .b32 / .b64.
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1380 // Support for scoped atomic operations. Matches
1381 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1382 // and converts it into the appropriate instruction.
1383 // NOTE: not all possible combinations are implemented
1384 // 'space' is limited to generic as it's the only one needed to support CUDA.
1385 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Shared base for the scoped-atomic instruction variants: an NVPTXInst with a
// single `regclass` output named $result, the caller-supplied `ins` dag as
// inputs, and a selection pattern that sets $result from the `Operands` dag.
// NOTE(review): AsmStr is presumably the assembly template and Preds the
// Requires<> predicate list - confirm against the full definition.
1386 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1387 dag ins, dag Operands>
1388 : NVPTXInst<(outs regclass:$result), ins,
1390 [(set regclass:$result, Operands)]>,
1393 // Define instruction variants for all addressing modes.
// Two-operand scoped atomic: emits four anonymous ATOM23_impl records for the
// intrinsic `Intr`, covering every combination of
//   address $src : Int32Regs or Int64Regs
//   value   $b   : `regclass` register, or `ImmType` immediate matched as
//                  (ImmTy Imm:$b)
// AsmStr, regclass and Preds are forwarded to ATOM23_impl unchanged. The
// register-operand records are introduced under `let AddedComplexity = 1`.
1394 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1395 NVPTXRegClass regclass, Operand ImmType,
1396 SDNode Imm, ValueType ImmTy,
1397 list<Predicate> Preds> {
1398 let AddedComplexity = 1 in {
1399 def : ATOM23_impl<AsmStr, regclass, Preds,
1400 (ins Int32Regs:$src, regclass:$b),
1401 (Intr Int32Regs:$src, regclass:$b)>;
1402 def : ATOM23_impl<AsmStr, regclass, Preds,
1403 (ins Int64Regs:$src, regclass:$b),
1404 (Intr Int64Regs:$src, regclass:$b)>;
1406 // tablegen can't infer argument types from Intrinsic (though it can
1407 // from Instruction) so we have to enforce specific type on
1408 // immediates via explicit cast to ImmTy.
1409 def : ATOM23_impl<AsmStr, regclass, Preds,
1410 (ins Int32Regs:$src, ImmType:$b),
1411 (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1412 def : ATOM23_impl<AsmStr, regclass, Preds,
1413 (ins Int64Regs:$src, ImmType:$b),
1414 (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Three-operand form (address $src plus values $b and $c). Produces eight
// anonymous ATOM23_impl records: $src in Int32Regs or Int64Regs, crossed with
// every register/immediate combination of $b and $c. Immediates are matched
// as (ImmTy Imm:$x) and declared with the ImmType operand.
1417 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1418 NVPTXRegClass regclass, Operand ImmType,
1419 SDNode Imm, ValueType ImmTy,
1420 list<Predicate> Preds> {
1421 // Variants for register/immediate permutations of $b and $c
1422 let AddedComplexity = 2 in {
// $b register, $c register.
1423 def : ATOM23_impl<AsmStr, regclass, Preds,
1424 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1425 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1426 def : ATOM23_impl<AsmStr, regclass, Preds,
1427 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1428 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1430 let AddedComplexity = 1 in {
// $b immediate, $c register.
1431 def : ATOM23_impl<AsmStr, regclass, Preds,
1432 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1433 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1434 def : ATOM23_impl<AsmStr, regclass, Preds,
1435 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1436 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
// $b register, $c immediate.
1437 def : ATOM23_impl<AsmStr, regclass, Preds,
1438 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1439 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1440 def : ATOM23_impl<AsmStr, regclass, Preds,
1441 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1442 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
// $b immediate, $c immediate.
1444 def : ATOM23_impl<AsmStr, regclass, Preds,
1445 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1446 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1447 def : ATOM23_impl<AsmStr, regclass, Preds,
1448 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1449 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1452 // Constructs intrinsic name and instruction asm strings.
// Two-operand name builder. Assembles
//   asm:       "atom" [.<SpaceStr>] [.<ScopeStr>] .<OpStr>.<TypeStr>
//              " \t$result, [$src], $b;"
//              (the space suffix is omitted for "gen", the scope suffix
//              for "gpu")
//   intrinsic: "int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]"
//              (the scope suffix is omitted when ScopeStr is empty)
// and forwards both, with the remaining parameters, to ATOM2P_impl.
// NOTE(review): ATOM2P_impl takes an Intrinsic, so the name string is
// presumably resolved to the intrinsic record before being passed -- confirm.
1453 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1454 string ScopeStr, string SpaceStr,
1455 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1456 ValueType ImmTy, list<Predicate> Preds> {
1457 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1458 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1459 # "." # OpStr # "." # TypeStr
1460 # " \t$result, [$src], $b;",
1462 "int_nvvm_atomic_" # OpStr
1463 # "_" # SpaceStr # "_" # IntTypeStr
1464 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1465 regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand name builder. Same string construction as the two-operand
// builder, except that the asm operand list is " \t$result, [$src], $b, $c;"
// and the result is forwarded to ATOM3P_impl.
//   asm:       "atom" [.<SpaceStr>] [.<ScopeStr>] .<OpStr>.<TypeStr> ...
//              (the space suffix is omitted for "gen", the scope suffix
//              for "gpu")
//   intrinsic: "int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]"
//              (the scope suffix is omitted when ScopeStr is empty)
1467 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1468 string ScopeStr, string SpaceStr,
1469 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1470 ValueType ImmTy, list<Predicate> Preds> {
1471 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1472 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1473 # "." # OpStr # "." # TypeStr
1474 # " \t$result, [$src], $b, $c;",
1476 "int_nvvm_atomic_" # OpStr
1477 # "_" # SpaceStr # "_" # IntTypeStr
1478 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1479 regclass, ImmType, Imm, ImmTy, Preds>;
1482 // Constructs variants for different address spaces.
1483 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand atomics: instantiates ATOM2N_impl once,
// with SpaceStr fixed to "gen", under the "_gen_" name suffix. All other
// parameters are forwarded unchanged.
1484 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1485 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1486 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1487 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1488 regclass, ImmType, Imm, ImmTy, Preds>;
// Address-space layer for three-operand atomics: instantiates ATOM3N_impl
// once, with SpaceStr fixed to "gen", under the "_gen_" name suffix. All other
// parameters are forwarded unchanged.
1490 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1491 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1492 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1493 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1494 regclass, ImmType, Imm, ImmTy, Preds>;
1497 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics: instantiates ATOM2A_impl for the "cta"
// and "sys" scopes (suffixes _cta and _sys). Each instantiation appends
// hasAtomScope to the caller's predicate list.
1498 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1499 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1500 ValueType ImmTy, list<Predicate> Preds> {
1501 // .gpu scope is default and is currently covered by existing
1502 // atomics w/o explicitly specified scope.
1503 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1504 regclass, ImmType, Imm, ImmTy,
1505 !listconcat(Preds,[hasAtomScope])>;
1506 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1507 regclass, ImmType, Imm, ImmTy,
1508 !listconcat(Preds,[hasAtomScope])>;
// Scope layer for three-operand atomics: instantiates ATOM3A_impl for the
// "cta" and "sys" scopes (suffixes _cta and _sys). Each instantiation appends
// hasAtomScope to the caller's predicate list.
1510 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1511 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1512 list<Predicate> Preds> {
1513 // No need to define ".gpu"-scoped atomics. They do the same thing
1514 // as the regular, non-scoped atomics defined elsewhere.
1515 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1516 regclass, ImmType, Imm, ImmTy,
1517 !listconcat(Preds,[hasAtomScope])>;
1518 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1519 regclass, ImmType, Imm, ImmTy,
1520 !listconcat(Preds,[hasAtomScope])>;
// Type table for add-style atomics: s32, u32 and u64 integer forms (intrinsic
// type letter "i", matched with imm) and f32 / f64 floating-point forms
// (type letter "f", matched with fpimm).
1524 multiclass ATOM2_add_impl<string OpStr> {
1525 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1526 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1527 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1528 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1530 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1534 // atom.{and,or,xor}
// Type table for bitwise atomics: b32 with no extra predicate, b64 gated by
// hasAtomBitwise64.
1535 multiclass ATOM2_bitwise_impl<string OpStr> {
1536 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1537 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1538 [hasAtomBitwise64]>;
// Type table for exchange atomics: b32 and b64, both with an empty extra
// predicate list.
1542 multiclass ATOM2_exch_impl<string OpStr> {
1543 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1544 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Type table for min/max atomics: signed and unsigned forms at 32 and 64
// bits (s32, u32, s64, u64).
1548 multiclass ATOM2_minmax_impl<string OpStr> {
1549 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1550 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1551 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1553 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
// Type table for inc/dec atomics: a single u32 form.
1558 multiclass ATOM2_incdec_impl<string OpStr> {
1559 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
// Type table for compare-and-swap (three-operand) atomics: b32 and b64, both
// with an empty extra predicate list.
1563 multiclass ATOM3_cas_impl<string OpStr> {
1564 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1565 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped atomics, one per operation. The
// string argument is the OpStr spliced into both the asm mnemonic and the
// intrinsic name by the name-builder multiclasses.
1568 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1569 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1570 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1571 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1572 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1573 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1574 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1575 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1576 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1577 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1579 //-----------------------------------
1580 // Support for ldu on sm_20 or later
1581 //-----------------------------------
1583 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1584 // read-only in a kernel.
// Scalar ldu.global instruction family. Emits one instruction per addressing
// form, all with asm "ldu.global." # TyStr, an empty selection pattern, and
// Requires<[hasLDU]>:
//   areg   - address in Int32Regs
//   areg64 - address in Int64Regs
//   avar   - imemAny operand
//   ari    - MEMri operand
//   ari64  - MEMri64 operand
// TyStr supplies the type suffix and the operand list; regclass is the
// register class of $result.
1588 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1589 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1590 !strconcat("ldu.global.", TyStr),
1591 []>, Requires<[hasLDU]>;
1592 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1593 !strconcat("ldu.global.", TyStr),
1594 []>, Requires<[hasLDU]>;
1595 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1596 !strconcat("ldu.global.", TyStr),
1597 []>, Requires<[hasLDU]>;
1598 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1599 !strconcat("ldu.global.", TyStr),
1600 []>, Requires<[hasLDU]>;
1601 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1602 !strconcat("ldu.global.", TyStr),
1603 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations, one per value type. The i8 form loads "u8" into
// Int16Regs; the p32 / p64 (pointer) forms use the same "u32" / "u64" strings
// and register classes as the i32 / i64 forms.
1606 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1607 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1608 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1609 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1610 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1611 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1612 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1613 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1617 // Elementized vector ldu
// Two-element vector ldu family. Every variant has two outputs ($dst1, $dst2)
// of class regclass, asm "ldu.global." # TyStr and an empty selection
// pattern. Variants are named by addressing form: _areg32, _areg64, _ari32,
// _ari64 and _avar.
1618 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1619 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1620 (ins Int32Regs:$src),
1621 !strconcat("ldu.global.", TyStr), []>;
1622 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1623 (ins Int64Regs:$src),
1624 !strconcat("ldu.global.", TyStr), []>;
1625 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1627 !strconcat("ldu.global.", TyStr), []>;
1628 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1630 !strconcat("ldu.global.", TyStr), []>;
1631 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1633 !strconcat("ldu.global.", TyStr), []>;
// Four-element vector ldu family. Every variant has four outputs
// ($dst1..$dst4) of class regclass, asm "ldu.global." # TyStr and an empty
// selection pattern. Addressing forms:
//   _areg32 - Int32Regs address     _areg64 - Int64Regs address
//   _ari32  - MEMri operand         _ari64  - MEMri64 operand
//   _avar   - imemAny operand
1636 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1637 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1638 regclass:$dst4), (ins Int32Regs:$src),
1639 !strconcat("ldu.global.", TyStr), []>;
1640 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1641 regclass:$dst4), (ins Int64Regs:$src),
1642 !strconcat("ldu.global.", TyStr), []>;
1643 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1644 regclass:$dst4), (ins MEMri:$src),
1645 !strconcat("ldu.global.", TyStr), []>;
1646 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1647 regclass:$dst4), (ins MEMri64:$src),
1648 !strconcat("ldu.global.", TyStr), []>;
1649 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1650 regclass:$dst4), (ins imemAny:$src),
1651 !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations. The string argument carries the vector width,
// element type and the braced destination list; 8-bit elements are loaded
// into Int16Regs.
1654 defm INT_PTX_LDU_G_v2i8_ELE
1655 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1656 defm INT_PTX_LDU_G_v2i16_ELE
1657 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1658 defm INT_PTX_LDU_G_v2i32_ELE
1659 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1660 defm INT_PTX_LDU_G_v2f32_ELE
1661 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1662 defm INT_PTX_LDU_G_v2i64_ELE
1663 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1664 defm INT_PTX_LDU_G_v2f64_ELE
1665 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1666 defm INT_PTX_LDU_G_v4i8_ELE
1667 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1668 defm INT_PTX_LDU_G_v4i16_ELE
1669 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1671 defm INT_PTX_LDU_G_v4i32_ELE
1672 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1674 defm INT_PTX_LDU_G_v4f32_ELE
1675 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1679 //-----------------------------------
1680 // Support for ldg on sm_35 or later
1681 //-----------------------------------
1683 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1684 // non-coherent texture cache, and therefore the values read must be read-only
1685 // during the lifetime of the kernel.
// Scalar ld.global.nc instruction family. Emits one instruction per
// addressing form, all with asm "ld.global.nc." # TyStr, an empty selection
// pattern, and Requires<[hasLDG]>:
//   areg   - address in Int32Regs
//   areg64 - address in Int64Regs
//   avar   - imemAny operand
//   ari    - MEMri operand
//   ari64  - MEMri64 operand
1687 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1688 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1689 !strconcat("ld.global.nc.", TyStr),
1690 []>, Requires<[hasLDG]>;
1691 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1692 !strconcat("ld.global.nc.", TyStr),
1693 []>, Requires<[hasLDG]>;
1694 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1695 !strconcat("ld.global.nc.", TyStr),
1696 []>, Requires<[hasLDG]>;
1697 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1698 !strconcat("ld.global.nc.", TyStr),
1699 []>, Requires<[hasLDG]>;
1700 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1701 !strconcat("ld.global.nc.", TyStr),
1702 []>, Requires<[hasLDG]>;
// Scalar ldg instantiations, one per value type. The i8 form loads "u8" into
// Int16Regs; the p32 / p64 (pointer) forms use the same "u32" / "u64" strings
// and register classes as the i32 / i64 forms.
1705 defm INT_PTX_LDG_GLOBAL_i8
1706 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1707 defm INT_PTX_LDG_GLOBAL_i16
1708 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1709 defm INT_PTX_LDG_GLOBAL_i32
1710 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1711 defm INT_PTX_LDG_GLOBAL_i64
1712 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1713 defm INT_PTX_LDG_GLOBAL_f32
1714 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1715 defm INT_PTX_LDG_GLOBAL_f64
1716 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1717 defm INT_PTX_LDG_GLOBAL_p32
1718 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1719 defm INT_PTX_LDG_GLOBAL_p64
1720 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1724 // Elementized vector ldg
// Two-element vector ldg family. Every variant has two outputs ($dst1, $dst2)
// of class regclass, asm "ld.global.nc." # TyStr and an empty selection
// pattern. Variants are named by addressing form: _areg32, _areg64, _ari32,
// _ari64 and _avar.
1725 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1726 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1727 (ins Int32Regs:$src),
1728 !strconcat("ld.global.nc.", TyStr), []>;
1729 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1730 (ins Int64Regs:$src),
1731 !strconcat("ld.global.nc.", TyStr), []>;
1732 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1734 !strconcat("ld.global.nc.", TyStr), []>;
1735 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1737 !strconcat("ld.global.nc.", TyStr), []>;
1738 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1740 !strconcat("ld.global.nc.", TyStr), []>;
// Four-element vector ldg family. Every variant has four outputs
// ($dst1..$dst4) of class regclass, asm "ld.global.nc." # TyStr and an empty
// selection pattern. Addressing forms:
//   _areg32 - Int32Regs address     _areg64 - Int64Regs address
//   _ari32  - MEMri operand         _ari64  - MEMri64 operand
//   _avar   - imemAny operand
1743 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1744 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1745 regclass:$dst4), (ins Int32Regs:$src),
1746 !strconcat("ld.global.nc.", TyStr), []>;
1747 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1748 regclass:$dst4), (ins Int64Regs:$src),
1749 !strconcat("ld.global.nc.", TyStr), []>;
1750 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1751 regclass:$dst4), (ins MEMri:$src),
1752 !strconcat("ld.global.nc.", TyStr), []>;
1753 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1754 regclass:$dst4), (ins MEMri64:$src),
1755 !strconcat("ld.global.nc.", TyStr), []>;
1756 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1757 regclass:$dst4), (ins imemAny:$src),
1758 !strconcat("ld.global.nc.", TyStr), []>;
1761 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations. The string argument carries the vector width,
// element type and the braced destination list; 8-bit elements are loaded
// into Int16Regs.
1762 defm INT_PTX_LDG_G_v2i8_ELE
1763 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1764 defm INT_PTX_LDG_G_v2i16_ELE
1765 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1766 defm INT_PTX_LDG_G_v2i32_ELE
1767 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1768 defm INT_PTX_LDG_G_v2f32_ELE
1769 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1770 defm INT_PTX_LDG_G_v2i64_ELE
1771 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1772 defm INT_PTX_LDG_G_v2f64_ELE
1773 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1774 defm INT_PTX_LDG_G_v4i8_ELE
1775 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1776 defm INT_PTX_LDG_G_v4i16_ELE
1777 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1778 defm INT_PTX_LDG_G_v4i32_ELE
1779 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1780 defm INT_PTX_LDG_G_v4f32_ELE
1781 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
// Non-generic -> generic pointer conversion for intrinsic Intrin.
//   _yes / _yes_64 - emit "cvta.<Str>.u32" / "cvta.<Str>.u64" and carry
//                    Requires<[hasGenericLdSt]>
//   _no  / _no_64  - match the same intrinsic but emit a plain
//                    "mov.u32" / "mov.u64"; no Requires clause is attached
// Str is the address-space name spliced into the cvta mnemonic. The two
// commented-out groups are symbol-operand variants kept for reference.
1784 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1785 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1786 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1787 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1788 Requires<[hasGenericLdSt]>;
1789 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1790 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1791 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1792 Requires<[hasGenericLdSt]>;
1794 // @TODO: Are these actually needed? I believe global addresses will be copied
1795 // to register values anyway.
1796 /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1797 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1798 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1799 Requires<[hasGenericLdSt]>;
1800 def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1801 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1802 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1803 Requires<[hasGenericLdSt]>;*/
1805 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1806 "mov.u32 \t$result, $src;",
1807 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1808 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1809 "mov.u64 \t$result, $src;",
1810 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1812 // @TODO: Are these actually needed? I believe global addresses will be copied
1813 // to register values anyway.
1814 /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1815 "mov.u32 \t$result, $src;",
1816 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1817 def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1818 "mov.u64 \t$result, $src;",
1819 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
// Generic -> non-generic pointer conversion for intrinsic Intrin.
//   _yes / _yes_64 - emit "cvta.to.<Str>.u32" / "cvta.to.<Str>.u64" and carry
//                    Requires<[hasGenericLdSt]>
//   _no  / _no_64  - match the same intrinsic but emit a plain
//                    "mov.u32" / "mov.u64"; no Requires clause is attached
1822 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1823 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1824 !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
1825 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1826 Requires<[hasGenericLdSt]>;
1827 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1828 !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
1829 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1830 Requires<[hasGenericLdSt]>;
1831 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1832 "mov.u32 \t$result, $src;",
1833 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1834 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1835 "mov.u64 \t$result, $src;",
1836 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
// Address-space conversion instantiations. cvta_<space> maps the
// int_nvvm_ptr_<space>_to_gen intrinsics; cvta_to_<space> maps the
// int_nvvm_ptr_gen_to_<space> intrinsics. The "const" mnemonic pairs with the
// intrinsics spelled "constant".
1839 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1840 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1841 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1842 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1844 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1845 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1846 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1847 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1850 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move: "mov.u32"
// for an Int32Regs operand, "mov.u64" for an Int64Regs operand.
1851 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1852 (ins Int32Regs:$src),
1853 "mov.u32 \t$result, $src;",
1854 [(set Int32Regs:$result,
1855 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1856 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1857 (ins Int64Regs:$src),
1858 "mov.u64 \t$result, $src;",
1859 [(set Int64Regs:$result,
1860 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1863 // nvvm.move intrinsics
// Register-to-register moves for the int_nvvm_move_* intrinsics, one per
// type. Mnemonics: mov.b16 / mov.b32 / mov.b64 for the integer forms,
// mov.f32 / mov.f64 for float / double, and mov.u32 / mov.u64 for the two
// int_nvvm_move_ptr widths.
1864 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1865 "mov.b16 \t$r, $s;",
1867 (int_nvvm_move_i16 Int16Regs:$s))]>;
1868 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1869 "mov.b32 \t$r, $s;",
1871 (int_nvvm_move_i32 Int32Regs:$s))]>;
1872 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1873 "mov.b64 \t$r, $s;",
1875 (int_nvvm_move_i64 Int64Regs:$s))]>;
1876 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1877 "mov.f32 \t$r, $s;",
1878 [(set Float32Regs:$r,
1879 (int_nvvm_move_float Float32Regs:$s))]>;
1880 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1881 "mov.f64 \t$r, $s;",
1882 [(set Float64Regs:$r,
1883 (int_nvvm_move_double Float64Regs:$s))]>;
1884 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1885 "mov.u32 \t$r, $s;",
1887 (int_nvvm_move_ptr Int32Regs:$s))]>;
1888 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1889 "mov.u64 \t$r, $s;",
1891 (int_nvvm_move_ptr Int64Regs:$s))]>;
1893 // @TODO: Are these actually needed, or will we always just see symbols
1894 // copied to registers first?
1895 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1896 "mov.u32 \t$r, $s;",
1898 (int_nvvm_move_ptr texternalsym:$s))]>;
1899 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1900 "mov.u64 \t$r, $s;",
1902 (int_nvvm_move_ptr texternalsym:$s))]>;*/
1905 // MoveParam %r1, param
1906 // ptr_local_to_gen %r2, %r1
1907 // ptr_gen_to_local %r3, %r2
1911 // @TODO: Revisit this. There is a type
1912 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1913 // instructions are not currently defined. However, we can use the ptr
1914 // variants and the asm printer will do the right thing.
// Collapse gen_to_local(local_to_gen(MoveParam sym)) into a single move of
// the symbol: the i64 result uses nvvm_move_ptr64, the i32 result uses
// nvvm_move_ptr32.
1915 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1916 (MoveParam texternalsym:$src)))),
1917 (nvvm_move_ptr64 texternalsym:$src)>;
1918 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1919 (MoveParam texternalsym:$src)))),
1920 (nvvm_move_ptr32 texternalsym:$src)>;
1923 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1924 "mov.u64 \t$result, $src;", []>;
1926 //-----------------------------------
1927 // Compiler Error Warn
1928 // - Just ignore them in codegen
1929 //-----------------------------------
// int_nvvm_compiler_warn / int_nvvm_compiler_error, for 32- and 64-bit
// operands. The asm string of each is only a "//" comment naming the
// intrinsic; the operand $a does not appear in the emitted text.
1931 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1932 "// llvm.nvvm.compiler.warn()",
1933 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
1934 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1935 "// llvm.nvvm.compiler.warn()",
1936 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
1937 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1938 "// llvm.nvvm.compiler.error()",
1939 [(int_nvvm_compiler_error Int32Regs:$a)]>;
1940 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1941 "// llvm.nvvm.compiler.error()",
1942 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep.<space> for the int_nvvm_isspacep_<space> intrinsics. Each space
// (const, global, local, shared) has a 32-bit and a 64-bit address variant,
// and every variant writes its result to an Int1Regs register. Only the
// const variants carry Requires<[hasPTX31]>.
1947 def ISSPACEP_CONST_32
1948 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1949 "isspacep.const \t$d, $a;",
1950 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
1951 Requires<[hasPTX31]>;
1952 def ISSPACEP_CONST_64
1953 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1954 "isspacep.const \t$d, $a;",
1955 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
1956 Requires<[hasPTX31]>;
1957 def ISSPACEP_GLOBAL_32
1958 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1959 "isspacep.global \t$d, $a;",
1960 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
1961 def ISSPACEP_GLOBAL_64
1962 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1963 "isspacep.global \t$d, $a;",
1964 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
1965 def ISSPACEP_LOCAL_32
1966 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1967 "isspacep.local \t$d, $a;",
1968 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
1969 def ISSPACEP_LOCAL_64
1970 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1971 "isspacep.local \t$d, $a;",
1972 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
1973 def ISSPACEP_SHARED_32
1974 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1975 "isspacep.shared \t$d, $a;",
1976 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
1977 def ISSPACEP_SHARED_64
1978 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1979 "isspacep.shared \t$d, $a;",
1980 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1983 // Special register reads
// Copies a SpecialRegs operand into an Int32Regs register with mov.b32. It
// has no selection pattern of its own; it is used as the output of explicit
// Pat<> records.
1984 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
1985 (ins SpecialRegs:$r),
1986 "mov.b32\t$d, $r;", []>;
// One pattern per environment register, envreg0 through envreg31: each
// int_nvvm_read_ptx_sreg_envregN intrinsic is selected to MOV_SPECIAL of the
// matching ENVREGN register.
1988 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
1989 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
1990 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
1991 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
1992 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
1993 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
1994 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
1995 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
1996 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
1997 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
1998 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
1999 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2000 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2001 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2002 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2003 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2004 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2005 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2006 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2007 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2008 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2009 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2010 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2011 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2012 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2013 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2014 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2015 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2016 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2017 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2018 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2019 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2022 // rotate builtin support
// int_nvvm_rotate_b32 with an immediate amount, selected when hasHWROT32
// holds. Emits shf.l.wrap.b32 with $src passed as both data operands.
2024 def ROTATE_B32_HW_IMM
2025 : NVPTXInst<(outs Int32Regs:$dst),
2026 (ins Int32Regs:$src, i32imm:$amt),
2027 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2028 [(set Int32Regs:$dst,
2029 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2030 Requires<[hasHWROT32]> ;
// int_nvvm_rotate_b32 with the amount in a register, selected when
// hasHWROT32 holds. Emits shf.l.wrap.b32 with $src passed as both data
// operands.
2032 def ROTATE_B32_HW_REG
2033 : NVPTXInst<(outs Int32Regs:$dst),
2034 (ins Int32Regs:$src, Int32Regs:$amt),
2035 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2036 [(set Int32Regs:$dst,
2037 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2038 Requires<[hasHWROT32]> ;
// Software fallbacks for int_nvvm_rotate_b32, selected under noHWROT32.
// The immediate form hands ROT32imm_sw both $amt and SUB_FRM_32 of $amt; the
// register form defers to ROTL32reg_sw.
// NOTE(review): SUB_FRM_32 presumably yields 32 - amt -- confirm against its
// definition.
2040 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2041 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2042 Requires<[noHWROT32]> ;
2044 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2045 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2046 Requires<[noHWROT32]> ;
2048 let hasSideEffects = 0 in {
2050 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2051 !strconcat("{{\n\t",
2052 !strconcat(".reg .b32 %dummy;\n\t",
2053 !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
2054 !strconcat("}}", "")))),
2058 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2059 !strconcat("{{\n\t",
2060 !strconcat(".reg .b32 %dummy;\n\t",
2061 !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
2062 !strconcat("}}", "")))),
2066 let hasSideEffects = 0 in {
2068 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2069 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: rebuild the 64-bit value with PACK_TWO_INT32,
// passing the GET_HI_INT64 half as the first operand and the GET_LO_INT64
// half as the second.
2072 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2073 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2074 (GET_LO_INT64 Int64Regs:$src))> ;
2076 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// 32-bit funnel-shift instructions in wrap mode, all gated by hasHWROT32 and
// declared with hasSideEffects = 0. They have no selection pattern and are
// referenced from explicit Pat<> records. Variants: left/right shift
// (shf.l / shf.r) crossed with an immediate or register $amt.
2078 let hasSideEffects = 0 in {
2079 def SHF_L_WRAP_B32_IMM
2080 : NVPTXInst<(outs Int32Regs:$dst),
2081 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2082 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2083 Requires<[hasHWROT32]>;
2085 def SHF_L_WRAP_B32_REG
2086 : NVPTXInst<(outs Int32Regs:$dst),
2087 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2088 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2089 Requires<[hasHWROT32]>;
2091 def SHF_R_WRAP_B32_IMM
2092 : NVPTXInst<(outs Int32Regs:$dst),
2093 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2094 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2095 Requires<[hasHWROT32]>;
2097 def SHF_R_WRAP_B32_REG
2098 : NVPTXInst<(outs Int32Regs:$dst),
2099 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2100 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2101 Requires<[hasHWROT32]>;
2104 // HW version of rotate 64
// Hardware (hasHWROT32) lowering of the 64-bit rotates. Each pattern builds
// its result from two 32-bit funnel shifts over the GET_HI_INT64 /
// GET_LO_INT64 halves of $src:
//   rotate_b64       - SHF_L_WRAP_B32 on (hi, lo), then on (lo, hi)
//   rotate_right_b64 - SHF_R_WRAP_B32 on (lo, hi), then on (hi, lo)
// with _IMM used for an immediate $amt and _REG for a register $amt.
2105 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2107 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2108 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2109 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2110 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2111 Requires<[hasHWROT32]>;
2113 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2115 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2116 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2117 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2118 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2119 Requires<[hasHWROT32]>;
2122 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2124 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2125 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2126 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2127 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2128 Requires<[hasHWROT32]>;
2130 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2132 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2133 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2134 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2135 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2136 Requires<[hasHWROT32]>;
2138 // SW version of rotate 64
// Software rotate-left of a 64-bit value by an immediate (noHWROT32).
// ROT64imm_sw is given the rotate amount and its complement. The value is
// 64 bits wide, so the complement is taken with SUB_FRM_64, the same
// transform the immediate rotate-right pattern for ROT64imm_sw uses;
// SUB_FRM_32 is the 32-bit counterpart used with ROT32imm_sw.
// NOTE(review): assumes SUB_FRM_64 yields 64 - amt -- confirm against its
// definition.
2139 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2140 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2141 Requires<[noHWROT32]>;
2142 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2143 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2144 Requires<[noHWROT32]>;
// Software lowering of int_nvvm_rotate_right_b64 with an immediate amount,
// selected only when the noHWROT32 predicate holds.  Reuses ROT64imm_sw with
// the immediates in the opposite order from the int_nvvm_rotate_b64 form:
// the SUB_FRM_64-transformed amount comes first, the raw amount second.
// NOTE(review): SUB_FRM_64 is defined elsewhere; presumably it yields
// 64 - amt -- confirm.
2145 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2146 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2147 Requires<[noHWROT32]>;
// Software lowering of int_nvvm_rotate_right_b64 when the amount is in a
// 32-bit register: $src and $amt are forwarded unchanged to ROTR64reg_sw
// (defined elsewhere).  Selected only when the noHWROT32 predicate holds.
2148 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2149 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2150 Requires<[noHWROT32]>;
2153 //-----------------------------------
2154 // Texture Intrinsics
2155 //-----------------------------------
2157 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
2158 // also be defined in NVPTXReplaceImageHandles.cpp
2160 // texmode_independent
2161 let IsTex = 1, IsTexModeUnified = 0 in {
2162 // Texture fetch instructions using handles
2164 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2165 Float32Regs:$b, Float32Regs:$a),
2166 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2167 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2170 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2171 Float32Regs:$b, Float32Regs:$a),
2172 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2173 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2175 def TEX_1D_F32_F32_LEVEL
2176 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2177 Float32Regs:$b, Float32Regs:$a),
2178 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2179 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2180 "[$t, $s, \\{$x\\}], $lod;",
2182 def TEX_1D_F32_F32_GRAD
2183 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2184 Float32Regs:$b, Float32Regs:$a),
2185 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2186 Float32Regs:$gradx, Float32Regs:$grady),
2187 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2188 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2191 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2192 Int32Regs:$b, Int32Regs:$a),
2193 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2194 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2197 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2198 Int32Regs:$b, Int32Regs:$a),
2199 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2200 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2202 def TEX_1D_S32_F32_LEVEL
2203 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2204 Int32Regs:$b, Int32Regs:$a),
2205 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2207 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2208 "[$t, $s, \\{$x\\}], $lod;",
2210 def TEX_1D_S32_F32_GRAD
2211 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2212 Int32Regs:$b, Int32Regs:$a),
2213 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2214 Float32Regs:$gradx, Float32Regs:$grady),
2215 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2216 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2219 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2220 Int32Regs:$b, Int32Regs:$a),
2221 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2222 "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2225 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2226 Int32Regs:$b, Int32Regs:$a),
2227 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2228 "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2230 def TEX_1D_U32_F32_LEVEL
2231 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2232 Int32Regs:$b, Int32Regs:$a),
2233 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2235 "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2236 "[$t, $s, \\{$x\\}], $lod;",
2238 def TEX_1D_U32_F32_GRAD
2239 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2240 Int32Regs:$b, Int32Regs:$a),
2241 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2242 Float32Regs:$gradx, Float32Regs:$grady),
2243 "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2244 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2247 def TEX_1D_ARRAY_F32_S32
2248 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2249 Float32Regs:$b, Float32Regs:$a),
2250 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2251 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2252 "[$t, $s, \\{$l, $x\\}];",
2254 def TEX_1D_ARRAY_F32_F32
2255 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2256 Float32Regs:$b, Float32Regs:$a),
2257 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2258 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2259 "[$t, $s, \\{$l, $x\\}];",
2261 def TEX_1D_ARRAY_F32_F32_LEVEL
2262 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2263 Float32Regs:$b, Float32Regs:$a),
2264 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2266 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2267 "[$t, $s, \\{$l, $x\\}], $lod;",
2269 def TEX_1D_ARRAY_F32_F32_GRAD
2270 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2271 Float32Regs:$b, Float32Regs:$a),
2272 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2273 Float32Regs:$gradx, Float32Regs:$grady),
2274 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2275 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2277 def TEX_1D_ARRAY_S32_S32
2278 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2279 Int32Regs:$b, Int32Regs:$a),
2280 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2281 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2282 "[$t, $s, \\{$l, $x\\}];",
2284 def TEX_1D_ARRAY_S32_F32
2285 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2286 Int32Regs:$b, Int32Regs:$a),
2287 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2288 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2289 "[$t, $s, \\{$l, $x\\}];",
2291 def TEX_1D_ARRAY_S32_F32_LEVEL
2292 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2293 Int32Regs:$b, Int32Regs:$a),
2294 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2296 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2297 "[$t, $s, \\{$l, $x\\}], $lod;",
2299 def TEX_1D_ARRAY_S32_F32_GRAD
2300 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2301 Int32Regs:$b, Int32Regs:$a),
2302 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2303 Float32Regs:$gradx, Float32Regs:$grady),
2304 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2305 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2307 def TEX_1D_ARRAY_U32_S32
2308 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2309 Int32Regs:$b, Int32Regs:$a),
2310 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2311 "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2312 "[$t, $s, \\{$l, $x\\}];",
2314 def TEX_1D_ARRAY_U32_F32
2315 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2316 Int32Regs:$b, Int32Regs:$a),
2317 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2318 "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2319 "[$t, $s, \\{$l, $x\\}];",
2321 def TEX_1D_ARRAY_U32_F32_LEVEL
2322 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2323 Int32Regs:$b, Int32Regs:$a),
2324 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2326 "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2327 "[$t, $s, \\{$l, $x\\}], $lod;",
2329 def TEX_1D_ARRAY_U32_F32_GRAD
2330 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2331 Int32Regs:$b, Int32Regs:$a),
2332 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2333 Float32Regs:$gradx, Float32Regs:$grady),
2334 "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2335 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2339 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2340 Float32Regs:$b, Float32Regs:$a),
2341 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2342 "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2343 "[$t, $s, \\{$x, $y\\}];",
2346 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2347 Float32Regs:$b, Float32Regs:$a),
2348 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2349 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2350 "[$t, $s, \\{$x, $y\\}];",
2352 def TEX_2D_F32_F32_LEVEL
2353 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2354 Float32Regs:$b, Float32Regs:$a),
2355 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2357 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2358 "[$t, $s, \\{$x, $y\\}], $lod;",
2360 def TEX_2D_F32_F32_GRAD
2361 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2362 Float32Regs:$b, Float32Regs:$a),
2363 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2364 Float32Regs:$gradx0, Float32Regs:$gradx1,
2365 Float32Regs:$grady0, Float32Regs:$grady1),
2366 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2367 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2368 "\\{$grady0, $grady1\\};",
2371 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2372 Int32Regs:$b, Int32Regs:$a),
2373 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2374 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2375 "[$t, $s, \\{$x, $y\\}];",
2378 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2379 Int32Regs:$b, Int32Regs:$a),
2380 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2381 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2382 "[$t, $s, \\{$x, $y\\}];",
2384 def TEX_2D_S32_F32_LEVEL
2385 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2386 Int32Regs:$b, Int32Regs:$a),
2387 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2389 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2390 "[$t, $s, \\{$x, $y\\}], $lod;",
2392 def TEX_2D_S32_F32_GRAD
2393 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2394 Int32Regs:$b, Int32Regs:$a),
2395 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2396 Float32Regs:$gradx0, Float32Regs:$gradx1,
2397 Float32Regs:$grady0, Float32Regs:$grady1),
2398 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2399 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2400 "\\{$grady0, $grady1\\};",
2403 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2404 Int32Regs:$b, Int32Regs:$a),
2405 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2406 "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2407 "[$t, $s, \\{$x, $y\\}];",
2410 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2411 Int32Regs:$b, Int32Regs:$a),
2412 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2413 "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2414 "[$t, $s, \\{$x, $y\\}];",
2416 def TEX_2D_U32_F32_LEVEL
2417 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2418 Int32Regs:$b, Int32Regs:$a),
2419 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2421 "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2422 "[$t, $s, \\{$x, $y\\}], $lod;",
2424 def TEX_2D_U32_F32_GRAD
2425 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2426 Int32Regs:$b, Int32Regs:$a),
2427 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2428 Float32Regs:$gradx0, Float32Regs:$gradx1,
2429 Float32Regs:$grady0, Float32Regs:$grady1),
2430 "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2431 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2432 "\\{$grady0, $grady1\\};",
2435 def TEX_2D_ARRAY_F32_S32
2436 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2437 Float32Regs:$b, Float32Regs:$a),
2438 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2440 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2441 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2443 def TEX_2D_ARRAY_F32_F32
2444 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2445 Float32Regs:$b, Float32Regs:$a),
2446 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2448 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2449 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2451 def TEX_2D_ARRAY_F32_F32_LEVEL
2452 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2453 Float32Regs:$b, Float32Regs:$a),
2454 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2455 Float32Regs:$y, Float32Regs:$lod),
2456 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2457 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2459 def TEX_2D_ARRAY_F32_F32_GRAD
2460 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2461 Float32Regs:$b, Float32Regs:$a),
2462 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2463 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2464 Float32Regs:$grady0, Float32Regs:$grady1),
2465 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2466 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2467 "\\{$grady0, $grady1\\};",
2469 def TEX_2D_ARRAY_S32_S32
2470 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2471 Int32Regs:$b, Int32Regs:$a),
2472 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2474 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2475 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2477 def TEX_2D_ARRAY_S32_F32
2478 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2479 Int32Regs:$b, Int32Regs:$a),
2480 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2482 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2483 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2485 def TEX_2D_ARRAY_S32_F32_LEVEL
2486 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2487 Int32Regs:$b, Int32Regs:$a),
2488 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2489 Float32Regs:$y, Float32Regs:$lod),
2490 "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2491 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2493 def TEX_2D_ARRAY_S32_F32_GRAD
2494 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2495 Int32Regs:$b, Int32Regs:$a),
2496 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2498 Float32Regs:$gradx0, Float32Regs:$gradx1,
2499 Float32Regs:$grady0, Float32Regs:$grady1),
2500 "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2501 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2502 "\\{$grady0, $grady1\\};",
2504 def TEX_2D_ARRAY_U32_S32
2505 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2506 Int32Regs:$b, Int32Regs:$a),
2507 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2509 "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2510 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2512 def TEX_2D_ARRAY_U32_F32
2513 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2514 Int32Regs:$b, Int32Regs:$a),
2515 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2517 "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2518 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2520 def TEX_2D_ARRAY_U32_F32_LEVEL
2521 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2522 Int32Regs:$b, Int32Regs:$a),
2523 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2524 Float32Regs:$y, Float32Regs:$lod),
2525 "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2526 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2528 def TEX_2D_ARRAY_U32_F32_GRAD
2529 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2530 Int32Regs:$b, Int32Regs:$a),
2531 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2533 Float32Regs:$gradx0, Float32Regs:$gradx1,
2534 Float32Regs:$grady0, Float32Regs:$grady1),
2535 "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2536 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2537 "\\{$grady0, $grady1\\};",
2541 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2542 Float32Regs:$b, Float32Regs:$a),
2543 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2545 "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2546 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2549 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2550 Float32Regs:$b, Float32Regs:$a),
2551 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2553 "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2554 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2556 def TEX_3D_F32_F32_LEVEL
2557 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2558 Float32Regs:$b, Float32Regs:$a),
2559 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2560 Float32Regs:$z, Float32Regs:$lod),
2561 "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2562 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2564 def TEX_3D_F32_F32_GRAD
2565 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2566 Float32Regs:$b, Float32Regs:$a),
2567 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2569 Float32Regs:$gradx0, Float32Regs:$gradx1,
2570 Float32Regs:$gradx2, Float32Regs:$grady0,
2571 Float32Regs:$grady1, Float32Regs:$grady2),
2572 "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2573 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2574 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2575 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2578 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2579 Int32Regs:$b, Int32Regs:$a),
2580 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2582 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2583 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2586 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2587 Int32Regs:$b, Int32Regs:$a),
2588 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2590 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2591 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2593 def TEX_3D_S32_F32_LEVEL
2594 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2595 Int32Regs:$b, Int32Regs:$a),
2596 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2597 Float32Regs:$z, Float32Regs:$lod),
2598 "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2599 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2601 def TEX_3D_S32_F32_GRAD
2602 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2603 Int32Regs:$b, Int32Regs:$a),
2604 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2606 Float32Regs:$gradx0, Float32Regs:$gradx1,
2607 Float32Regs:$gradx2, Float32Regs:$grady0,
2608 Float32Regs:$grady1, Float32Regs:$grady2),
2609 "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2610 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2611 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2612 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2615 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2616 Int32Regs:$b, Int32Regs:$a),
2617 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2619 "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2620 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2623 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2624 Int32Regs:$b, Int32Regs:$a),
2625 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2627 "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2628 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2630 def TEX_3D_U32_F32_LEVEL
2631 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2632 Int32Regs:$b, Int32Regs:$a),
2633 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2634 Float32Regs:$z, Float32Regs:$lod),
2635 "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2636 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2638 def TEX_3D_U32_F32_GRAD
2639 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2640 Int32Regs:$b, Int32Regs:$a),
2641 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2643 Float32Regs:$gradx0, Float32Regs:$gradx1,
2644 Float32Regs:$gradx2, Float32Regs:$grady0,
2645 Float32Regs:$grady1, Float32Regs:$grady2),
2646 "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2647 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2648 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2649 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2652 def TEX_CUBE_F32_F32
2653 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2654 Float32Regs:$b, Float32Regs:$a),
2655 (ins Int64Regs:$t, Int64Regs:$s,
2656 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2657 "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2658 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2660 def TEX_CUBE_F32_F32_LEVEL
2661 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2662 Float32Regs:$b, Float32Regs:$a),
2663 (ins Int64Regs:$t, Int64Regs:$s,
2664 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2666 "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2667 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2669 def TEX_CUBE_S32_F32
2670 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2671 Int32Regs:$b, Int32Regs:$a),
2672 (ins Int64Regs:$t, Int64Regs:$s,
2673 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2674 "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2675 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2677 def TEX_CUBE_S32_F32_LEVEL
2678 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2679 Int32Regs:$b, Int32Regs:$a),
2680 (ins Int64Regs:$t, Int64Regs:$s,
2681 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2683 "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2684 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2686 def TEX_CUBE_U32_F32
2687 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2688 Int32Regs:$b, Int32Regs:$a),
2689 (ins Int64Regs:$t, Int64Regs:$s,
2690 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2691 "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2692 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2694 def TEX_CUBE_U32_F32_LEVEL
2695 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2696 Int32Regs:$b, Int32Regs:$a),
2697 (ins Int64Regs:$t, Int64Regs:$s,
2698 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2700 "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2701 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2704 def TEX_CUBE_ARRAY_F32_F32
2705 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2706 Float32Regs:$b, Float32Regs:$a),
2707 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2708 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2709 "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2710 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2712 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2713 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2714 Float32Regs:$b, Float32Regs:$a),
2715 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2716 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2718 "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2719 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2721 def TEX_CUBE_ARRAY_S32_F32
2722 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2723 Int32Regs:$b, Int32Regs:$a),
2724 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2725 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2726 "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2727 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2729 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2730 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2731 Int32Regs:$b, Int32Regs:$a),
2732 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2733 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2735 "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2736 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2738 def TEX_CUBE_ARRAY_U32_F32
2739 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2740 Int32Regs:$b, Int32Regs:$a),
2741 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2742 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2743 "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2744 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2746 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2747 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2748 Int32Regs:$b, Int32Regs:$a),
2749 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2750 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2752 "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2753 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2756 def TLD4_R_2D_F32_F32
2757 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2758 Float32Regs:$v2, Float32Regs:$v3),
2759 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2760 "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2761 "[$t, $s, \\{$x, $y\\}];",
2763 def TLD4_G_2D_F32_F32
2764 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2765 Float32Regs:$v2, Float32Regs:$v3),
2766 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2767 "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2768 "[$t, $s, \\{$x, $y\\}];",
2770 def TLD4_B_2D_F32_F32
2771 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2772 Float32Regs:$v2, Float32Regs:$v3),
2773 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2774 "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2775 "[$t, $s, \\{$x, $y\\}];",
2777 def TLD4_A_2D_F32_F32
2778 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2779 Float32Regs:$v2, Float32Regs:$v3),
2780 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2781 "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2782 "[$t, $s, \\{$x, $y\\}];",
2784 def TLD4_R_2D_S32_F32
2785 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2786 Int32Regs:$v2, Int32Regs:$v3),
2787 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2788 "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2789 "[$t, $s, \\{$x, $y\\}];",
2791 def TLD4_G_2D_S32_F32
2792 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2793 Int32Regs:$v2, Int32Regs:$v3),
2794 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2795 "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2796 "[$t, $s, \\{$x, $y\\}];",
2798 def TLD4_B_2D_S32_F32
2799 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2800 Int32Regs:$v2, Int32Regs:$v3),
2801 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2802 "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2803 "[$t, $s, \\{$x, $y\\}];",
2805 def TLD4_A_2D_S32_F32
2806 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2807 Int32Regs:$v2, Int32Regs:$v3),
2808 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2809 "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2810 "[$t, $s, \\{$x, $y\\}];",
2812 def TLD4_R_2D_U32_F32
2813 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2814 Int32Regs:$v2, Int32Regs:$v3),
2815 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2816 "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2817 "[$t, $s, \\{$x, $y\\}];",
2819 def TLD4_G_2D_U32_F32
2820 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2821 Int32Regs:$v2, Int32Regs:$v3),
2822 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2823 "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2824 "[$t, $s, \\{$x, $y\\}];",
2826 def TLD4_B_2D_U32_F32
2827 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2828 Int32Regs:$v2, Int32Regs:$v3),
2829 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2830 "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2831 "[$t, $s, \\{$x, $y\\}];",
2833 def TLD4_A_2D_U32_F32
2834 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2835 Int32Regs:$v2, Int32Regs:$v3),
2836 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2837 "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2838 "[$t, $s, \\{$x, $y\\}];",
2844 let IsTex = 1, IsTexModeUnified = 1 in {
2845 // Texture fetch instructions using handles
2846 def TEX_UNIFIED_1D_F32_S32
2847 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2848 Float32Regs:$b, Float32Regs:$a),
2849 (ins Int64Regs:$t, Int32Regs:$x),
2850 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2852 def TEX_UNIFIED_1D_F32_F32
2853 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2854 Float32Regs:$b, Float32Regs:$a),
2855 (ins Int64Regs:$t, Float32Regs:$x),
2856 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2858 def TEX_UNIFIED_1D_F32_F32_LEVEL
2859 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2860 Float32Regs:$b, Float32Regs:$a),
2861 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2862 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2863 "[$t, \\{$x\\}], $lod;",
2865 def TEX_UNIFIED_1D_F32_F32_GRAD
2866 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2867 Float32Regs:$b, Float32Regs:$a),
2868 (ins Int64Regs:$t, Float32Regs:$x,
2869 Float32Regs:$gradx, Float32Regs:$grady),
2870 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2871 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2873 def TEX_UNIFIED_1D_S32_S32
2874 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2875 Int32Regs:$b, Int32Regs:$a),
2876 (ins Int64Regs:$t, Int32Regs:$x),
2877 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2879 def TEX_UNIFIED_1D_S32_F32
2880 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2881 Int32Regs:$b, Int32Regs:$a),
2882 (ins Int64Regs:$t, Float32Regs:$x),
2883 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2885 def TEX_UNIFIED_1D_S32_F32_LEVEL
2886 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2887 Int32Regs:$b, Int32Regs:$a),
2888 (ins Int64Regs:$t, Float32Regs:$x,
2890 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2891 "[$t, \\{$x\\}], $lod;",
2893 def TEX_UNIFIED_1D_S32_F32_GRAD
2894 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2895 Int32Regs:$b, Int32Regs:$a),
2896 (ins Int64Regs:$t, Float32Regs:$x,
2897 Float32Regs:$gradx, Float32Regs:$grady),
2898 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2899 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2901 def TEX_UNIFIED_1D_U32_S32
2902 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2903 Int32Regs:$b, Int32Regs:$a),
2904 (ins Int64Regs:$t, Int32Regs:$x),
2905 "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2907 def TEX_UNIFIED_1D_U32_F32
2908 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2909 Int32Regs:$b, Int32Regs:$a),
2910 (ins Int64Regs:$t, Float32Regs:$x),
2911 "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2913 def TEX_UNIFIED_1D_U32_F32_LEVEL
2914 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2915 Int32Regs:$b, Int32Regs:$a),
2916 (ins Int64Regs:$t, Float32Regs:$x,
2918 "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2919 "[$t, \\{$x\\}], $lod;",
2921 def TEX_UNIFIED_1D_U32_F32_GRAD
2922 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2923 Int32Regs:$b, Int32Regs:$a),
2924 (ins Int64Regs:$t, Float32Regs:$x,
2925 Float32Regs:$gradx, Float32Regs:$grady),
2926 "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2927 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2930 def TEX_UNIFIED_1D_ARRAY_F32_S32
2931 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2932 Float32Regs:$b, Float32Regs:$a),
2933 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2934 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2935 "[$t, \\{$l, $x\\}];",
2937 def TEX_UNIFIED_1D_ARRAY_F32_F32
2938 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2939 Float32Regs:$b, Float32Regs:$a),
2940 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2941 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2942 "[$t, \\{$l, $x\\}];",
2944 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
2945 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2946 Float32Regs:$b, Float32Regs:$a),
2947 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2949 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2950 "[$t, \\{$l, $x\\}], $lod;",
2952 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
2953 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2954 Float32Regs:$b, Float32Regs:$a),
2955 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2956 Float32Regs:$gradx, Float32Regs:$grady),
2957 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2958 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2960 def TEX_UNIFIED_1D_ARRAY_S32_S32
2961 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2962 Int32Regs:$b, Int32Regs:$a),
2963 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2964 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2965 "[$t, \\{$l, $x\\}];",
2967 def TEX_UNIFIED_1D_ARRAY_S32_F32
2968 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2969 Int32Regs:$b, Int32Regs:$a),
2970 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2971 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2972 "[$t, \\{$l, $x\\}];",
2974 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
2975 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2976 Int32Regs:$b, Int32Regs:$a),
2977 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2979 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2980 "[$t, \\{$l, $x\\}], $lod;",
2982 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
2983 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2984 Int32Regs:$b, Int32Regs:$a),
2985 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2986 Float32Regs:$gradx, Float32Regs:$grady),
2987 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2988 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_1D_ARRAY_U32_*: same operand layout as the S32 1D-array
// records, but the mnemonic selects the .u32 result type. Results still go
// to Int32Regs ($r, $g, $b, $a); signedness is carried only by the mnemonic.
// Integer-coordinate form.
2990 def TEX_UNIFIED_1D_ARRAY_U32_S32
2991 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2992 Int32Regs:$b, Int32Regs:$a),
2993 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2994 "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2995 "[$t, \\{$l, $x\\}];",
// Float-coordinate form.
2997 def TEX_UNIFIED_1D_ARRAY_U32_F32
2998 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2999 Int32Regs:$b, Int32Regs:$a),
3000 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3001 "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3002 "[$t, \\{$l, $x\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3004 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3005 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3006 Int32Regs:$b, Int32Regs:$a),
3007 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3009 "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3010 "[$t, \\{$l, $x\\}], $lod;",
// `tex.grad` form: appends one-element {$gradx} and {$grady} vectors.
3012 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3013 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3014 Int32Regs:$b, Int32Regs:$a),
3015 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3016 Float32Regs:$gradx, Float32Regs:$grady),
3017 "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3018 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_2D_F32_*: texture fetches using the PTX `.2d` geometry that
// write four .f32 channels to Float32Regs ($r, $g, $b, $a).
// Operands are one 64-bit handle $t and a two-element coordinate {$x, $y}.
// Integer-coordinate form.
3021 def TEX_UNIFIED_2D_F32_S32
3022 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3023 Float32Regs:$b, Float32Regs:$a),
3024 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3025 "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3026 "[$t, \\{$x, $y\\}];",
// Float-coordinate form.
3028 def TEX_UNIFIED_2D_F32_F32
3029 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3030 Float32Regs:$b, Float32Regs:$a),
3031 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3032 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3033 "[$t, \\{$x, $y\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3035 def TEX_UNIFIED_2D_F32_F32_LEVEL
3036 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3037 Float32Regs:$b, Float32Regs:$a),
3038 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3040 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3041 "[$t, \\{$x, $y\\}], $lod;",
// `tex.grad` form: appends two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3043 def TEX_UNIFIED_2D_F32_F32_GRAD
3044 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3045 Float32Regs:$b, Float32Regs:$a),
3046 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3047 Float32Regs:$gradx0, Float32Regs:$gradx1,
3048 Float32Regs:$grady0, Float32Regs:$grady1),
3049 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3050 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3051 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_S32_*: `.2d` texture fetches that write four .s32 channels
// to Int32Regs ($r, $g, $b, $a) from handle $t at coordinate {$x, $y}.
// Integer-coordinate form.
3053 def TEX_UNIFIED_2D_S32_S32
3054 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3055 Int32Regs:$b, Int32Regs:$a),
3056 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3057 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3058 "[$t, \\{$x, $y\\}];",
// Float-coordinate form.
3060 def TEX_UNIFIED_2D_S32_F32
3061 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3062 Int32Regs:$b, Int32Regs:$a),
3063 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3064 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3065 "[$t, \\{$x, $y\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3067 def TEX_UNIFIED_2D_S32_F32_LEVEL
3068 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3069 Int32Regs:$b, Int32Regs:$a),
3070 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3072 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3073 "[$t, \\{$x, $y\\}], $lod;",
// `tex.grad` form: appends two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3075 def TEX_UNIFIED_2D_S32_F32_GRAD
3076 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3077 Int32Regs:$b, Int32Regs:$a),
3078 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3079 Float32Regs:$gradx0, Float32Regs:$gradx1,
3080 Float32Regs:$grady0, Float32Regs:$grady1),
3081 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3082 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3083 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_U32_*: `.2d` texture fetches with the .u32 result type.
// Results go to Int32Regs ($r, $g, $b, $a); handle $t, coordinate {$x, $y}.
// Integer-coordinate form.
3085 def TEX_UNIFIED_2D_U32_S32
3086 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3087 Int32Regs:$b, Int32Regs:$a),
3088 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3089 "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3090 "[$t, \\{$x, $y\\}];",
// Float-coordinate form.
3092 def TEX_UNIFIED_2D_U32_F32
3093 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3094 Int32Regs:$b, Int32Regs:$a),
3095 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3096 "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3097 "[$t, \\{$x, $y\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3099 def TEX_UNIFIED_2D_U32_F32_LEVEL
3100 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3101 Int32Regs:$b, Int32Regs:$a),
3102 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3104 "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3105 "[$t, \\{$x, $y\\}], $lod;",
// `tex.grad` form: appends two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3107 def TEX_UNIFIED_2D_U32_F32_GRAD
3108 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3109 Int32Regs:$b, Int32Regs:$a),
3110 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3111 Float32Regs:$gradx0, Float32Regs:$gradx1,
3112 Float32Regs:$grady0, Float32Regs:$grady1),
3113 "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3114 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3115 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_ARRAY_F32_*: texture fetches using the PTX `.a2d` geometry
// that write four .f32 channels to Float32Regs ($r, $g, $b, $a).
// The coordinate operand is printed as {$l, $x, $y, $y}: $y is emitted twice
// so the vector has four elements. NOTE(review): presumably the fourth slot
// is padding that PTX ignores for a2d -- confirm against the PTX ISA.
// Integer-coordinate form.
3118 def TEX_UNIFIED_2D_ARRAY_F32_S32
3119 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3120 Float32Regs:$b, Float32Regs:$a),
3121 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3123 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3124 "[$t, \\{$l, $x, $y, $y\\}];",
// Float-coordinate form.
3126 def TEX_UNIFIED_2D_ARRAY_F32_F32
3127 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3128 Float32Regs:$b, Float32Regs:$a),
3129 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3131 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3132 "[$t, \\{$l, $x, $y, $y\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3134 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3135 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3136 Float32Regs:$b, Float32Regs:$a),
3137 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3138 Float32Regs:$y, Float32Regs:$lod),
3139 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3140 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// `tex.grad` form: appends two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3142 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3143 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3144 Float32Regs:$b, Float32Regs:$a),
3145 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3146 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3147 Float32Regs:$grady0, Float32Regs:$grady1),
3148 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3149 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3150 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_ARRAY_S32_*: `.a2d` texture fetches that write four .s32
// channels to Int32Regs ($r, $g, $b, $a). The coordinate operand is printed
// as {$l, $x, $y, $y}, with $y repeated to fill the fourth vector element.
// Integer-coordinate form.
3152 def TEX_UNIFIED_2D_ARRAY_S32_S32
3153 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3154 Int32Regs:$b, Int32Regs:$a),
3155 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3157 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3158 "[$t, \\{$l, $x, $y, $y\\}];",
// Float-coordinate form.
3160 def TEX_UNIFIED_2D_ARRAY_S32_F32
3161 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3162 Int32Regs:$b, Int32Regs:$a),
3163 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3165 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3166 "[$t, \\{$l, $x, $y, $y\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3168 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3169 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3170 Int32Regs:$b, Int32Regs:$a),
3171 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3172 Float32Regs:$y, Float32Regs:$lod),
3173 "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3174 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// `tex.grad` form: appends two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3176 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3177 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3178 Int32Regs:$b, Int32Regs:$a),
3179 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3181 Float32Regs:$gradx0, Float32Regs:$gradx1,
3182 Float32Regs:$grady0, Float32Regs:$grady1),
3183 "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3184 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3185 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_ARRAY_U32_*: `.a2d` texture fetches with the .u32 result
// type, written to Int32Regs ($r, $g, $b, $a). The coordinate operand is
// printed as {$l, $x, $y, $y}, with $y repeated in the fourth element.
// Integer-coordinate form.
3187 def TEX_UNIFIED_2D_ARRAY_U32_S32
3188 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3189 Int32Regs:$b, Int32Regs:$a),
3190 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3192 "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3193 "[$t, \\{$l, $x, $y, $y\\}];",
// Float-coordinate form.
3195 def TEX_UNIFIED_2D_ARRAY_U32_F32
3196 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3197 Int32Regs:$b, Int32Regs:$a),
3198 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3200 "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3201 "[$t, \\{$l, $x, $y, $y\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3203 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3204 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3205 Int32Regs:$b, Int32Regs:$a),
3206 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3207 Float32Regs:$y, Float32Regs:$lod),
3208 "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3209 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// `tex.grad` form: appends two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
3211 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3212 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3213 Int32Regs:$b, Int32Regs:$a),
3214 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3216 Float32Regs:$gradx0, Float32Regs:$gradx1,
3217 Float32Regs:$grady0, Float32Regs:$grady1),
3218 "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3219 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3220 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_3D_F32_*: texture fetches using the PTX `.3d` geometry that
// write four .f32 channels to Float32Regs ($r, $g, $b, $a).
// The coordinate operand is printed as {$x, $y, $z, $z}: $z is emitted twice
// so the vector has four elements. NOTE(review): presumably the fourth slot
// is padding that PTX ignores for 3d -- confirm against the PTX ISA.
// Integer-coordinate form.
3223 def TEX_UNIFIED_3D_F32_S32
3224 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3225 Float32Regs:$b, Float32Regs:$a),
3226 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3228 "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3229 "[$t, \\{$x, $y, $z, $z\\}];",
// Float-coordinate form.
3231 def TEX_UNIFIED_3D_F32_F32
3232 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3233 Float32Regs:$b, Float32Regs:$a),
3234 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3236 "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3237 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3239 def TEX_UNIFIED_3D_F32_F32_LEVEL
3240 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3241 Float32Regs:$b, Float32Regs:$a),
3242 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3243 Float32Regs:$z, Float32Regs:$lod),
3244 "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3245 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// `tex.grad` form: three gradient components per axis; each gradient vector
// is printed with its last component repeated to reach four elements.
3247 def TEX_UNIFIED_3D_F32_F32_GRAD
3248 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3249 Float32Regs:$b, Float32Regs:$a),
3250 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3252 Float32Regs:$gradx0, Float32Regs:$gradx1,
3253 Float32Regs:$gradx2, Float32Regs:$grady0,
3254 Float32Regs:$grady1, Float32Regs:$grady2),
3255 "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3256 "[$t, \\{$x, $y, $z, $z\\}], "
3257 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3258 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_UNIFIED_3D_S32_*: `.3d` texture fetches that write four .s32 channels
// to Int32Regs ($r, $g, $b, $a). The coordinate operand is printed as
// {$x, $y, $z, $z}, with $z repeated to fill the fourth vector element.
// Integer-coordinate form.
3260 def TEX_UNIFIED_3D_S32_S32
3261 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3262 Int32Regs:$b, Int32Regs:$a),
3263 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3265 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3266 "[$t, \\{$x, $y, $z, $z\\}];",
// Float-coordinate form.
3268 def TEX_UNIFIED_3D_S32_F32
3269 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3270 Int32Regs:$b, Int32Regs:$a),
3271 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3273 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3274 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3276 def TEX_UNIFIED_3D_S32_F32_LEVEL
3277 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3278 Int32Regs:$b, Int32Regs:$a),
3279 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3280 Float32Regs:$z, Float32Regs:$lod),
3281 "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3282 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// `tex.grad` form: three gradient components per axis; each gradient vector
// is printed with its last component repeated to reach four elements.
3284 def TEX_UNIFIED_3D_S32_F32_GRAD
3285 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3286 Int32Regs:$b, Int32Regs:$a),
3287 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3289 Float32Regs:$gradx0, Float32Regs:$gradx1,
3290 Float32Regs:$gradx2, Float32Regs:$grady0,
3291 Float32Regs:$grady1, Float32Regs:$grady2),
3292 "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3293 "[$t, \\{$x, $y, $z, $z\\}], "
3294 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3295 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_UNIFIED_3D_U32_*: `.3d` texture fetches with the .u32 result type,
// written to Int32Regs ($r, $g, $b, $a). The coordinate operand is printed
// as {$x, $y, $z, $z}, with $z repeated in the fourth element.
// Integer-coordinate form.
3297 def TEX_UNIFIED_3D_U32_S32
3298 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3299 Int32Regs:$b, Int32Regs:$a),
3300 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3302 "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3303 "[$t, \\{$x, $y, $z, $z\\}];",
// Float-coordinate form.
3305 def TEX_UNIFIED_3D_U32_F32
3306 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3307 Int32Regs:$b, Int32Regs:$a),
3308 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3310 "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3311 "[$t, \\{$x, $y, $z, $z\\}];",
// `tex.level` form: the asm string appends a trailing $lod operand.
3313 def TEX_UNIFIED_3D_U32_F32_LEVEL
3314 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3315 Int32Regs:$b, Int32Regs:$a),
3316 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3317 Float32Regs:$z, Float32Regs:$lod),
3318 "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3319 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// `tex.grad` form: three gradient components per axis; each gradient vector
// is printed with its last component repeated to reach four elements.
3321 def TEX_UNIFIED_3D_U32_F32_GRAD
3322 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3323 Int32Regs:$b, Int32Regs:$a),
3324 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3326 Float32Regs:$gradx0, Float32Regs:$gradx1,
3327 Float32Regs:$gradx2, Float32Regs:$grady0,
3328 Float32Regs:$grady1, Float32Regs:$grady2),
3329 "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3330 "[$t, \\{$x, $y, $z, $z\\}], "
3331 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3332 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_UNIFIED_CUBE_*: texture fetches using the PTX `.cube` geometry.
// All records take float coordinates printed as {$x, $y, $z, $z}, with $z
// repeated in the fourth element. Only plain and `tex.level` forms appear in
// this group. F32 results go to Float32Regs; S32 and U32 results both go to
// Int32Regs.
// .f32 result, plain fetch.
3335 def TEX_UNIFIED_CUBE_F32_F32
3336 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3337 Float32Regs:$b, Float32Regs:$a),
3339 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3340 "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3341 "[$t, \\{$x, $y, $z, $z\\}];",
// .f32 result, `tex.level` form with a trailing $lod operand.
3343 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3344 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3345 Float32Regs:$b, Float32Regs:$a),
3347 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3349 "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3350 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// .s32 result, plain fetch.
3352 def TEX_UNIFIED_CUBE_S32_F32
3353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3354 Int32Regs:$b, Int32Regs:$a),
3356 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3357 "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3358 "[$t, \\{$x, $y, $z, $z\\}];",
// .s32 result, `tex.level` form with a trailing $lod operand.
3360 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3361 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3362 Int32Regs:$b, Int32Regs:$a),
3364 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3366 "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3367 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// .u32 result, plain fetch.
3369 def TEX_UNIFIED_CUBE_U32_F32
3370 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3371 Int32Regs:$b, Int32Regs:$a),
3373 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3374 "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3375 "[$t, \\{$x, $y, $z, $z\\}];",
// .u32 result, `tex.level` form with a trailing $lod operand.
3377 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3378 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3379 Int32Regs:$b, Int32Regs:$a),
3381 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3383 "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3384 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_UNIFIED_CUBE_ARRAY_*: texture fetches using the PTX `.acube` geometry.
// The coordinate operand is {$l, $x, $y, $z}: an Int32Regs $l followed by
// three float coordinates. All four vector slots carry distinct operands, so
// nothing is repeated here. Only plain and `tex.level` forms appear in this
// group.
// .f32 result, plain fetch.
3387 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3388 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3389 Float32Regs:$b, Float32Regs:$a),
3390 (ins Int64Regs:$t, Int32Regs:$l,
3391 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3392 "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3393 "[$t, \\{$l, $x, $y, $z\\}];",
// .f32 result, `tex.level` form with a trailing $lod operand.
3395 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3396 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3397 Float32Regs:$b, Float32Regs:$a),
3398 (ins Int64Regs:$t, Int32Regs:$l,
3399 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3401 "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3402 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// .s32 result, plain fetch.
3404 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3405 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3406 Int32Regs:$b, Int32Regs:$a),
3407 (ins Int64Regs:$t, Int32Regs:$l,
3408 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3409 "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3410 "[$t, \\{$l, $x, $y, $z\\}];",
// .s32 result, `tex.level` form with a trailing $lod operand.
3412 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3413 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3414 Int32Regs:$b, Int32Regs:$a),
3415 (ins Int64Regs:$t, Int32Regs:$l,
3416 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3418 "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3419 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// .u32 result, plain fetch.
3421 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3422 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3423 Int32Regs:$b, Int32Regs:$a),
3424 (ins Int64Regs:$t, Int32Regs:$l,
3425 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3426 "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3427 "[$t, \\{$l, $x, $y, $z\\}];",
// .u32 result, `tex.level` form with a trailing $lod operand.
3429 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3430 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3431 Int32Regs:$b, Int32Regs:$a),
3432 (ins Int64Regs:$t, Int32Regs:$l,
3433 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3435 "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3436 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// TLD4_UNIFIED_{R,G,B,A}_2D_F32_F32: PTX `tld4` on a 2D texture with float
// coordinates {$x, $y} and one 64-bit handle $t. The R/G/B/A in the record
// name matches the component selector in the mnemonic (.r/.g/.b/.a). Each
// record writes four .f32 values to Float32Regs $v0..$v3.
// Component selector .r.
3439 def TLD4_UNIFIED_R_2D_F32_F32
3440 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3441 Float32Regs:$v2, Float32Regs:$v3),
3442 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3443 "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3444 "[$t, \\{$x, $y\\}];",
// Component selector .g.
3446 def TLD4_UNIFIED_G_2D_F32_F32
3447 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3448 Float32Regs:$v2, Float32Regs:$v3),
3449 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3450 "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3451 "[$t, \\{$x, $y\\}];",
// Component selector .b.
3453 def TLD4_UNIFIED_B_2D_F32_F32
3454 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3455 Float32Regs:$v2, Float32Regs:$v3),
3456 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3457 "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3458 "[$t, \\{$x, $y\\}];",
// Component selector .a.
3460 def TLD4_UNIFIED_A_2D_F32_F32
3461 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3462 Float32Regs:$v2, Float32Regs:$v3),
3463 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3464 "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3465 "[$t, \\{$x, $y\\}];",
// TLD4_UNIFIED_{R,G,B,A}_2D_S32_F32: PTX `tld4` on a 2D texture with float
// coordinates {$x, $y} and the .s32 result type. Each record writes four
// values to Int32Regs $v0..$v3; the record-name letter matches the mnemonic's
// component selector.
// Component selector .r.
3467 def TLD4_UNIFIED_R_2D_S32_F32
3468 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3469 Int32Regs:$v2, Int32Regs:$v3),
3470 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3471 "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3472 "[$t, \\{$x, $y\\}];",
// Component selector .g.
3474 def TLD4_UNIFIED_G_2D_S32_F32
3475 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3476 Int32Regs:$v2, Int32Regs:$v3),
3477 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3478 "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3479 "[$t, \\{$x, $y\\}];",
// Component selector .b.
3481 def TLD4_UNIFIED_B_2D_S32_F32
3482 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3483 Int32Regs:$v2, Int32Regs:$v3),
3484 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3485 "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3486 "[$t, \\{$x, $y\\}];",
// Component selector .a.
3488 def TLD4_UNIFIED_A_2D_S32_F32
3489 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3490 Int32Regs:$v2, Int32Regs:$v3),
3491 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3492 "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3493 "[$t, \\{$x, $y\\}];",
// TLD4_UNIFIED_{R,G,B,A}_2D_U32_F32: PTX `tld4` on a 2D texture with float
// coordinates {$x, $y} and the .u32 result type. Each record writes four
// values to Int32Regs $v0..$v3; the record-name letter matches the mnemonic's
// component selector.
// Component selector .r.
3495 def TLD4_UNIFIED_R_2D_U32_F32
3496 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3497 Int32Regs:$v2, Int32Regs:$v3),
3498 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3499 "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3500 "[$t, \\{$x, $y\\}];",
// Component selector .g.
3502 def TLD4_UNIFIED_G_2D_U32_F32
3503 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3504 Int32Regs:$v2, Int32Regs:$v3),
3505 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3506 "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3507 "[$t, \\{$x, $y\\}];",
// Component selector .b.
3509 def TLD4_UNIFIED_B_2D_U32_F32
3510 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3511 Int32Regs:$v2, Int32Regs:$v3),
3512 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3513 "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3514 "[$t, \\{$x, $y\\}];",
// Component selector .a.
3516 def TLD4_UNIFIED_A_2D_U32_F32
3517 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3518 Int32Regs:$v2, Int32Regs:$v3),
3519 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3520 "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3521 "[$t, \\{$x, $y\\}];",
3527 //=== Surface load instructions
// SULD_1D_I{8,16,32,64}_CLAMP: scalar `suld.b.1d` surface loads with the
// `.clamp` suffix. Operands are a 64-bit handle $s and one Int32Regs
// coordinate $x. The b8 and b16 forms both write Int16Regs; b32 writes
// Int32Regs and b64 writes Int64Regs.
// 8-bit element; the result lives in a 16-bit register.
3530 def SULD_1D_I8_CLAMP
3531 : NVPTXInst<(outs Int16Regs:$r),
3532 (ins Int64Regs:$s, Int32Regs:$x),
3533 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
// 16-bit element.
3535 def SULD_1D_I16_CLAMP
3536 : NVPTXInst<(outs Int16Regs:$r),
3537 (ins Int64Regs:$s, Int32Regs:$x),
3538 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
// 32-bit element.
3540 def SULD_1D_I32_CLAMP
3541 : NVPTXInst<(outs Int32Regs:$r),
3542 (ins Int64Regs:$s, Int32Regs:$x),
3543 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
// 64-bit element.
3545 def SULD_1D_I64_CLAMP
3546 : NVPTXInst<(outs Int64Regs:$r),
3547 (ins Int64Regs:$s, Int32Regs:$x),
3548 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_I{8,16,32,64}_CLAMP: scalar `suld.b.a1d` surface loads with
// the `.clamp` suffix. The coordinate vector is {$l, $x}, both Int32Regs.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3551 def SULD_1D_ARRAY_I8_CLAMP
3552 : NVPTXInst<(outs Int16Regs:$r),
3553 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3554 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// 16-bit element.
3556 def SULD_1D_ARRAY_I16_CLAMP
3557 : NVPTXInst<(outs Int16Regs:$r),
3558 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3559 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// 32-bit element.
3561 def SULD_1D_ARRAY_I32_CLAMP
3562 : NVPTXInst<(outs Int32Regs:$r),
3563 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3564 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// 64-bit element.
3566 def SULD_1D_ARRAY_I64_CLAMP
3567 : NVPTXInst<(outs Int64Regs:$r),
3568 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3569 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// SULD_2D_I{8,16,32,64}_CLAMP: scalar `suld.b.2d` surface loads with the
// `.clamp` suffix. The coordinate vector is {$x, $y}, both Int32Regs.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3572 def SULD_2D_I8_CLAMP
3573 : NVPTXInst<(outs Int16Regs:$r),
3574 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3575 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// 16-bit element.
3577 def SULD_2D_I16_CLAMP
3578 : NVPTXInst<(outs Int16Regs:$r),
3579 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3580 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// 32-bit element.
3582 def SULD_2D_I32_CLAMP
3583 : NVPTXInst<(outs Int32Regs:$r),
3584 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3585 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// 64-bit element.
3587 def SULD_2D_I64_CLAMP
3588 : NVPTXInst<(outs Int64Regs:$r),
3589 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3590 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_I{8,16,32,64}_CLAMP: scalar `suld.b.a2d` surface loads with
// the `.clamp` suffix. The coordinate vector is printed as {$l, $x, $y, $y},
// with $y repeated so the vector has four elements.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3593 def SULD_2D_ARRAY_I8_CLAMP
3594 : NVPTXInst<(outs Int16Regs:$r),
3595 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3596 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 16-bit element.
3598 def SULD_2D_ARRAY_I16_CLAMP
3599 : NVPTXInst<(outs Int16Regs:$r),
3600 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3601 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 32-bit element.
3603 def SULD_2D_ARRAY_I32_CLAMP
3604 : NVPTXInst<(outs Int32Regs:$r),
3605 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3606 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 64-bit element.
3608 def SULD_2D_ARRAY_I64_CLAMP
3609 : NVPTXInst<(outs Int64Regs:$r),
3610 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3611 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_I{8,16,32,64}_CLAMP: scalar `suld.b.3d` surface loads with the
// `.clamp` suffix. The coordinate vector is printed as {$x, $y, $z, $z},
// with $z repeated so the vector has four elements.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3614 def SULD_3D_I8_CLAMP
3615 : NVPTXInst<(outs Int16Regs:$r),
3616 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3617 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 16-bit element.
3619 def SULD_3D_I16_CLAMP
3620 : NVPTXInst<(outs Int16Regs:$r),
3621 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3622 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 32-bit element.
3624 def SULD_3D_I32_CLAMP
3625 : NVPTXInst<(outs Int32Regs:$r),
3626 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3627 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 64-bit element.
3629 def SULD_3D_I64_CLAMP
3630 : NVPTXInst<(outs Int64Regs:$r),
3631 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3632 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// SULD_1D_V2I{8,16,32,64}_CLAMP: two-element (`.v2`) `suld.b.1d` surface
// loads with the `.clamp` suffix. Each record writes {$r, $g} from handle $s
// at coordinate $x. The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3637 def SULD_1D_V2I8_CLAMP
3638 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3639 (ins Int64Regs:$s, Int32Regs:$x),
3640 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// 16-bit elements.
3642 def SULD_1D_V2I16_CLAMP
3643 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3644 (ins Int64Regs:$s, Int32Regs:$x),
3645 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// 32-bit elements.
3647 def SULD_1D_V2I32_CLAMP
3648 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3649 (ins Int64Regs:$s, Int32Regs:$x),
3650 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// 64-bit elements.
3652 def SULD_1D_V2I64_CLAMP
3653 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3654 (ins Int64Regs:$s, Int32Regs:$x),
3655 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_V2I{8,16,32,64}_CLAMP: two-element (`.v2`) `suld.b.a1d`
// surface loads with the `.clamp` suffix. The coordinate vector is {$l, $x}.
// The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3658 def SULD_1D_ARRAY_V2I8_CLAMP
3659 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3660 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3661 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 16-bit elements.
3663 def SULD_1D_ARRAY_V2I16_CLAMP
3664 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3665 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3666 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 32-bit elements.
3668 def SULD_1D_ARRAY_V2I32_CLAMP
3669 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3670 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3671 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 64-bit elements.
3673 def SULD_1D_ARRAY_V2I64_CLAMP
3674 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3675 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3676 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// SULD_2D_V2I{8,16,32,64}_CLAMP: two-element (`.v2`) `suld.b.2d` surface
// loads with the `.clamp` suffix. The coordinate vector is {$x, $y}.
// The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3679 def SULD_2D_V2I8_CLAMP
3680 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3681 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3682 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 16-bit elements.
3684 def SULD_2D_V2I16_CLAMP
3685 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3686 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3687 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 32-bit elements.
3689 def SULD_2D_V2I32_CLAMP
3690 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3691 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3692 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 64-bit elements.
3694 def SULD_2D_V2I64_CLAMP
3695 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3696 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3697 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_V2I{8,16,32,64}_CLAMP: two-element (`.v2`) `suld.b.a2d`
// surface loads with the `.clamp` suffix. The coordinate vector is printed
// as {$l, $x, $y, $y}, with $y repeated so the vector has four elements.
// The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3700 def SULD_2D_ARRAY_V2I8_CLAMP
3701 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3702 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3703 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3704 "[$s, \\{$l, $x, $y, $y\\}];",
// 16-bit elements.
3706 def SULD_2D_ARRAY_V2I16_CLAMP
3707 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3708 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3709 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3710 "[$s, \\{$l, $x, $y, $y\\}];",
// 32-bit elements.
3712 def SULD_2D_ARRAY_V2I32_CLAMP
3713 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3714 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3715 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3716 "[$s, \\{$l, $x, $y, $y\\}];",
// 64-bit elements.
3718 def SULD_2D_ARRAY_V2I64_CLAMP
3719 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3720 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3721 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3722 "[$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_V2I{8,16,32,64}_CLAMP: two-element (`.v2`) `suld.b.3d` surface
// loads with the `.clamp` suffix. The coordinate vector is printed as
// {$x, $y, $z, $z}, with $z repeated so the vector has four elements.
// The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3725 def SULD_3D_V2I8_CLAMP
3726 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3727 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3728 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 16-bit elements.
3730 def SULD_3D_V2I16_CLAMP
3731 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3732 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3733 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 32-bit elements.
3735 def SULD_3D_V2I32_CLAMP
3736 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3737 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3738 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 64-bit elements.
3740 def SULD_3D_V2I64_CLAMP
3741 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3742 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3743 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// SULD_1D_V4I{8,16,32}_CLAMP: four-element (`.v4`) `suld.b.1d` surface loads
// with the `.clamp` suffix, writing {$r, $g, $b, $a}. This group defines b8,
// b16 and b32 forms only. The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3748 def SULD_1D_V4I8_CLAMP
3749 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3750 (ins Int64Regs:$s, Int32Regs:$x),
3751 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 16-bit elements.
3753 def SULD_1D_V4I16_CLAMP
3754 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3755 (ins Int64Regs:$s, Int32Regs:$x),
3756 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 32-bit elements.
3758 def SULD_1D_V4I32_CLAMP
3759 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3760 (ins Int64Regs:$s, Int32Regs:$x),
3761 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_V4I{8,16,32}_CLAMP: four-element (`.v4`) `suld.b.a1d`
// surface loads with the `.clamp` suffix, writing {$r, $g, $b, $a} from the
// coordinate vector {$l, $x}. The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3764 def SULD_1D_ARRAY_V4I8_CLAMP
3765 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3766 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3767 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3768 "[$s, \\{$l, $x\\}];",
// 16-bit elements.
3770 def SULD_1D_ARRAY_V4I16_CLAMP
3771 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3772 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3773 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3774 "[$s, \\{$l, $x\\}];",
// 32-bit elements.
3776 def SULD_1D_ARRAY_V4I32_CLAMP
3777 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3778 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3779 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3780 "[$s, \\{$l, $x\\}];",
// SULD_2D_V4I{8,16,32}_CLAMP: four-element (`.v4`) `suld.b.2d` surface loads
// with the `.clamp` suffix, writing {$r, $g, $b, $a} from coordinate
// {$x, $y}. The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3783 def SULD_2D_V4I8_CLAMP
3784 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3785 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3786 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 16-bit elements.
3788 def SULD_2D_V4I16_CLAMP
3789 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3790 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3791 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 32-bit elements.
3793 def SULD_2D_V4I32_CLAMP
3794 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3795 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3796 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_V4I{8,16,32}_CLAMP: four-element (`.v4`) `suld.b.a2d`
// surface loads with the `.clamp` suffix. The coordinate vector is printed
// as {$l, $x, $y, $y}, with $y repeated so the vector has four elements.
// The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3799 def SULD_2D_ARRAY_V4I8_CLAMP
3800 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3801 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3802 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3803 "[$s, \\{$l, $x, $y, $y\\}];",
// 16-bit elements.
3805 def SULD_2D_ARRAY_V4I16_CLAMP
3806 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3807 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3808 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3809 "[$s, \\{$l, $x, $y, $y\\}];",
// 32-bit elements.
3811 def SULD_2D_ARRAY_V4I32_CLAMP
3812 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3813 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3814 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3815 "[$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_V4I{8,16,32}_CLAMP: four-element (`.v4`) `suld.b.3d` surface loads
// with the `.clamp` suffix. The coordinate vector is printed as
// {$x, $y, $z, $z}, with $z repeated so the vector has four elements.
// The b8 and b16 forms both write Int16Regs.
// 8-bit elements; the results live in 16-bit registers.
3819 def SULD_3D_V4I8_CLAMP
3820 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3821 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3822 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3823 "[$s, \\{$x, $y, $z, $z\\}];",
// 16-bit elements.
3825 def SULD_3D_V4I16_CLAMP
3826 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3827 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3828 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3829 "[$s, \\{$x, $y, $z, $z\\}];",
// 32-bit elements.
3831 def SULD_3D_V4I32_CLAMP
3832 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3833 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3834 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3835 "[$s, \\{$x, $y, $z, $z\\}];",
// Scalar `suld.b.1d` surface loads with the `.trap` suffix (b8, b16, b32,
// b64). Operands are a 64-bit handle $s and one Int32Regs coordinate $x.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3843 : NVPTXInst<(outs Int16Regs:$r),
3844 (ins Int64Regs:$s, Int32Regs:$x),
3845 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
// 16-bit element.
3847 def SULD_1D_I16_TRAP
3848 : NVPTXInst<(outs Int16Regs:$r),
3849 (ins Int64Regs:$s, Int32Regs:$x),
3850 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
// 32-bit element.
3852 def SULD_1D_I32_TRAP
3853 : NVPTXInst<(outs Int32Regs:$r),
3854 (ins Int64Regs:$s, Int32Regs:$x),
3855 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
// 64-bit element.
3857 def SULD_1D_I64_TRAP
3858 : NVPTXInst<(outs Int64Regs:$r),
3859 (ins Int64Regs:$s, Int32Regs:$x),
3860 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_I{8,16,32,64}_TRAP: scalar `suld.b.a1d` surface loads with
// the `.trap` suffix. The coordinate vector is {$l, $x}, both Int32Regs.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3863 def SULD_1D_ARRAY_I8_TRAP
3864 : NVPTXInst<(outs Int16Regs:$r),
3865 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3866 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// 16-bit element.
3868 def SULD_1D_ARRAY_I16_TRAP
3869 : NVPTXInst<(outs Int16Regs:$r),
3870 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3871 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// 32-bit element.
3873 def SULD_1D_ARRAY_I32_TRAP
3874 : NVPTXInst<(outs Int32Regs:$r),
3875 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3876 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// 64-bit element.
3878 def SULD_1D_ARRAY_I64_TRAP
3879 : NVPTXInst<(outs Int64Regs:$r),
3880 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3881 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// Scalar `suld.b.2d` surface loads with the `.trap` suffix (b8, b16, b32,
// b64). The coordinate vector is {$x, $y}, both Int32Regs.
// The b8 and b16 forms both write Int16Regs.
// 8-bit element; the result lives in a 16-bit register.
3885 : NVPTXInst<(outs Int16Regs:$r),
3886 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3887 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// 16-bit element.
3889 def SULD_2D_I16_TRAP
3890 : NVPTXInst<(outs Int16Regs:$r),
3891 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3892 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// 32-bit element.
3894 def SULD_2D_I32_TRAP
3895 : NVPTXInst<(outs Int32Regs:$r),
3896 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3897 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// 64-bit element.
3899 def SULD_2D_I64_TRAP
3900 : NVPTXInst<(outs Int64Regs:$r),
3901 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3902 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// Scalar `suld.b.a2d` records in `.trap` mode. Three coordinate inputs
// ($l, $x, $y) are printed as a four-element vector {$l, $x, $y, $y}; the
// last slot repeats $y.
// NOTE(review): the repeated $y looks like padding to a four-wide coordinate
// vector -- confirm against the PTX ISA description of suld.
3905 def SULD_2D_ARRAY_I8_TRAP
3906 : NVPTXInst<(outs Int16Regs:$r),
3907 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3908 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3910 def SULD_2D_ARRAY_I16_TRAP
3911 : NVPTXInst<(outs Int16Regs:$r),
3912 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3913 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3915 def SULD_2D_ARRAY_I32_TRAP
3916 : NVPTXInst<(outs Int32Regs:$r),
3917 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3918 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3920 def SULD_2D_ARRAY_I64_TRAP
3921 : NVPTXInst<(outs Int64Regs:$r),
3922 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3923 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3927 : NVPTXInst<(outs Int16Regs:$r),
3928 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3929 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Scalar 3D `suld.b` records in `.trap` mode. The three coordinate inputs
// are printed as the four-element vector {$x, $y, $z, $z}; the last slot
// repeats $z.
// NOTE(review): the repeated $z looks like padding to a four-wide coordinate
// vector -- confirm against the PTX ISA description of suld.
3931 def SULD_3D_I16_TRAP
3932 : NVPTXInst<(outs Int16Regs:$r),
3933 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3934 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3936 def SULD_3D_I32_TRAP
3937 : NVPTXInst<(outs Int32Regs:$r),
3938 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3939 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3941 def SULD_3D_I64_TRAP
3942 : NVPTXInst<(outs Int64Regs:$r),
3943 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3944 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element (`.v2`) 1D `suld.b` records in `.trap` mode. Each defines two
// results, $r and $g, printed as the destination vector {$r, $g}; the
// coordinate vector is {$x}. The b8 and b16 forms both use Int16Regs for
// their results.
3949 def SULD_1D_V2I8_TRAP
3950 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3951 (ins Int64Regs:$s, Int32Regs:$x),
3952 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3954 def SULD_1D_V2I16_TRAP
3955 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3956 (ins Int64Regs:$s, Int32Regs:$x),
3957 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3959 def SULD_1D_V2I32_TRAP
3960 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3961 (ins Int64Regs:$s, Int32Regs:$x),
3962 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3964 def SULD_1D_V2I64_TRAP
3965 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3966 (ins Int64Regs:$s, Int32Regs:$x),
3967 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
// Two-element (`.v2`) `suld.b.a1d` records in `.trap` mode. Results $r and
// $g form the destination vector; the coordinate vector is {$l, $x} with $l
// printed first.
3970 def SULD_1D_ARRAY_V2I8_TRAP
3971 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3972 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3973 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3975 def SULD_1D_ARRAY_V2I16_TRAP
3976 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3977 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3978 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3980 def SULD_1D_ARRAY_V2I32_TRAP
3981 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3982 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3983 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3985 def SULD_1D_ARRAY_V2I64_TRAP
3986 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3987 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3988 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Two-element (`.v2`) 2D `suld.b` records in `.trap` mode. Destination
// vector is {$r, $g}; coordinate vector is {$x, $y}.
3991 def SULD_2D_V2I8_TRAP
3992 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3993 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3994 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3996 def SULD_2D_V2I16_TRAP
3997 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3998 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3999 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4001 def SULD_2D_V2I32_TRAP
4002 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4003 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4004 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4006 def SULD_2D_V2I64_TRAP
4007 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4008 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4009 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Two-element (`.v2`) `suld.b.a2d` records in `.trap` mode. The asm text is
// split across two adjacent string literals. Coordinates are printed as
// {$l, $x, $y, $y}, with $y repeated in the fourth slot.
// NOTE(review): the repeated $y looks like padding to a four-wide coordinate
// vector -- confirm against the PTX ISA.
4012 def SULD_2D_ARRAY_V2I8_TRAP
4013 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4014 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4015 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4016 "[$s, \\{$l, $x, $y, $y\\}];",
4018 def SULD_2D_ARRAY_V2I16_TRAP
4019 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4020 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4021 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4022 "[$s, \\{$l, $x, $y, $y\\}];",
4024 def SULD_2D_ARRAY_V2I32_TRAP
4025 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4026 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4027 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4028 "[$s, \\{$l, $x, $y, $y\\}];",
4030 def SULD_2D_ARRAY_V2I64_TRAP
4031 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4032 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4033 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4034 "[$s, \\{$l, $x, $y, $y\\}];",
// Two-element (`.v2`) 3D `suld.b` records in `.trap` mode. Destination
// vector is {$r, $g}; coordinates are printed as {$x, $y, $z, $z}, with $z
// repeated in the fourth slot (presumably padding -- confirm against the
// PTX ISA).
4037 def SULD_3D_V2I8_TRAP
4038 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4039 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4040 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4042 def SULD_3D_V2I16_TRAP
4043 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4044 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4045 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4047 def SULD_3D_V2I32_TRAP
4048 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4049 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4050 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4052 def SULD_3D_V2I64_TRAP
4053 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4054 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4055 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element (`.v4`) 1D `suld.b` records in `.trap` mode. Each defines
// four results printed as {$r, $g, $b, $a}. Only b8, b16 and b32 widths are
// defined in this group; there is no b64 record here.
4060 def SULD_1D_V4I8_TRAP
4061 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4062 (ins Int64Regs:$s, Int32Regs:$x),
4063 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4065 def SULD_1D_V4I16_TRAP
4066 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4067 (ins Int64Regs:$s, Int32Regs:$x),
4068 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4070 def SULD_1D_V4I32_TRAP
4071 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4072 (ins Int64Regs:$s, Int32Regs:$x),
4073 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// Four-element (`.v4`) `suld.b.a1d` records in `.trap` mode. The asm text
// is split across two adjacent string literals: destination vector
// {$r, $g, $b, $a} first, then the address operand with coordinates
// {$l, $x}.
4076 def SULD_1D_ARRAY_V4I8_TRAP
4077 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4078 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4079 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4080 "[$s, \\{$l, $x\\}];",
4082 def SULD_1D_ARRAY_V4I16_TRAP
4083 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4084 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4085 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4086 "[$s, \\{$l, $x\\}];",
4088 def SULD_1D_ARRAY_V4I32_TRAP
4089 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4090 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4091 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4092 "[$s, \\{$l, $x\\}];",
// Four-element (`.v4`) 2D `suld.b` records in `.trap` mode. Destination
// vector is {$r, $g, $b, $a}; coordinate vector is {$x, $y}. Widths b8, b16
// and b32 are defined here.
4095 def SULD_2D_V4I8_TRAP
4096 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4097 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4098 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4100 def SULD_2D_V4I16_TRAP
4101 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4102 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4103 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4105 def SULD_2D_V4I32_TRAP
4106 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4107 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4108 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// Four-element (`.v4`) `suld.b.a2d` records in `.trap` mode. Coordinates
// are printed as {$l, $x, $y, $y}; the fourth slot repeats $y (presumably
// padding to a four-wide vector -- confirm against the PTX ISA).
4111 def SULD_2D_ARRAY_V4I8_TRAP
4112 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4113 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4114 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4115 "[$s, \\{$l, $x, $y, $y\\}];",
4117 def SULD_2D_ARRAY_V4I16_TRAP
4118 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4119 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4120 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4121 "[$s, \\{$l, $x, $y, $y\\}];",
4123 def SULD_2D_ARRAY_V4I32_TRAP
4124 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4125 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4126 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4127 "[$s, \\{$l, $x, $y, $y\\}];",
// Four-element (`.v4`) 3D `suld.b` records in `.trap` mode. Coordinates are
// printed as {$x, $y, $z, $z}; the fourth slot repeats $z (presumably
// padding to a four-wide vector -- confirm against the PTX ISA).
4131 def SULD_3D_V4I8_TRAP
4132 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4133 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4134 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4135 "[$s, \\{$x, $y, $z, $z\\}];",
4137 def SULD_3D_V4I16_TRAP
4138 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4139 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4140 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4141 "[$s, \\{$x, $y, $z, $z\\}];",
4143 def SULD_3D_V4I32_TRAP
4144 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4145 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4146 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4147 "[$s, \\{$x, $y, $z, $z\\}];",
4154 : NVPTXInst<(outs Int16Regs:$r),
4155 (ins Int64Regs:$s, Int32Regs:$x),
4156 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
// Scalar 1D `suld.b` records in `.zero` mode. Operand lists match the
// `.trap` scalar 1D forms; only the mode suffix in the asm text differs.
// $s is the bracketed address operand and {$x} the coordinate vector.
4158 def SULD_1D_I16_ZERO
4159 : NVPTXInst<(outs Int16Regs:$r),
4160 (ins Int64Regs:$s, Int32Regs:$x),
4161 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4163 def SULD_1D_I32_ZERO
4164 : NVPTXInst<(outs Int32Regs:$r),
4165 (ins Int64Regs:$s, Int32Regs:$x),
4166 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4168 def SULD_1D_I64_ZERO
4169 : NVPTXInst<(outs Int64Regs:$r),
4170 (ins Int64Regs:$s, Int32Regs:$x),
4171 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
// Scalar `suld.b.a1d` records in `.zero` mode. Coordinate vector is
// {$l, $x} with $l printed first; the b8 and b16 forms both return their
// result in Int16Regs.
4174 def SULD_1D_ARRAY_I8_ZERO
4175 : NVPTXInst<(outs Int16Regs:$r),
4176 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4177 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4179 def SULD_1D_ARRAY_I16_ZERO
4180 : NVPTXInst<(outs Int16Regs:$r),
4181 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4182 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4184 def SULD_1D_ARRAY_I32_ZERO
4185 : NVPTXInst<(outs Int32Regs:$r),
4186 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4187 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4189 def SULD_1D_ARRAY_I64_ZERO
4190 : NVPTXInst<(outs Int64Regs:$r),
4191 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4192 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4196 : NVPTXInst<(outs Int16Regs:$r),
4197 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4198 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
// Scalar 2D `suld.b` records in `.zero` mode. Each emits
// `suld.b.2d.<width>.zero` with coordinate vector {$x, $y} and a single
// result register $r.
4200 def SULD_2D_I16_ZERO
4201 : NVPTXInst<(outs Int16Regs:$r),
4202 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4203 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4205 def SULD_2D_I32_ZERO
4206 : NVPTXInst<(outs Int32Regs:$r),
4207 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4208 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4210 def SULD_2D_I64_ZERO
4211 : NVPTXInst<(outs Int64Regs:$r),
4212 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4213 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
// Scalar `suld.b.a2d` records in `.zero` mode. The three coordinate inputs
// are printed as {$l, $x, $y, $y}, repeating $y in the fourth slot.
// NOTE(review): the repeated $y looks like padding to a four-wide coordinate
// vector -- confirm against the PTX ISA.
4216 def SULD_2D_ARRAY_I8_ZERO
4217 : NVPTXInst<(outs Int16Regs:$r),
4218 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4219 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4221 def SULD_2D_ARRAY_I16_ZERO
4222 : NVPTXInst<(outs Int16Regs:$r),
4223 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4224 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4226 def SULD_2D_ARRAY_I32_ZERO
4227 : NVPTXInst<(outs Int32Regs:$r),
4228 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4229 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4231 def SULD_2D_ARRAY_I64_ZERO
4232 : NVPTXInst<(outs Int64Regs:$r),
4233 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4234 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4238 : NVPTXInst<(outs Int16Regs:$r),
4239 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4240 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Scalar 3D `suld.b` records in `.zero` mode. Coordinates are printed as
// {$x, $y, $z, $z}, repeating $z in the fourth slot (presumably padding --
// confirm against the PTX ISA).
4242 def SULD_3D_I16_ZERO
4243 : NVPTXInst<(outs Int16Regs:$r),
4244 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4245 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4247 def SULD_3D_I32_ZERO
4248 : NVPTXInst<(outs Int32Regs:$r),
4249 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4250 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4252 def SULD_3D_I64_ZERO
4253 : NVPTXInst<(outs Int64Regs:$r),
4254 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4255 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element (`.v2`) 1D `suld.b` records in `.zero` mode. Results $r and
// $g are printed as the destination vector {$r, $g}; coordinate vector is
// {$x}.
4260 def SULD_1D_V2I8_ZERO
4261 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4262 (ins Int64Regs:$s, Int32Regs:$x),
4263 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4265 def SULD_1D_V2I16_ZERO
4266 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4267 (ins Int64Regs:$s, Int32Regs:$x),
4268 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4270 def SULD_1D_V2I32_ZERO
4271 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4272 (ins Int64Regs:$s, Int32Regs:$x),
4273 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4275 def SULD_1D_V2I64_ZERO
4276 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4277 (ins Int64Regs:$s, Int32Regs:$x),
4278 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
// Two-element (`.v2`) `suld.b.a1d` records in `.zero` mode. Destination
// vector is {$r, $g}; coordinate vector is {$l, $x} with $l printed first.
4281 def SULD_1D_ARRAY_V2I8_ZERO
4282 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4283 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4284 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4286 def SULD_1D_ARRAY_V2I16_ZERO
4287 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4288 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4289 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4291 def SULD_1D_ARRAY_V2I32_ZERO
4292 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4293 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4294 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4296 def SULD_1D_ARRAY_V2I64_ZERO
4297 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4298 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4299 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// Two-element (`.v2`) 2D `suld.b` records in `.zero` mode. Destination
// vector is {$r, $g}; coordinate vector is {$x, $y}.
4302 def SULD_2D_V2I8_ZERO
4303 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4304 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4305 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4307 def SULD_2D_V2I16_ZERO
4308 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4309 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4310 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4312 def SULD_2D_V2I32_ZERO
4313 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4314 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4315 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4317 def SULD_2D_V2I64_ZERO
4318 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4319 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4320 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// Two-element (`.v2`) `suld.b.a2d` records in `.zero` mode. The asm text is
// split across two adjacent string literals. Coordinates are printed as
// {$l, $x, $y, $y}, repeating $y in the fourth slot (presumably padding --
// confirm against the PTX ISA).
4323 def SULD_2D_ARRAY_V2I8_ZERO
4324 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4325 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4326 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4327 "[$s, \\{$l, $x, $y, $y\\}];",
4329 def SULD_2D_ARRAY_V2I16_ZERO
4330 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4331 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4332 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4333 "[$s, \\{$l, $x, $y, $y\\}];",
4335 def SULD_2D_ARRAY_V2I32_ZERO
4336 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4337 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4338 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4339 "[$s, \\{$l, $x, $y, $y\\}];",
4341 def SULD_2D_ARRAY_V2I64_ZERO
4342 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4343 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4344 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4345 "[$s, \\{$l, $x, $y, $y\\}];",
// Two-element (`.v2`) 3D `suld.b` records in `.zero` mode. Destination
// vector is {$r, $g}; coordinates are printed as {$x, $y, $z, $z}, repeating
// $z in the fourth slot (presumably padding -- confirm against the PTX ISA).
4348 def SULD_3D_V2I8_ZERO
4349 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4350 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4351 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4353 def SULD_3D_V2I16_ZERO
4354 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4355 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4356 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4358 def SULD_3D_V2I32_ZERO
4359 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4360 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4361 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4363 def SULD_3D_V2I64_ZERO
4364 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4365 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4366 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element (`.v4`) 1D `suld.b` records in `.zero` mode. Each defines
// four results printed as {$r, $g, $b, $a}. Only b8, b16 and b32 widths are
// defined in this group.
4371 def SULD_1D_V4I8_ZERO
4372 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4373 (ins Int64Regs:$s, Int32Regs:$x),
4374 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4376 def SULD_1D_V4I16_ZERO
4377 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4378 (ins Int64Regs:$s, Int32Regs:$x),
4379 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4381 def SULD_1D_V4I32_ZERO
4382 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4383 (ins Int64Regs:$s, Int32Regs:$x),
4384 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// Four-element (`.v4`) `suld.b.a1d` records in `.zero` mode. The asm text
// is split across two adjacent string literals: destination vector
// {$r, $g, $b, $a}, then the address operand with coordinates {$l, $x}.
4387 def SULD_1D_ARRAY_V4I8_ZERO
4388 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4389 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4390 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4391 "[$s, \\{$l, $x\\}];",
4393 def SULD_1D_ARRAY_V4I16_ZERO
4394 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4395 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4396 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4397 "[$s, \\{$l, $x\\}];",
4399 def SULD_1D_ARRAY_V4I32_ZERO
4400 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4401 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4402 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4403 "[$s, \\{$l, $x\\}];",
// Four-element (`.v4`) 2D `suld.b` records in `.zero` mode. Destination
// vector is {$r, $g, $b, $a}; coordinate vector is {$x, $y}.
4406 def SULD_2D_V4I8_ZERO
4407 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4408 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4409 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4411 def SULD_2D_V4I16_ZERO
4412 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4413 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4414 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4416 def SULD_2D_V4I32_ZERO
4417 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4418 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4419 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// Four-element (`.v4`) `suld.b.a2d` records in `.zero` mode. Coordinates
// are printed as {$l, $x, $y, $y}; the fourth slot repeats $y (presumably
// padding to a four-wide vector -- confirm against the PTX ISA).
4422 def SULD_2D_ARRAY_V4I8_ZERO
4423 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4424 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4425 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4426 "[$s, \\{$l, $x, $y, $y\\}];",
4428 def SULD_2D_ARRAY_V4I16_ZERO
4429 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4430 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4431 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4432 "[$s, \\{$l, $x, $y, $y\\}];",
4434 def SULD_2D_ARRAY_V4I32_ZERO
4435 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4436 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4437 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4438 "[$s, \\{$l, $x, $y, $y\\}];",
// Four-element (`.v4`) 3D `suld.b` records in `.zero` mode. Coordinates are
// printed as {$x, $y, $z, $z}; the fourth slot repeats $z (presumably
// padding to a four-wide vector -- confirm against the PTX ISA).
4442 def SULD_3D_V4I8_ZERO
4443 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4446 "[$s, \\{$x, $y, $z, $z\\}];",
4448 def SULD_3D_V4I16_ZERO
4449 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4450 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4451 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4452 "[$s, \\{$x, $y, $z, $z\\}];",
4454 def SULD_3D_V4I32_ZERO
4455 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4456 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4457 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4458 "[$s, \\{$x, $y, $z, $z\\}];",
4462 //-----------------------------------
4463 // Texture Query Intrinsics
4464 //-----------------------------------
// `txq` query instruction records. The enclosing `let` sets
// IsSurfTexQuery = 1 on every record inside the braces. Each record takes
// one Int64Regs operand $a, printed as the bracketed operand [$a], and
// defines one Int32Regs result $d. The attribute queried is selected by the
// mnemonic: channel_order, channel_data_type, width, height, depth,
// array_size, num_samples, num_mipmap_levels.
4466 let IsSurfTexQuery = 1 in {
4467 def TXQ_CHANNEL_ORDER
4468 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4469 "txq.channel_order.b32 \t$d, [$a];",
4471 def TXQ_CHANNEL_DATA_TYPE
4472 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4473 "txq.channel_data_type.b32 \t$d, [$a];",
4476 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4477 "txq.width.b32 \t$d, [$a];",
4480 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4481 "txq.height.b32 \t$d, [$a];",
4484 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4485 "txq.depth.b32 \t$d, [$a];",
4488 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4489 "txq.array_size.b32 \t$d, [$a];",
4492 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4493 "txq.num_samples.b32 \t$d, [$a];",
4495 def TXQ_NUM_MIPMAP_LEVELS
4496 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4497 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns for the texture query intrinsics. Each anonymous Pat
// maps one int_nvvm_txq_* intrinsic applied to an Int64Regs operand onto the
// TXQ_* instruction of the same attribute, forwarding $a unchanged.
4501 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4502 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4503 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4504 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4505 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4506 (TXQ_WIDTH Int64Regs:$a)>;
4507 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4508 (TXQ_HEIGHT Int64Regs:$a)>;
4509 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4510 (TXQ_DEPTH Int64Regs:$a)>;
4511 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4512 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4513 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4514 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4515 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4516 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4519 //-----------------------------------
4520 // Surface Query Intrinsics
4521 //-----------------------------------
// `suq` query instruction records. The enclosing `let` sets
// IsSurfTexQuery = 1 on every record inside the braces. Each record takes
// one Int64Regs operand $a, printed as the bracketed operand [$a], and
// defines one Int32Regs result $d. Attributes covered here: channel_order,
// channel_data_type, width, height, depth, array_size.
4523 let IsSurfTexQuery = 1 in {
4524 def SUQ_CHANNEL_ORDER
4525 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4526 "suq.channel_order.b32 \t$d, [$a];",
4528 def SUQ_CHANNEL_DATA_TYPE
4529 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4530 "suq.channel_data_type.b32 \t$d, [$a];",
4533 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4534 "suq.width.b32 \t$d, [$a];",
4537 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4538 "suq.height.b32 \t$d, [$a];",
4541 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4542 "suq.depth.b32 \t$d, [$a];",
4545 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4546 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface query intrinsics. Each anonymous Pat
// maps one int_nvvm_suq_* intrinsic applied to an Int64Regs operand onto the
// SUQ_* instruction of the same attribute, forwarding $a unchanged.
4550 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4551 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4552 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4553 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4554 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4555 (SUQ_WIDTH Int64Regs:$a)>;
4556 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4557 (SUQ_HEIGHT Int64Regs:$a)>;
4558 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4559 (SUQ_DEPTH Int64Regs:$a)>;
4560 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4561 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4564 //===- Handle Query -------------------------------------------------------===//
4566 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
4568 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4569 "istypep.samplerref \t$d, $a;",
4570 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4572 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4573 "istypep.surfref \t$d, $a;",
4574 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4576 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4577 "istypep.texref \t$d, $a;",
4578 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4580 //===- Surface Stores -----------------------------------------------------===//
// 1D `sust.b` (surface store) records in `.clamp` mode. In the asm text the
// address operand [$s, {$x}] comes first and the data vector follows:
// {$r} for scalar forms, {$r, $g} for `.v2`, {$r, $g, $b, $a} for `.v4`.
// All data registers are inputs. b8 and b16 data are both passed in
// Int16Regs; `.v4` is defined for b8, b16 and b32 only.
4585 def SUST_B_1D_B8_CLAMP
4587 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4588 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4590 def SUST_B_1D_B16_CLAMP
4592 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4593 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4595 def SUST_B_1D_B32_CLAMP
4597 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4598 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4600 def SUST_B_1D_B64_CLAMP
4602 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4603 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4605 def SUST_B_1D_V2B8_CLAMP
4607 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4608 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4610 def SUST_B_1D_V2B16_CLAMP
4612 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4613 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4615 def SUST_B_1D_V2B32_CLAMP
4617 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4618 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4620 def SUST_B_1D_V2B64_CLAMP
4622 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4623 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4625 def SUST_B_1D_V4B8_CLAMP
4627 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4628 Int16Regs:$b, Int16Regs:$a),
4629 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4631 def SUST_B_1D_V4B16_CLAMP
4633 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4634 Int16Regs:$b, Int16Regs:$a),
4635 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4637 def SUST_B_1D_V4B32_CLAMP
4639 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4640 Int32Regs:$b, Int32Regs:$a),
4641 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// `sust.b.a1d` records in `.clamp` mode. The coordinate vector is
// {$idx, $x}: $idx is printed ahead of $x (presumably the array layer
// index -- confirm against the PTX ISA). The data vector follows the
// address operand: {$r}, {$r, $g} or {$r, $g, $b, $a} depending on the
// vector suffix.
4645 def SUST_B_1D_ARRAY_B8_CLAMP
4647 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4648 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4650 def SUST_B_1D_ARRAY_B16_CLAMP
4652 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4653 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4655 def SUST_B_1D_ARRAY_B32_CLAMP
4657 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4658 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4660 def SUST_B_1D_ARRAY_B64_CLAMP
4662 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4663 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4665 def SUST_B_1D_ARRAY_V2B8_CLAMP
4667 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4669 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4671 def SUST_B_1D_ARRAY_V2B16_CLAMP
4673 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4675 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4677 def SUST_B_1D_ARRAY_V2B32_CLAMP
4679 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4681 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4683 def SUST_B_1D_ARRAY_V2B64_CLAMP
4685 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4687 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4689 def SUST_B_1D_ARRAY_V4B8_CLAMP
4691 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4692 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4693 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4694 "\\{$r, $g, $b, $a\\};",
4696 def SUST_B_1D_ARRAY_V4B16_CLAMP
4698 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4699 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4700 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4701 "\\{$r, $g, $b, $a\\};",
4703 def SUST_B_1D_ARRAY_V4B32_CLAMP
4705 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4706 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4707 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4708 "\\{$r, $g, $b, $a\\};",
// 2D `sust.b` records in `.clamp` mode. The address operand is
// [$s, {$x, $y}]; the data vector printed after it is {$r}, {$r, $g} or
// {$r, $g, $b, $a} depending on the vector suffix. The `.v4` asm text is
// split across two adjacent string literals.
4712 def SUST_B_2D_B8_CLAMP
4714 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4715 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4717 def SUST_B_2D_B16_CLAMP
4719 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4720 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4722 def SUST_B_2D_B32_CLAMP
4724 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4725 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4727 def SUST_B_2D_B64_CLAMP
4729 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4730 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4732 def SUST_B_2D_V2B8_CLAMP
4734 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4736 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4738 def SUST_B_2D_V2B16_CLAMP
4740 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4742 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4744 def SUST_B_2D_V2B32_CLAMP
4746 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4748 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4750 def SUST_B_2D_V2B64_CLAMP
4752 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4754 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4756 def SUST_B_2D_V4B8_CLAMP
4758 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4759 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4760 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4761 "\\{$r, $g, $b, $a\\};",
4763 def SUST_B_2D_V4B16_CLAMP
4765 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4766 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4767 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4768 "\\{$r, $g, $b, $a\\};",
4770 def SUST_B_2D_V4B32_CLAMP
4772 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4773 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4774 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4775 "\\{$r, $g, $b, $a\\};",
// `sust.b.a2d` records in `.clamp` mode. Three coordinate inputs
// ($idx, $x, $y) are printed as the four-element vector
// {$idx, $x, $y, $y}; the fourth slot repeats $y.
// NOTE(review): the repeated $y looks like padding to a four-wide coordinate
// vector -- confirm against the PTX ISA description of sust.
4779 def SUST_B_2D_ARRAY_B8_CLAMP
4781 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4783 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4785 def SUST_B_2D_ARRAY_B16_CLAMP
4787 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4789 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4791 def SUST_B_2D_ARRAY_B32_CLAMP
4793 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4795 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4797 def SUST_B_2D_ARRAY_B64_CLAMP
4799 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4801 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4803 def SUST_B_2D_ARRAY_V2B8_CLAMP
4805 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4806 Int16Regs:$r, Int16Regs:$g),
4807 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4810 def SUST_B_2D_ARRAY_V2B16_CLAMP
4812 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4813 Int16Regs:$r, Int16Regs:$g),
4814 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4817 def SUST_B_2D_ARRAY_V2B32_CLAMP
4819 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4820 Int32Regs:$r, Int32Regs:$g),
4821 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4824 def SUST_B_2D_ARRAY_V2B64_CLAMP
4826 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4827 Int64Regs:$r, Int64Regs:$g),
4828 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4831 def SUST_B_2D_ARRAY_V4B8_CLAMP
4833 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4834 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4835 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4836 "\\{$r, $g, $b, $a\\};",
4838 def SUST_B_2D_ARRAY_V4B16_CLAMP
4840 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4841 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4842 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4843 "\\{$r, $g, $b, $a\\};",
4845 def SUST_B_2D_ARRAY_V4B32_CLAMP
4847 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4848 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4849 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4850 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 3D surfaces, `.clamp` mode.
// Record names follow SUST_B_3D_[V<n>]B<width>_CLAMP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; $x, $y, $z (all Int32Regs) are the
// coordinates.
// The coordinate vector is always printed with four elements and $z is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of a 3d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
4854 def SUST_B_3D_B8_CLAMP
4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4858 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4860 def SUST_B_3D_B16_CLAMP
4862 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4864 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4866 def SUST_B_3D_B32_CLAMP
4868 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4870 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4872 def SUST_B_3D_B64_CLAMP
4874 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4876 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4878 def SUST_B_3D_V2B8_CLAMP
4880 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4881 Int16Regs:$r, Int16Regs:$g),
4882 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4885 def SUST_B_3D_V2B16_CLAMP
4887 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4888 Int16Regs:$r, Int16Regs:$g),
4889 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4892 def SUST_B_3D_V2B32_CLAMP
4894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4895 Int32Regs:$r, Int32Regs:$g),
4896 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4899 def SUST_B_3D_V2B64_CLAMP
4901 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4902 Int64Regs:$r, Int64Regs:$g),
4903 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4906 def SUST_B_3D_V4B8_CLAMP
4908 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4909 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4910 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4911 "\\{$r, $g, $b, $a\\};",
4913 def SUST_B_3D_V4B16_CLAMP
4915 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4916 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4917 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4918 "\\{$r, $g, $b, $a\\};",
4920 def SUST_B_3D_V4B32_CLAMP
4922 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4923 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4924 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4925 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 1D surfaces, `.trap` mode.
// Record names follow SUST_B_1D_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s (Int64Regs) is the surface operand, $x (Int32Regs) the single
// coordinate, and $r/$g/$b/$a the stored elements.
// Both the b8 and b16 forms take their data in Int16Regs; b32 uses Int32Regs
// and b64 uses Int64Regs. There is no V4B64 form in this group.
4930 def SUST_B_1D_B8_TRAP
4932 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4933 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
4935 def SUST_B_1D_B16_TRAP
4937 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4938 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
4940 def SUST_B_1D_B32_TRAP
4942 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4943 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
4945 def SUST_B_1D_B64_TRAP
4947 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4948 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
4950 def SUST_B_1D_V2B8_TRAP
4952 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4953 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4955 def SUST_B_1D_V2B16_TRAP
4957 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4958 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4960 def SUST_B_1D_V2B32_TRAP
4962 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4963 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4965 def SUST_B_1D_V2B64_TRAP
4967 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4968 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4970 def SUST_B_1D_V4B8_TRAP
4972 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4973 Int16Regs:$b, Int16Regs:$a),
4974 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4976 def SUST_B_1D_V4B16_TRAP
4978 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4979 Int16Regs:$b, Int16Regs:$a),
4980 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4982 def SUST_B_1D_V4B32_TRAP
4984 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4985 Int32Regs:$b, Int32Regs:$a),
4986 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b stores to 1D surface arrays (`.a1d`), `.trap` mode.
// Record names follow SUST_B_1D_ARRAY_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; the coordinate vector is printed as
// {$idx, $x} (both Int32Regs), with $idx first.
4990 def SUST_B_1D_ARRAY_B8_TRAP
4992 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4993 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4995 def SUST_B_1D_ARRAY_B16_TRAP
4997 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4998 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5000 def SUST_B_1D_ARRAY_B32_TRAP
5002 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5003 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5005 def SUST_B_1D_ARRAY_B64_TRAP
5007 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5008 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5010 def SUST_B_1D_ARRAY_V2B8_TRAP
5012 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5014 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5016 def SUST_B_1D_ARRAY_V2B16_TRAP
5018 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5020 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5022 def SUST_B_1D_ARRAY_V2B32_TRAP
5024 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5026 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5028 def SUST_B_1D_ARRAY_V2B64_TRAP
5030 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5032 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5034 def SUST_B_1D_ARRAY_V4B8_TRAP
5036 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5037 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5038 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5039 "\\{$r, $g, $b, $a\\};",
5041 def SUST_B_1D_ARRAY_V4B16_TRAP
5043 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5044 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5045 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5046 "\\{$r, $g, $b, $a\\};",
5048 def SUST_B_1D_ARRAY_V4B32_TRAP
5050 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5051 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5052 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5053 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 2D surfaces, `.trap` mode.
// Record names follow SUST_B_2D_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; the coordinate vector is printed as
// {$x, $y} (both Int32Regs).
5057 def SUST_B_2D_B8_TRAP
5059 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5060 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5062 def SUST_B_2D_B16_TRAP
5064 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5065 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5067 def SUST_B_2D_B32_TRAP
5069 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5070 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5072 def SUST_B_2D_B64_TRAP
5074 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5075 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5077 def SUST_B_2D_V2B8_TRAP
5079 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5081 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5083 def SUST_B_2D_V2B16_TRAP
5085 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5087 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5089 def SUST_B_2D_V2B32_TRAP
5091 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5093 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5095 def SUST_B_2D_V2B64_TRAP
5097 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5099 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5101 def SUST_B_2D_V4B8_TRAP
5103 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5104 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5105 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5106 "\\{$r, $g, $b, $a\\};",
5108 def SUST_B_2D_V4B16_TRAP
5110 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5111 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5112 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5113 "\\{$r, $g, $b, $a\\};",
5115 def SUST_B_2D_V4B32_TRAP
5117 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5118 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5119 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5120 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 2D surface arrays (`.a2d`), `.trap` mode.
// Record names follow SUST_B_2D_ARRAY_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; $idx, $x, $y (all Int32Regs) form the
// coordinate vector, with $idx printed first.
// The coordinate vector is always printed with four elements and $y is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of an a2d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
5124 def SUST_B_2D_ARRAY_B8_TRAP
5126 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5128 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5130 def SUST_B_2D_ARRAY_B16_TRAP
5132 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5134 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5136 def SUST_B_2D_ARRAY_B32_TRAP
5138 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5140 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5142 def SUST_B_2D_ARRAY_B64_TRAP
5144 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5146 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5148 def SUST_B_2D_ARRAY_V2B8_TRAP
5150 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5151 Int16Regs:$r, Int16Regs:$g),
5152 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5155 def SUST_B_2D_ARRAY_V2B16_TRAP
5157 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5158 Int16Regs:$r, Int16Regs:$g),
5159 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5162 def SUST_B_2D_ARRAY_V2B32_TRAP
5164 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5165 Int32Regs:$r, Int32Regs:$g),
5166 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5169 def SUST_B_2D_ARRAY_V2B64_TRAP
5171 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5172 Int64Regs:$r, Int64Regs:$g),
5173 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5176 def SUST_B_2D_ARRAY_V4B8_TRAP
5178 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5179 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5180 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5181 "\\{$r, $g, $b, $a\\};",
5183 def SUST_B_2D_ARRAY_V4B16_TRAP
5185 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5186 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5187 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5188 "\\{$r, $g, $b, $a\\};",
5190 def SUST_B_2D_ARRAY_V4B32_TRAP
5192 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5193 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5194 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5195 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 3D surfaces, `.trap` mode.
// Record names follow SUST_B_3D_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; $x, $y, $z (all Int32Regs) are the
// coordinates.
// The coordinate vector is always printed with four elements and $z is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of a 3d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
5199 def SUST_B_3D_B8_TRAP
5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5203 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5205 def SUST_B_3D_B16_TRAP
5207 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5209 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5211 def SUST_B_3D_B32_TRAP
5213 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5215 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5217 def SUST_B_3D_B64_TRAP
5219 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5221 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5223 def SUST_B_3D_V2B8_TRAP
5225 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5226 Int16Regs:$r, Int16Regs:$g),
5227 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5230 def SUST_B_3D_V2B16_TRAP
5232 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5233 Int16Regs:$r, Int16Regs:$g),
5234 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5237 def SUST_B_3D_V2B32_TRAP
5239 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5240 Int32Regs:$r, Int32Regs:$g),
5241 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5244 def SUST_B_3D_V2B64_TRAP
5246 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5247 Int64Regs:$r, Int64Regs:$g),
5248 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5251 def SUST_B_3D_V4B8_TRAP
5253 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5254 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5255 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5256 "\\{$r, $g, $b, $a\\};",
5258 def SUST_B_3D_V4B16_TRAP
5260 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5261 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5262 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5263 "\\{$r, $g, $b, $a\\};",
5265 def SUST_B_3D_V4B32_TRAP
5267 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5268 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5269 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5270 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 1D surfaces, `.zero` mode.
// Record names follow SUST_B_1D_[V<n>]B<width>_ZERO, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s (Int64Regs) is the surface operand, $x (Int32Regs) the single
// coordinate, and $r/$g/$b/$a the stored elements.
// Both the b8 and b16 forms take their data in Int16Regs; b32 uses Int32Regs
// and b64 uses Int64Regs. There is no V4B64 form in this group.
5275 def SUST_B_1D_B8_ZERO
5277 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5278 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5280 def SUST_B_1D_B16_ZERO
5282 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5283 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5285 def SUST_B_1D_B32_ZERO
5287 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5288 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5290 def SUST_B_1D_B64_ZERO
5292 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5293 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5295 def SUST_B_1D_V2B8_ZERO
5297 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5298 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5300 def SUST_B_1D_V2B16_ZERO
5302 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5303 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5305 def SUST_B_1D_V2B32_ZERO
5307 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5308 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5310 def SUST_B_1D_V2B64_ZERO
5312 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5313 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5315 def SUST_B_1D_V4B8_ZERO
5317 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5318 Int16Regs:$b, Int16Regs:$a),
5319 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5321 def SUST_B_1D_V4B16_ZERO
5323 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5324 Int16Regs:$b, Int16Regs:$a),
5325 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5327 def SUST_B_1D_V4B32_ZERO
5329 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5330 Int32Regs:$b, Int32Regs:$a),
5331 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b stores to 1D surface arrays (`.a1d`), `.zero` mode.
// Record names follow SUST_B_1D_ARRAY_[V<n>]B<width>_ZERO, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; the coordinate vector is printed as
// {$idx, $x} (both Int32Regs), with $idx first.
5335 def SUST_B_1D_ARRAY_B8_ZERO
5337 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5338 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5340 def SUST_B_1D_ARRAY_B16_ZERO
5342 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5343 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5345 def SUST_B_1D_ARRAY_B32_ZERO
5347 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5348 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5350 def SUST_B_1D_ARRAY_B64_ZERO
5352 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5353 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5355 def SUST_B_1D_ARRAY_V2B8_ZERO
5357 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5359 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5361 def SUST_B_1D_ARRAY_V2B16_ZERO
5363 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5365 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5367 def SUST_B_1D_ARRAY_V2B32_ZERO
5369 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5371 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5373 def SUST_B_1D_ARRAY_V2B64_ZERO
5375 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5377 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5379 def SUST_B_1D_ARRAY_V4B8_ZERO
5381 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5382 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5383 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5384 "\\{$r, $g, $b, $a\\};",
5386 def SUST_B_1D_ARRAY_V4B16_ZERO
5388 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5389 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5390 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5391 "\\{$r, $g, $b, $a\\};",
5393 def SUST_B_1D_ARRAY_V4B32_ZERO
5395 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5396 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5397 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5398 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 2D surfaces, `.zero` mode.
// Record names follow SUST_B_2D_[V<n>]B<width>_ZERO, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; the coordinate vector is printed as
// {$x, $y} (both Int32Regs).
5402 def SUST_B_2D_B8_ZERO
5404 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5405 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5407 def SUST_B_2D_B16_ZERO
5409 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5410 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5412 def SUST_B_2D_B32_ZERO
5414 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5415 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5417 def SUST_B_2D_B64_ZERO
5419 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5420 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5422 def SUST_B_2D_V2B8_ZERO
5424 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5426 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5428 def SUST_B_2D_V2B16_ZERO
5430 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5432 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5434 def SUST_B_2D_V2B32_ZERO
5436 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5438 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5440 def SUST_B_2D_V2B64_ZERO
5442 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5444 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5446 def SUST_B_2D_V4B8_ZERO
5448 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5449 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5450 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5451 "\\{$r, $g, $b, $a\\};",
5453 def SUST_B_2D_V4B16_ZERO
5455 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5456 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5457 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5458 "\\{$r, $g, $b, $a\\};",
5460 def SUST_B_2D_V4B32_ZERO
5462 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5463 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5464 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5465 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 2D surface arrays (`.a2d`), `.zero` mode.
// Record names follow SUST_B_2D_ARRAY_[V<n>]B<width>_ZERO, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; $idx, $x, $y (all Int32Regs) form the
// coordinate vector, with $idx printed first.
// The coordinate vector is always printed with four elements and $y is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of an a2d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
5469 def SUST_B_2D_ARRAY_B8_ZERO
5471 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5473 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5475 def SUST_B_2D_ARRAY_B16_ZERO
5477 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5479 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5481 def SUST_B_2D_ARRAY_B32_ZERO
5483 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5485 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5487 def SUST_B_2D_ARRAY_B64_ZERO
5489 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5491 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5493 def SUST_B_2D_ARRAY_V2B8_ZERO
5495 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5496 Int16Regs:$r, Int16Regs:$g),
5497 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5500 def SUST_B_2D_ARRAY_V2B16_ZERO
5502 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5503 Int16Regs:$r, Int16Regs:$g),
5504 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5507 def SUST_B_2D_ARRAY_V2B32_ZERO
5509 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5510 Int32Regs:$r, Int32Regs:$g),
5511 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5514 def SUST_B_2D_ARRAY_V2B64_ZERO
5516 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5517 Int64Regs:$r, Int64Regs:$g),
5518 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5521 def SUST_B_2D_ARRAY_V4B8_ZERO
5523 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5524 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5525 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5526 "\\{$r, $g, $b, $a\\};",
5528 def SUST_B_2D_ARRAY_V4B16_ZERO
5530 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5531 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5532 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5533 "\\{$r, $g, $b, $a\\};",
5535 def SUST_B_2D_ARRAY_V4B32_ZERO
5537 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5538 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5539 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5540 "\\{$r, $g, $b, $a\\};",
// sust.b stores to 3D surfaces, `.zero` mode.
// Record names follow SUST_B_3D_[V<n>]B<width>_ZERO, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic.
// Operands: $s is the surface operand; $x, $y, $z (all Int32Regs) are the
// coordinates.
// The coordinate vector is always printed with four elements and $z is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of a 3d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
5544 def SUST_B_3D_B8_ZERO
5546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5548 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5550 def SUST_B_3D_B16_ZERO
5552 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5554 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5556 def SUST_B_3D_B32_ZERO
5558 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5560 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5562 def SUST_B_3D_B64_ZERO
5564 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5566 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5568 def SUST_B_3D_V2B8_ZERO
5570 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5571 Int16Regs:$r, Int16Regs:$g),
5572 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5575 def SUST_B_3D_V2B16_ZERO
5577 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5578 Int16Regs:$r, Int16Regs:$g),
5579 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5582 def SUST_B_3D_V2B32_ZERO
5584 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5585 Int32Regs:$r, Int32Regs:$g),
5586 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5589 def SUST_B_3D_V2B64_ZERO
5591 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5592 Int64Regs:$r, Int64Regs:$g),
5593 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5596 def SUST_B_3D_V4B8_ZERO
5598 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5599 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5600 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5601 "\\{$r, $g, $b, $a\\};",
5603 def SUST_B_3D_V4B16_ZERO
5605 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5606 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5607 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5608 "\\{$r, $g, $b, $a\\};",
5610 def SUST_B_3D_V4B32_ZERO
5612 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5613 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5614 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5615 "\\{$r, $g, $b, $a\\};",
// sust.p stores to 1D surfaces, `.trap` mode.
// Record names follow SUST_P_1D_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic. Unlike the sust.b
// 1D groups, no b64 forms are defined here.
// Operands: $s (Int64Regs) is the surface operand, $x (Int32Regs) the single
// coordinate, and $r/$g/$b/$a the stored elements (Int16Regs for b8 and b16,
// Int32Regs for b32).
// NOTE(review): the PTX ISA syntax summary for sust.p appears to list only
// the .b32 type; confirm that the .b8/.b16 mnemonics printed by these records
// are accepted by ptxas.
5622 def SUST_P_1D_B8_TRAP
5624 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5625 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5627 def SUST_P_1D_B16_TRAP
5629 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5630 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5632 def SUST_P_1D_B32_TRAP
5634 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5635 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5637 def SUST_P_1D_V2B8_TRAP
5639 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5640 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5642 def SUST_P_1D_V2B16_TRAP
5644 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5645 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5647 def SUST_P_1D_V2B32_TRAP
5649 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5650 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5652 def SUST_P_1D_V4B8_TRAP
5654 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5655 Int16Regs:$b, Int16Regs:$a),
5656 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5658 def SUST_P_1D_V4B16_TRAP
5660 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5661 Int16Regs:$b, Int16Regs:$a),
5662 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5664 def SUST_P_1D_V4B32_TRAP
5666 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5667 Int32Regs:$b, Int32Regs:$a),
5668 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.p stores to 1D surface arrays (`.a1d`), `.trap` mode.
// Record names follow SUST_P_1D_ARRAY_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic; no b64 forms are
// defined in this group.
// Operands: $s is the surface operand; the coordinate vector is printed as
// {$idx, $x} (both Int32Regs), with $idx first.
5672 def SUST_P_1D_ARRAY_B8_TRAP
5674 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5675 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5677 def SUST_P_1D_ARRAY_B16_TRAP
5679 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5680 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5682 def SUST_P_1D_ARRAY_B32_TRAP
5684 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5685 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5687 def SUST_P_1D_ARRAY_V2B8_TRAP
5689 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5691 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5693 def SUST_P_1D_ARRAY_V2B16_TRAP
5695 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5697 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5699 def SUST_P_1D_ARRAY_V2B32_TRAP
5701 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5703 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5705 def SUST_P_1D_ARRAY_V4B8_TRAP
5707 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5708 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5709 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5710 "\\{$r, $g, $b, $a\\};",
5712 def SUST_P_1D_ARRAY_V4B16_TRAP
5714 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5715 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5716 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5717 "\\{$r, $g, $b, $a\\};",
5719 def SUST_P_1D_ARRAY_V4B32_TRAP
5721 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5722 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5723 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5724 "\\{$r, $g, $b, $a\\};",
// sust.p stores to 2D surfaces, `.trap` mode.
// Record names follow SUST_P_2D_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic; no b64 forms are
// defined in this group.
// Operands: $s is the surface operand; the coordinate vector is printed as
// {$x, $y} (both Int32Regs).
5728 def SUST_P_2D_B8_TRAP
5730 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5731 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5733 def SUST_P_2D_B16_TRAP
5735 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5736 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5738 def SUST_P_2D_B32_TRAP
5740 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5741 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5743 def SUST_P_2D_V2B8_TRAP
5745 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5747 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5749 def SUST_P_2D_V2B16_TRAP
5751 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5753 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5755 def SUST_P_2D_V2B32_TRAP
5757 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5759 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5761 def SUST_P_2D_V4B8_TRAP
5763 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5764 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5765 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5766 "\\{$r, $g, $b, $a\\};",
5768 def SUST_P_2D_V4B16_TRAP
5770 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5771 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5772 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5773 "\\{$r, $g, $b, $a\\};",
5775 def SUST_P_2D_V4B32_TRAP
5777 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5778 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5779 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5780 "\\{$r, $g, $b, $a\\};",
// sust.p stores to 2D surface arrays (`.a2d`), `.trap` mode.
// Record names follow SUST_P_2D_ARRAY_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic; no b64 forms are
// defined in this group.
// Operands: $s is the surface operand; $idx, $x, $y (all Int32Regs) form the
// coordinate vector, with $idx printed first.
// The coordinate vector is always printed with four elements and $y is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of an a2d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
5784 def SUST_P_2D_ARRAY_B8_TRAP
5786 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5788 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5790 def SUST_P_2D_ARRAY_B16_TRAP
5792 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5794 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5796 def SUST_P_2D_ARRAY_B32_TRAP
5798 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5800 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5802 def SUST_P_2D_ARRAY_V2B8_TRAP
5804 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5805 Int16Regs:$r, Int16Regs:$g),
5806 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5809 def SUST_P_2D_ARRAY_V2B16_TRAP
5811 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5812 Int16Regs:$r, Int16Regs:$g),
5813 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5816 def SUST_P_2D_ARRAY_V2B32_TRAP
5818 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5819 Int32Regs:$r, Int32Regs:$g),
5820 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5823 def SUST_P_2D_ARRAY_V4B8_TRAP
5825 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5826 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5827 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5828 "\\{$r, $g, $b, $a\\};",
5830 def SUST_P_2D_ARRAY_V4B16_TRAP
5832 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5833 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5834 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5835 "\\{$r, $g, $b, $a\\};",
5837 def SUST_P_2D_ARRAY_V4B32_TRAP
5839 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5840 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5841 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5842 "\\{$r, $g, $b, $a\\};",
// sust.p stores to 3D surfaces, `.trap` mode.
// Record names follow SUST_P_3D_[V<n>]B<width>_TRAP, matching the
// `.v<n>` / `.b<width>` suffixes in the printed mnemonic; no b64 forms are
// defined in this group.
// Operands: $s is the surface operand; $x, $y, $z (all Int32Regs) are the
// coordinates.
// The coordinate vector is always printed with four elements and $z is
// repeated in the last slot. NOTE(review): this looks like deliberate padding
// (the PTX ISA describes the fourth element of a 3d coordinate vector as
// ignored) rather than a typo -- confirm against the PTX `sust` documentation.
5846 def SUST_P_3D_B8_TRAP
5848 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5850 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5852 def SUST_P_3D_B16_TRAP
5854 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5856 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5858 def SUST_P_3D_B32_TRAP
5860 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5862 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5864 def SUST_P_3D_V2B8_TRAP
5866 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5867 Int16Regs:$r, Int16Regs:$g),
5868 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5871 def SUST_P_3D_V2B16_TRAP
5873 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5874 Int16Regs:$r, Int16Regs:$g),
5875 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5878 def SUST_P_3D_V2B32_TRAP
5880 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5881 Int32Regs:$r, Int32Regs:$g),
5882 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5885 def SUST_P_3D_V4B8_TRAP
5887 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5888 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5889 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5890 "\\{$r, $g, $b, $a\\};",
5892 def SUST_P_3D_V4B16_TRAP
5894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5895 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5896 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5897 "\\{$r, $g, $b, $a\\};",
5899 def SUST_P_3D_V4B32_TRAP
5901 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5902 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5903 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5904 "\\{$r, $g, $b, $a\\};",
5908 // Surface store instruction patterns.
5909 // They are kept separate from the instruction definitions because TableGen
5910 // reports type errors when they are embedded there (root cause unknown).
// Selection patterns for the 1D sust.b `.clamp` intrinsics.
// Each anonymous Pat maps one int_nvvm_sust_b_1d_<type>_clamp intrinsic to the
// SUST_B_1D_<type>_CLAMP instruction of the same shape; the operands
// ($s, $x, then the data registers) are forwarded unchanged and in the same
// order. The i8 and i16 intrinsics both take their data in Int16Regs.
5913 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5914 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5915 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5917 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5918 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5919 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5921 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5922 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5923 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5925 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5926 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5927 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5929 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5930 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5931 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5932 Int16Regs:$r, Int16Regs:$g)>;
5934 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5935 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5936 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5937 Int16Regs:$r, Int16Regs:$g)>;
5939 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5940 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5941 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5942 Int32Regs:$r, Int32Regs:$g)>;
5944 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5945 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5946 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5947 Int64Regs:$r, Int64Regs:$g)>;
5949 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5950 Int64Regs:$s, Int32Regs:$x,
5951 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5952 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5953 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5955 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5956 Int64Regs:$s, Int32Regs:$x,
5957 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5958 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5959 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5961 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5962 Int64Regs:$s, Int32Regs:$x,
5963 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5964 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5965 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5969 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5970 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5971 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5974 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5975 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5976 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5979 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5980 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5981 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5984 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5985 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5986 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_1d_array_*_clamp intrinsics.
// Each maps to the SUST_B_1D_ARRAY_V2*/V4*_CLAMP instruction of the same
// element width; $s, $l, $x and the data registers are forwarded unchanged
// and in the same order ($l precedes $x in both intrinsic and instruction).
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
5989 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5990 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5991 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5992 Int16Regs:$r, Int16Regs:$g)>;
5994 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5995 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5996 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5997 Int16Regs:$r, Int16Regs:$g)>;
5999 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6000 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6001 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6002 Int32Regs:$r, Int32Regs:$g)>;
6004 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6005 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6006 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6007 Int64Regs:$r, Int64Regs:$g)>;
6009 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6010 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6011 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6012 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6013 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6015 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6016 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6017 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6018 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6019 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6021 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6022 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6023 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6024 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6025 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6029 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6030 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6031 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6034 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6035 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6036 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6039 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6040 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6041 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6044 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6045 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6046 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_2d_*_clamp intrinsics.
// Each maps to the SUST_B_2D_V2*/V4*_CLAMP instruction of the same element
// width; $s, $x, $y and the data registers are forwarded unchanged and in the
// same order.
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6049 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6050 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6051 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6052 Int16Regs:$r, Int16Regs:$g)>;
6054 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6055 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6056 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6057 Int16Regs:$r, Int16Regs:$g)>;
6059 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6060 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6061 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6062 Int32Regs:$r, Int32Regs:$g)>;
6064 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6065 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6066 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6067 Int64Regs:$r, Int64Regs:$g)>;
6069 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6070 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6071 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6072 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6073 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6075 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6076 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6077 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6078 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6079 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6081 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6082 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6083 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6084 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6085 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6089 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6090 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6091 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6092 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6095 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6096 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6097 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6098 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6101 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6102 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6103 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6104 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6107 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6108 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6109 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6110 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for int_nvvm_sust_b_2d_array_v2i8_clamp and
// int_nvvm_sust_b_2d_array_v2i16_clamp.
// Each maps to SUST_B_2D_ARRAY_V2B8_CLAMP / SUST_B_2D_ARRAY_V2B16_CLAMP with
// $s, $l, $x, $y, $r, $g forwarded unchanged and in the same order.
// Both element widths take their data operands in Int16Regs.
6113 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6114 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6115 Int16Regs:$r, Int16Regs:$g),
6116 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6117 Int32Regs:$x, Int32Regs:$y,
6118 Int16Regs:$r, Int16Regs:$g)>;
6120 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6121 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6122 Int16Regs:$r, Int16Regs:$g),
6123 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6124 Int32Regs:$x, Int32Regs:$y,
6125 Int16Regs:$r, Int16Regs:$g)>;
6127 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6128 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6130 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6131 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6133 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6134 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6136 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6137 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Selection patterns for the four-element (v4i8/v4i16/v4i32) forms of the
// int_nvvm_sust_b_2d_array_*_clamp intrinsics.
// Each maps to the SUST_B_2D_ARRAY_V4B8/V4B16/V4B32_CLAMP instruction with
// $s, $l, $x, $y and the data registers $r, $g, $b, $a forwarded unchanged and
// in the same order.
// The i8 form uses Int16Regs for its data operands, like the i16 form.
6139 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6140 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6141 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6142 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6143 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6144 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6146 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6147 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6148 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6149 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6150 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6151 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6153 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6154 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6155 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6156 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6157 Int32Regs:$x, Int32Regs:$y,
6158 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6162 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6163 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6165 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6166 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6169 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6170 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6172 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6173 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6176 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6177 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6179 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6180 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6183 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6184 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6186 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6187 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_3d_*_clamp intrinsics.
// Each maps to the SUST_B_3D_V2*/V4*_CLAMP instruction of the same element
// width; $s, $x, $y, $z and the data registers are forwarded unchanged and in
// the same order.
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6190 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6191 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6192 Int16Regs:$r, Int16Regs:$g),
6193 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6194 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6195 Int16Regs:$r, Int16Regs:$g)>;
6197 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6198 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6199 Int16Regs:$r, Int16Regs:$g),
6200 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6201 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6202 Int16Regs:$r, Int16Regs:$g)>;
6204 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6205 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6206 Int32Regs:$r, Int32Regs:$g),
6207 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6208 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6209 Int32Regs:$r, Int32Regs:$g)>;
6211 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6212 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6213 Int64Regs:$r, Int64Regs:$g),
6214 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6215 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6216 Int64Regs:$r, Int64Regs:$g)>;
6218 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6219 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6220 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6221 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6222 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6223 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6225 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6226 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6227 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6228 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6229 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6230 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6232 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6233 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6234 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6235 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6236 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6237 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics.
// Each intrinsic is rewritten to the SUST_B_1D_*_TRAP instruction with the
// matching B8/B16/B32/B64 (scalar), V2* or V4* suffix; operands $s, $x and the
// data registers ($r, $g, $b, $a) are forwarded unchanged and in the same order.
// Note: the i8 forms use Int16Regs for their data operands, like the i16 forms.
// No v4i64 form appears in this group.
6241 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6242 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6243 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6245 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6246 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6247 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6249 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6250 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6251 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6253 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6254 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6255 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6257 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6258 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6259 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6260 Int16Regs:$r, Int16Regs:$g)>;
6262 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6263 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6264 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6265 Int16Regs:$r, Int16Regs:$g)>;
6267 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6268 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6269 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6270 Int32Regs:$r, Int32Regs:$g)>;
6272 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6273 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6274 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6275 Int64Regs:$r, Int64Regs:$g)>;
6277 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6278 Int64Regs:$s, Int32Regs:$x,
6279 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6280 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6281 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6283 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6284 Int64Regs:$s, Int32Regs:$x,
6285 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6286 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6287 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6289 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6290 Int64Regs:$s, Int32Regs:$x,
6291 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6292 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6293 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6297 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6298 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6299 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6302 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6303 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6304 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6307 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6308 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6309 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6312 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6313 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6314 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_1d_array_*_trap intrinsics.
// Each maps to the SUST_B_1D_ARRAY_V2*/V4*_TRAP instruction of the same
// element width; $s, $l, $x and the data registers are forwarded unchanged
// and in the same order ($l precedes $x in both intrinsic and instruction).
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6317 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6318 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6319 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6320 Int16Regs:$r, Int16Regs:$g)>;
6322 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6323 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6324 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6325 Int16Regs:$r, Int16Regs:$g)>;
6327 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6328 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6329 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6330 Int32Regs:$r, Int32Regs:$g)>;
6332 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6333 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6334 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6335 Int64Regs:$r, Int64Regs:$g)>;
6337 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6338 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6339 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6340 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6341 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6343 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6344 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6345 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6346 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6347 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6349 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6350 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6351 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6352 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6353 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6357 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6358 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6359 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6362 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6363 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6364 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6367 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6368 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6369 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6372 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6373 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6374 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_2d_*_trap intrinsics.
// Each maps to the SUST_B_2D_V2*/V4*_TRAP instruction of the same element
// width; $s, $x, $y and the data registers are forwarded unchanged and in the
// same order.
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6377 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6378 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6379 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6380 Int16Regs:$r, Int16Regs:$g)>;
6382 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6383 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6384 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6385 Int16Regs:$r, Int16Regs:$g)>;
6387 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6388 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6389 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6390 Int32Regs:$r, Int32Regs:$g)>;
6392 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6393 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6394 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6395 Int64Regs:$r, Int64Regs:$g)>;
6397 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6398 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6399 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6400 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6401 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6403 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6404 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6405 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6406 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6407 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6409 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6410 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6411 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6412 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6413 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6417 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6418 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6419 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6420 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6423 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6424 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6425 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6426 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6429 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6430 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6431 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6432 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6435 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6436 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6437 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6438 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for int_nvvm_sust_b_2d_array_v2i8_trap and
// int_nvvm_sust_b_2d_array_v2i16_trap.
// Each maps to SUST_B_2D_ARRAY_V2B8_TRAP / SUST_B_2D_ARRAY_V2B16_TRAP with
// $s, $l, $x, $y, $r, $g forwarded unchanged and in the same order.
// Both element widths take their data operands in Int16Regs.
6441 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6442 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6443 Int16Regs:$r, Int16Regs:$g),
6444 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6445 Int32Regs:$x, Int32Regs:$y,
6446 Int16Regs:$r, Int16Regs:$g)>;
6448 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6449 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6450 Int16Regs:$r, Int16Regs:$g),
6451 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6452 Int32Regs:$x, Int32Regs:$y,
6453 Int16Regs:$r, Int16Regs:$g)>;
6455 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6456 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6458 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6459 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6461 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6462 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6464 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6465 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Selection patterns for the four-element (v4i8/v4i16/v4i32) forms of the
// int_nvvm_sust_b_2d_array_*_trap intrinsics.
// Each maps to the SUST_B_2D_ARRAY_V4B8/V4B16/V4B32_TRAP instruction with
// $s, $l, $x, $y and the data registers $r, $g, $b, $a forwarded unchanged and
// in the same order.
// The i8 form uses Int16Regs for its data operands, like the i16 form.
6467 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6468 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6469 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6470 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6471 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6472 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6474 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6475 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6476 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6477 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6478 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6479 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6481 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6482 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6483 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6484 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6485 Int32Regs:$x, Int32Regs:$y,
6486 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6490 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6491 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6493 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6494 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6497 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6498 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6500 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6501 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6504 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6505 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6507 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6508 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6511 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6512 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6514 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6515 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_3d_*_trap intrinsics.
// Each maps to the SUST_B_3D_V2*/V4*_TRAP instruction of the same element
// width; $s, $x, $y, $z and the data registers are forwarded unchanged and in
// the same order.
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6518 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6519 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6520 Int16Regs:$r, Int16Regs:$g),
6521 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6522 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6523 Int16Regs:$r, Int16Regs:$g)>;
6525 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6526 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6527 Int16Regs:$r, Int16Regs:$g),
6528 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6529 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6530 Int16Regs:$r, Int16Regs:$g)>;
6532 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6533 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6534 Int32Regs:$r, Int32Regs:$g),
6535 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6536 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6537 Int32Regs:$r, Int32Regs:$g)>;
6539 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6540 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6541 Int64Regs:$r, Int64Regs:$g),
6542 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6543 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6544 Int64Regs:$r, Int64Regs:$g)>;
6546 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6547 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6548 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6549 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6550 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6551 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6553 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6554 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6555 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6556 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6557 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6558 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6560 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6561 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6562 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6563 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6564 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6565 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Each intrinsic is rewritten to the SUST_B_1D_*_ZERO instruction with the
// matching B8/B16/B32/B64 (scalar), V2* or V4* suffix; operands $s, $x and the
// data registers ($r, $g, $b, $a) are forwarded unchanged and in the same order.
// Note: the i8 forms use Int16Regs for their data operands, like the i16 forms.
// No v4i64 form appears in this group.
6569 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6570 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6571 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6573 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6574 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6575 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6577 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6578 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6579 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6581 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6582 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6583 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6585 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6586 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6587 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6588 Int16Regs:$r, Int16Regs:$g)>;
6590 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6591 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6592 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6593 Int16Regs:$r, Int16Regs:$g)>;
6595 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6596 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6597 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6598 Int32Regs:$r, Int32Regs:$g)>;
6600 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6601 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6602 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6603 Int64Regs:$r, Int64Regs:$g)>;
6605 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6606 Int64Regs:$s, Int32Regs:$x,
6607 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6608 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6609 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6611 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6612 Int64Regs:$s, Int32Regs:$x,
6613 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6614 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6615 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6617 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6618 Int64Regs:$s, Int32Regs:$x,
6619 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6620 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6621 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6625 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6626 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6627 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6630 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6631 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6632 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6635 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6636 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6637 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6640 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6641 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6642 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Each maps to the SUST_B_1D_ARRAY_V2*/V4*_ZERO instruction of the same
// element width; $s, $l, $x and the data registers are forwarded unchanged
// and in the same order ($l precedes $x in both intrinsic and instruction).
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6645 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6646 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6647 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6648 Int16Regs:$r, Int16Regs:$g)>;
6650 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6651 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6652 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6653 Int16Regs:$r, Int16Regs:$g)>;
6655 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6656 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6657 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6658 Int32Regs:$r, Int32Regs:$g)>;
6660 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6661 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6662 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6663 Int64Regs:$r, Int64Regs:$g)>;
6665 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6666 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6667 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6668 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6669 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6671 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6672 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6673 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6674 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6675 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6677 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6678 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6679 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6680 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6681 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6685 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6686 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6687 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6690 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6691 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6692 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6695 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6696 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6697 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6700 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6701 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6702 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Selection patterns for the vector (v2*/v4*) forms of the
// int_nvvm_sust_b_2d_*_zero intrinsics.
// Each maps to the SUST_B_2D_V2*/V4*_ZERO instruction of the same element
// width; $s, $x, $y and the data registers are forwarded unchanged and in the
// same order.
// The i8 forms use Int16Regs for their data operands, like the i16 forms.
6705 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6706 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6707 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6708 Int16Regs:$r, Int16Regs:$g)>;
6710 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6711 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6712 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6713 Int16Regs:$r, Int16Regs:$g)>;
6715 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6716 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6717 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6718 Int32Regs:$r, Int32Regs:$g)>;
6720 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6721 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6722 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6723 Int64Regs:$r, Int64Regs:$g)>;
6725 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6726 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6727 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6728 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6729 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6731 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6732 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6733 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6734 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6735 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6737 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6738 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6739 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6740 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6741 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_B_2D_ARRAY_*_ZERO
// instruction and forwards its operands unchanged: $s (Int64Regs), then $l,
// $x, $y (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// Value register classes: i8 and i16 both use Int16Regs, i32 uses Int32Regs,
// i64 uses Int64Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// SUST_* instruction definitions.
// NOTE(review): several patterns appear truncated in the text under review:
// the four scalar forms stop after the coordinates in the result dag, and the
// v2i32 / v2i64 forms do not show the intrinsic's trailing "$g)," operand
// line -- confirm against the build.
// Scalar (single value operand $r) forms.
6745 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6746 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6747 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6748 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6751 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6752 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6753 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6754 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6757 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6758 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6759 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6760 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6763 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6764 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6765 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6766 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms (value operands $r, $g).
6769 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6770 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6771 Int16Regs:$r, Int16Regs:$g),
6772 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6773 Int32Regs:$x, Int32Regs:$y,
6774 Int16Regs:$r, Int16Regs:$g)>;
6776 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6777 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6778 Int16Regs:$r, Int16Regs:$g),
6779 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6780 Int32Regs:$x, Int32Regs:$y,
6781 Int16Regs:$r, Int16Regs:$g)>;
6783 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6784 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6786 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6787 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6789 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6790 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6792 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6793 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
6795 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6796 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6797 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6798 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6799 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6800 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6802 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6803 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6804 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6805 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6806 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6807 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6809 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6810 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6811 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6812 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6813 Int32Regs:$x, Int32Regs:$y,
6814 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_B_3D_*_ZERO instruction
// and forwards its operands unchanged: $s (Int64Regs), the coordinates $x, $y,
// $z (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// In the vector forms, i8 and i16 values both use Int16Regs, i32 uses
// Int32Regs and i64 uses Int64Regs.
// NOTE(review): the four scalar patterns appear truncated in the text under
// review -- neither the intrinsic's value operand line nor the result dag's
// final operand / closing ")>;" is visible. Confirm against the build.
// Scalar forms.
6818 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6819 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6821 (SUST_B_3D_B8_ZERO Int64Regs:$s,
6822 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6825 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6826 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6828 (SUST_B_3D_B16_ZERO Int64Regs:$s,
6829 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6832 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6833 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6835 (SUST_B_3D_B32_ZERO Int64Regs:$s,
6836 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6839 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6840 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6842 (SUST_B_3D_B64_ZERO Int64Regs:$s,
6843 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element forms (value operands $r, $g).
6846 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6847 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6848 Int16Regs:$r, Int16Regs:$g),
6849 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6850 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6851 Int16Regs:$r, Int16Regs:$g)>;
6853 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6854 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6855 Int16Regs:$r, Int16Regs:$g),
6856 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6857 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6858 Int16Regs:$r, Int16Regs:$g)>;
6860 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6861 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6862 Int32Regs:$r, Int32Regs:$g),
6863 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6864 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6865 Int32Regs:$r, Int32Regs:$g)>;
6867 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6868 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6869 Int64Regs:$r, Int64Regs:$g),
6870 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6871 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6872 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
6874 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6875 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6876 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6877 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6878 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6879 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6881 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6882 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6883 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6884 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6885 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6886 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6888 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6889 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6890 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6891 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6892 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6893 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_P_1D_*_TRAP instruction
// and forwards its operands unchanged: $s (Int64Regs), the coordinate $x
// (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// Value register classes: i8 and i16 both use Int16Regs, i32 uses Int32Regs.
// Only 8/16/32-bit element forms are defined in this group.
// NOTE(review): "sust_p" / "_trap" presumably denote the formatted surface
// store with the trapping out-of-range mode -- confirm against the PTX ISA
// and the SUST_P_* instruction definitions.
// Scalar (single value operand $r) forms.
6898 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6899 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6900 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6902 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6903 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6904 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6906 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6907 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6908 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// Two-element forms (value operands $r, $g).
6910 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6911 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6912 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6913 Int16Regs:$r, Int16Regs:$g)>;
6915 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6916 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6917 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6918 Int16Regs:$r, Int16Regs:$g)>;
6920 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6921 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6922 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6923 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
6925 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6926 Int64Regs:$s, Int32Regs:$x,
6927 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6928 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6929 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6931 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6932 Int64Regs:$s, Int32Regs:$x,
6933 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6934 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6935 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6937 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6938 Int64Regs:$s, Int32Regs:$x,
6939 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6940 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6941 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_P_1D_ARRAY_*_TRAP
// instruction and forwards its operands unchanged: $s (Int64Regs), $l and $x
// (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// Value register classes: i8 and i16 both use Int16Regs, i32 uses Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// SUST_P_* instruction definitions.
// NOTE(review): the three scalar patterns appear truncated in the text under
// review (the result dag stops after $x; the final value operand and closing
// ")>;" are not visible) -- confirm against the build.
// Scalar (single value operand $r) forms.
6945 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6946 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6947 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6950 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6951 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6952 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6955 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6956 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6957 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element forms (value operands $r, $g).
6960 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6961 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6962 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6963 Int16Regs:$r, Int16Regs:$g)>;
6965 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6966 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6967 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6968 Int16Regs:$r, Int16Regs:$g)>;
6970 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6971 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6972 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6973 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
6975 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6976 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6977 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6978 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6979 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6981 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6982 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6983 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6984 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6985 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6987 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6988 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6989 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6990 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6991 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_P_2D_*_TRAP instruction
// and forwards its operands unchanged: $s (Int64Regs), the coordinates $x and
// $y (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// Value register classes: i8 and i16 both use Int16Regs, i32 uses Int32Regs.
// NOTE(review): the three scalar patterns appear truncated in the text under
// review (the result dag stops after $y; the final value operand and closing
// ")>;" are not visible) -- confirm against the build.
// Scalar (single value operand $r) forms.
6995 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6996 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6997 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7000 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7001 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7002 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7005 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7006 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7007 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element forms (value operands $r, $g).
7010 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7011 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7012 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7013 Int16Regs:$r, Int16Regs:$g)>;
7015 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7016 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7017 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7018 Int16Regs:$r, Int16Regs:$g)>;
7020 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7021 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7022 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7023 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
7025 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7026 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7027 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7028 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7029 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7031 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7032 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7033 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7034 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7035 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7037 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7038 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7039 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7040 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7041 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_P_2D_ARRAY_*_TRAP
// instruction and forwards its operands unchanged: $s (Int64Regs), then $l,
// $x, $y (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// Value register classes: i8 and i16 both use Int16Regs, i32 uses Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// SUST_P_* instruction definitions.
// NOTE(review): several patterns appear truncated in the text under review:
// the three scalar forms stop after the coordinates in the result dag, and
// the v2i32 form does not show the intrinsic's trailing "$g)," operand line
// -- confirm against the build.
// Scalar (single value operand $r) forms.
7045 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7046 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7047 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7048 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7051 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7052 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7053 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7054 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7057 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7058 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7059 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7060 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms (value operands $r, $g).
7063 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7064 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7065 Int16Regs:$r, Int16Regs:$g),
7066 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7067 Int32Regs:$x, Int32Regs:$y,
7068 Int16Regs:$r, Int16Regs:$g)>;
7070 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7071 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7072 Int16Regs:$r, Int16Regs:$g),
7073 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7074 Int32Regs:$x, Int32Regs:$y,
7075 Int16Regs:$r, Int16Regs:$g)>;
7077 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7078 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7080 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7081 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
7083 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7084 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7085 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7086 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7087 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7088 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7090 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7091 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7092 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7093 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7094 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7095 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7097 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7098 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7099 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7100 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7101 Int32Regs:$x, Int32Regs:$y,
7102 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each Pat maps one intrinsic onto the matching SUST_P_3D_*_TRAP instruction
// and forwards its operands unchanged: $s (Int64Regs), the coordinates $x, $y,
// $z (Int32Regs), then the value operand(s) $r [, $g [, $b, $a]].
// In the vector forms, i8 and i16 values both use Int16Regs and i32 uses
// Int32Regs.
// NOTE(review): the three scalar patterns appear truncated in the text under
// review -- neither the intrinsic's value operand line nor the result dag's
// final operand / closing ")>;" is visible. Confirm against the build.
// Scalar forms.
7106 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7107 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7109 (SUST_P_3D_B8_TRAP Int64Regs:$s,
7110 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7113 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7114 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7116 (SUST_P_3D_B16_TRAP Int64Regs:$s,
7117 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7120 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7121 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7123 (SUST_P_3D_B32_TRAP Int64Regs:$s,
7124 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element forms (value operands $r, $g).
7127 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7128 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7129 Int16Regs:$r, Int16Regs:$g),
7130 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7131 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7132 Int16Regs:$r, Int16Regs:$g)>;
7134 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7135 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7136 Int16Regs:$r, Int16Regs:$g),
7137 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7138 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7139 Int16Regs:$r, Int16Regs:$g)>;
7141 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7142 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7143 Int32Regs:$r, Int32Regs:$g),
7144 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7145 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7146 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms (value operands $r, $g, $b, $a).
7148 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7149 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7150 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7151 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7152 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7153 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7155 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7156 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7157 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7158 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7159 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7160 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7162 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7163 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7164 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7165 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7166 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7167 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7169 //-----------------------------------
7170 // Read Special Registers
7171 //-----------------------------------
// Instruction template for reading a special register into a 64-bit result.
//   regname - register name WITHOUT the leading '%'; the asm string adds it.
//   intop   - zero-operand intrinsic this instruction is selected for.
// The instruction has no inputs, writes Int64Regs:$d, and prints as
// "mov.u64\t$d, %<regname>;".
7173 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7174 : NVPTXInst<(outs Int64Regs:$d), (ins),
7175 !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
7176 [(set Int64Regs:$d, (intop))]>;
// Instruction template for reading a special register into a 32-bit result.
//   regname - register name WITHOUT the leading '%'; the asm string adds it.
//   intop   - zero-operand intrinsic this instruction is selected for.
// The instruction has no inputs, writes Int32Regs:$d, and prints as
// "mov.u32\t$d, %<regname>;".
7178 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7179 : NVPTXInst<(outs Int32Regs:$d), (ins),
7180 !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
7181 [(set Int32Regs:$d, (intop))]>;
7183 // TODO: Add vector versions of the special-register reads.
// 32-bit reads of %tid.x / %tid.y / %tid.z / %tid.w, one instruction per
// component, each selected for the matching int_nvvm_read_ptx_sreg_tid_*
// intrinsic.
7185 def INT_PTX_SREG_TID_X :
7186 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7187 def INT_PTX_SREG_TID_Y :
7188 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7189 def INT_PTX_SREG_TID_Z :
7190 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7191 def INT_PTX_SREG_TID_W :
7192 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// 32-bit reads of %ntid.x / %ntid.y / %ntid.z / %ntid.w, one instruction per
// component, each selected for the matching int_nvvm_read_ptx_sreg_ntid_*
// intrinsic.
7194 def INT_PTX_SREG_NTID_X :
7195 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7196 def INT_PTX_SREG_NTID_Y :
7197 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7198 def INT_PTX_SREG_NTID_Z :
7199 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7200 def INT_PTX_SREG_NTID_W :
7201 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// 32-bit reads of %laneid, %warpid and %nwarpid, each selected for the
// matching int_nvvm_read_ptx_sreg_* intrinsic.
7203 def INT_PTX_SREG_LANEID :
7204 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7205 def INT_PTX_SREG_WARPID :
7206 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7207 def INT_PTX_SREG_NWARPID :
7208 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// 32-bit reads of %ctaid.x / %ctaid.y / %ctaid.z / %ctaid.w, one instruction
// per component, each selected for the matching
// int_nvvm_read_ptx_sreg_ctaid_* intrinsic.
7210 def INT_PTX_SREG_CTAID_X :
7211 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7212 def INT_PTX_SREG_CTAID_Y :
7213 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7214 def INT_PTX_SREG_CTAID_Z :
7215 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7216 def INT_PTX_SREG_CTAID_W :
7217 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// 32-bit reads of %nctaid.x / %nctaid.y / %nctaid.z / %nctaid.w, one
// instruction per component, each selected for the matching
// int_nvvm_read_ptx_sreg_nctaid_* intrinsic.
7219 def INT_PTX_SREG_NCTAID_X :
7220 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7221 def INT_PTX_SREG_NCTAID_Y :
7222 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7223 def INT_PTX_SREG_NCTAID_Z :
7224 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7225 def INT_PTX_SREG_NCTAID_W :
7226 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// 32-bit reads of %smid, %nsmid and %gridid, each selected for the matching
// int_nvvm_read_ptx_sreg_* intrinsic.
7228 def INT_PTX_SREG_SMID :
7229 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7230 def INT_PTX_SREG_NSMID :
7231 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7232 def INT_PTX_SREG_GRIDID :
7233 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// 32-bit reads of the five %lanemask_{eq,le,lt,ge,gt} registers, each
// selected for the matching int_nvvm_read_ptx_sreg_lanemask_* intrinsic.
7235 def INT_PTX_SREG_LANEMASK_EQ :
7236 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7237 def INT_PTX_SREG_LANEMASK_LE :
7238 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7239 def INT_PTX_SREG_LANEMASK_LT :
7240 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7241 def INT_PTX_SREG_LANEMASK_GE :
7242 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7243 def INT_PTX_SREG_LANEMASK_GT :
7244 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// Reads of %clock (32-bit, via PTX_READ_SREG_R32) and %clock64 (64-bit, via
// PTX_READ_SREG_R64); %clock64 is the only 64-bit special-register read in
// this section.
// NOTE(review): these registers presumably change between reads, and nothing
// here marks the instructions as having side effects -- confirm that
// duplicate reads cannot be merged by later passes.
7246 def INT_PTX_SREG_CLOCK :
7247 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7248 def INT_PTX_SREG_CLOCK64 :
7249 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// 32-bit reads of %pm0 .. %pm3, each selected for the matching
// int_nvvm_read_ptx_sreg_pm* intrinsic.
7251 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7252 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7253 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7254 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// Materializes WARP_SZ into Int32Regs:$dst for the
// int_nvvm_read_ptx_sreg_warpsize intrinsic. It is written as a plain
// NVPTXInst because the PTX_READ_SREG_R32/R64 templates always prefix the
// operand name with '%', whereas the asm here uses the bare WARP_SZ token.
7256 // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
7257 // handle the constant.
7258 def INT_PTX_SREG_WARPSIZE :
7259 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7260 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;