1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 def immFloat0 : PatLeaf<(fpimm), [{
11 float f = (float)N->getValueAPF().convertToFloat();
15 def immFloat1 : PatLeaf<(fpimm), [{
16 float f = (float)N->getValueAPF().convertToFloat();
20 def immDouble0 : PatLeaf<(fpimm), [{
21 double d = (double)N->getValueAPF().convertToDouble();
25 def immDouble1 : PatLeaf<(fpimm), [{
26 double d = (double)N->getValueAPF().convertToDouble();
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
35 let isConvergent = 1 in {
36 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
38 [(int_nvvm_barrier0)]>;
39 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
41 !strconcat(".reg .pred \t%p1; \n\t",
42 !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
43 !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
44 !strconcat("}}", ""))))),
45 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
46 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
48 !strconcat(".reg .pred \t%p1; \n\t",
49 !strconcat(".reg .pred \t%p2; \n\t",
50 !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
51 !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
52 !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
53 !strconcat("}}", ""))))))),
54 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
55 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
57 !strconcat(".reg .pred \t%p1; \n\t",
58 !strconcat(".reg .pred \t%p2; \n\t",
59 !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
60 !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
61 !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
62 !strconcat("}}", ""))))))),
63 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// bar.sync taking the barrier number as an immediate; selected for
// int_nvvm_bar_sync.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync\t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
68 // shfl.{up,down,bfly,idx}.b32
69 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// The last two parameters to shfl can be regs or imms. ptxas is smart
// enough to inline constant registers, so strictly speaking we don't need to
// handle immediates here. But it's easy enough, and it makes our ptx more
// readable.
76 (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
77 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
78 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
82 (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
83 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
84 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
88 (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
89 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
90 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
94 (ins regclass:$src, i32imm:$offset, i32imm:$mask),
95 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
96 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
// Instantiate SHFL for each shuffle mode (down, up, bfly, idx), once with
// Int32Regs and once with Float32Regs as the data register class.
defm INT_SHFL_DOWN_I32
    : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
    : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32
    : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
    : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32
    : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
    : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32
    : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
    : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
108 } // isConvergent = 1
111 //-----------------------------------
112 // Explicit Memory Fence Functions
113 //-----------------------------------
114 class MEMBAR<string StrOp, Intrinsic IntOP> :
115 NVPTXInst<(outs), (ins),
// One MEMBAR instruction per membar intrinsic (membar.cta, membar.gl,
// membar.sys).
def INT_MEMBAR_CTA
    : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
    : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
    : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
//-----------------------------------
// Math Functions
//-----------------------------------
127 // Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
131 // Same story for fmax, fmin.
// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders -> CVT with CvtSAT.
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64: the same four operand orders, using the double-precision leaves.
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// We need a full string for OpcStr here because we need to deal with cases
// like INT_PTX_RECIP.
162 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
163 NVPTXRegClass src_regclass, Intrinsic IntOP>
164 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
166 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
168 // We need a full string for OpcStr here because we need to deal with the case
169 // like INT_PTX_NATIVE_POWR_F.
170 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
171 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
172 : NVPTXInst<(outs t_regclass:$dst),
173 (ins s0_regclass:$src0, s1_regclass:$src1),
175 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
177 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
178 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
179 NVPTXRegClass s2_regclass, Intrinsic IntOP>
180 : NVPTXInst<(outs t_regclass:$dst),
181 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
183 [(set t_regclass:$dst,
184 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
190 def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
192 def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
195 def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
197 def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
// prmt.b32: Int32Regs result from three Int32Regs sources.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_prmt>;
//
// Min / Max
//

// Integer min: signed/unsigned, 32-bit and 64-bit.
def INT_NVVM_MIN_I
    : F_MATH_2<"min.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI
    : F_MATH_2<"min.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_ui>;

def INT_NVVM_MIN_LL
    : F_MATH_2<"min.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL
    : F_MATH_2<"min.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ull>;

// Integer max: signed/unsigned, 32-bit and 64-bit.
def INT_NVVM_MAX_I
    : F_MATH_2<"max.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI
    : F_MATH_2<"max.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_ui>;

def INT_NVVM_MAX_LL
    : F_MATH_2<"max.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL
    : F_MATH_2<"max.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ull>;

// Floating-point min/max: f32 (plain and .ftz) and f64.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
//
// Multiplication
//

// mul.hi: signed/unsigned, 32-bit and 64-bit.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding modifier (rn/rz/rm/rp), each with
// and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo: signed and unsigned.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
//
// Division
//

// Approximate f32 divide, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit rounding modifier.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
327 def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
329 def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
// sad.s32 / sad.u32: three Int32Regs sources, Int32Regs result.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_ui>;
// floor -> same-type CVT using the CvtRMI / CvtRMI_FTZ mode.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil -> same-type CVT using the CvtRPI / CvtRPI_FTZ mode.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
363 def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
365 def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
// Floating-point abs: f32 (.ftz and plain) and f64.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
// round -> same-type CVT using the CvtRNI / CvtRNI_FTZ mode.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

// trunc -> same-type CVT using the CvtRZI / CvtRZI_FTZ mode.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

// saturate -> same-type CVT using the CvtSAT / CvtSAT_FTZ mode.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

// sin.approx (f32 only)
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos.approx (f32 only)
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
//
// Fused multiply-add
//

// f32 fma with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

// f64 fma with an explicit rounding modifier.
def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;
//
// Reciprocal
//

// f32 rcp with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp with an explicit rounding modifier.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// Approximate f64 rcp (.ftz form only).
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
//
// Sqrt
//

// f32 sqrt with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;

// Approximate f32 sqrt, with and without .ftz.
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt with an explicit rounding modifier.
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
// nvvm_sqrt intrinsic
//
// int_nvvm_sqrt_f is selected to one of four sqrt instructions depending on
// the doF32FTZ and do_SQRTF32_RN predicates. The patterns are listed from the
// most constrained (both predicates) to the unconstrained fallback; keep that
// order, since all four match the same intrinsic.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx: f32 (.ftz and plain) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
//
// Add
//

// f32 add with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add with an explicit rounding modifier.
def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
//
// Convert
//

// f64 -> f32: each d2f intrinsic selects CVT_f32_f64 with the matching
// CvtR{N,Z,M,P}[_FTZ] mode.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> s32: d2i intrinsics select CVT_s32_f64 with a CvtR{N,Z,M,P}I mode.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32: d2ui intrinsics select CVT_u32_f64 with a CvtR{N,Z,M,P}I mode.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// s32 -> f64: i2d intrinsics select CVT_f64_s32 with a CvtR{N,Z,M,P} mode.
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64: ui2d intrinsics select CVT_f64_u32 with a CvtR{N,Z,M,P} mode.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> s32: f2i intrinsics select CVT_s32_f32 with a CvtR{N,Z,M,P}I[_FTZ]
// mode.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32: f2ui intrinsics select CVT_u32_f32 with a CvtR{N,Z,M,P}I[_FTZ]
// mode.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// s32 -> f32: i2f intrinsics select CVT_f32_s32 with a CvtR{N,Z,M,P} mode.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32: ui2f intrinsics select CVT_f32_u32 with a CvtR{N,Z,M,P} mode.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Builds a Float64Regs value from two Int32Regs operands with a single
// mov.b64 whose source is the vector {$src0, $src1}.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
706 def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
707 !strconcat(".reg .b32 %temp; \n\t",
708 !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
710 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
711 def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
712 !strconcat(".reg .b32 %temp; \n\t",
713 !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
715 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> s64: f2ll intrinsics select CVT_s64_f32 with a CvtR{N,Z,M,P}I[_FTZ]
// mode.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64: f2ull intrinsics select CVT_u64_f32 with a
// CvtR{N,Z,M,P}I[_FTZ] mode.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> s64: d2ll intrinsics select CVT_s64_f64 with a CvtR{N,Z,M,P}I mode.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64: d2ull intrinsics select CVT_u64_f64 with a CvtR{N,Z,M,P}I mode.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// Selection patterns for the int_nvvm_ll2f_* and int_nvvm_ull2f_* intrinsics.
// The operand is in Int64Regs; ll2f maps to CVT_f32_s64 and ull2f maps to
// CVT_f32_u64. Unlike the float-to-integer patterns, the mode operands used
// here are CvtRN/CvtRZ/CvtRM/CvtRP (no trailing "I"), chosen by the
// intrinsic's rounding suffix.
769 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
770 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
771 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
772 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
773 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
774 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
775 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
776 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
778 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
779 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
780 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
781 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
782 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
783 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
784 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
785 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// Selection patterns for the int_nvvm_ll2d_* and int_nvvm_ull2d_* intrinsics.
// The operand is in Int64Regs; ll2d maps to CVT_f64_s64 and ull2d maps to
// CVT_f64_u64, with CvtRN/CvtRZ/CvtRM/CvtRP chosen by the intrinsic's
// rounding suffix.
787 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
788 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
789 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
790 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
791 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
792 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
793 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
794 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
796 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
797 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
798 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
799 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
800 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
801 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
802 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
803 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
806 // FIXME: Ideally, we could use these patterns instead of the scope-creating
807 // patterns, but ptxas does not like these since .s16 is not compatible with
808 // .f16. The solution is to use .bXX for all integer register types, but we
809 // are not there yet.
810 //def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
811 // (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
812 //def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
813 // (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
815 //def : Pat<(int_nvvm_h2f Int16Regs:$a),
816 // (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
// int_nvvm_f2h_rn_ftz, emitted as a scoped PTX snippet rather than a plain
// CVT pattern (see the FIXME above: ptxas rejects .s16 registers where .f16
// is expected). The snippet opens a "{{" scope, declares a .b16 %temp,
// converts $src0 into it with cvt.rn.ftz.f16.f32, then moves %temp to $dst
// with mov.b16. Trailing arguments: Int16Regs, Float32Regs and the matched
// intrinsic; F_MATH_1 is defined outside this chunk, so their exact roles
// (presumably result class / source class) should be confirmed there.
// NOTE(review): the end of the !strconcat chain is not visible between the
// mov line and the register-class line in this copy -- confirm it is closed.
818 def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
819 !strconcat(".reg .b16 %temp;\n\t",
820 !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
821 !strconcat("mov.b16 \t$dst, %temp;\n",
823 Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
// int_nvvm_f2h_rn: same scoped snippet as INT_NVVM_F2H_RN_FTZ, but the
// conversion line is cvt.rn.f16.f32 (no .ftz). A .b16 %temp receives the
// converted value and is then moved to $dst with mov.b16.
// NOTE(review): the end of the !strconcat chain is not visible in this copy
// -- confirm it is closed before the register-class arguments.
824 def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
825 !strconcat(".reg .b16 %temp;\n\t",
826 !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
827 !strconcat("mov.b16 \t$dst, %temp;\n",
829 Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
// int_nvvm_h2f: the reverse direction. $src0 is first copied into a scoped
// .b16 %temp with mov.b16, and cvt.f32.f16 then converts %temp into $dst.
// The trailing arguments are Float32Regs, Int16Regs and the intrinsic, i.e.
// the register classes are swapped relative to the F2H definitions.
// NOTE(review): the end of the !strconcat chain is not visible in this copy
// -- confirm it is closed before the register-class arguments.
831 def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
832 !strconcat(".reg .b16 %temp;\n\t",
833 !strconcat("mov.b16 \t%temp, $src0;\n\t",
834 !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
836 Float32Regs, Int16Regs, int_nvvm_h2f>;
// Patterns for the generic f16_to_fp / fp_to_f16 nodes, with the half value
// carried as i16 in Int16Regs.
//   f16 -> f32 : CVT_f32_f16 with CvtNONE
//   f32 -> f16 : listed twice -- first with CvtRN_FTZ under
//                Requires<[doF32FTZ]>, then unguarded with CvtRN
//                (presumably the guarded form wins when doF32FTZ holds and
//                the second is the fallback -- confirm pattern priority)
//   f16 -> f64 : CVT_f64_f16 with CvtNONE
//   f64 -> f16 : CVT_f16_f64 with CvtRN (no FTZ variant is listed)
838 def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
839 (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
840 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
841 (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
842 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
843 (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
845 def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
846 (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
847 def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
848 (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
// Bitcast intrinsics, each emitted as a single untyped-bits move:
//   int_nvvm_bitcast_f2i  : mov.b32, Float32Regs -> Int32Regs
//   int_nvvm_bitcast_i2f  : mov.b32, Int32Regs   -> Float32Regs
//   int_nvvm_bitcast_ll2d : mov.b64, Int64Regs   -> Float64Regs
//   int_nvvm_bitcast_d2ll : mov.b64, Float64Regs -> Int64Regs
// The argument order after the asm string is (first class, second class,
// intrinsic); the direction above assumes F_MATH_1 takes the destination
// class first, as the $dst/$src0 names and intrinsic names suggest -- confirm
// against the F_MATH_1 definition, which is outside this chunk.
854 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
855 Float32Regs, int_nvvm_bitcast_f2i>;
856 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
857 Int32Regs, int_nvvm_bitcast_i2f>;
859 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
860 Int64Regs, int_nvvm_bitcast_ll2d>;
861 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
862 Float64Regs, int_nvvm_bitcast_d2ll>;
864 //-----------------------------------
865 // Atomic Functions
866 //-----------------------------------
// PatFrag wrapper that forwards `ops` and `frag` unchanged and attaches a C++
// predicate on the matched node N. The predicate returns the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL), so fragments built
// from this class are the "_g" (global) variants used by the atomics below.
// NOTE(review): the closing "}]>;" of the predicate is not visible in this
// copy -- confirm it follows the return statement.
868 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
869 : PatFrag<ops, frag, [{
870 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
// Same shape as ATOMIC_GLOBAL_CHK, but the predicate passes
// llvm::ADDRESS_SPACE_SHARED to ChkMemSDNodeAddressSpace. Used for the "_s"
// (shared) fragment variants below.
// NOTE(review): the closing "}]>;" of the predicate is not visible in this
// copy -- confirm it follows the return statement.
872 class ATOMIC_SHARED_CHK <dag ops, dag frag>
873 : PatFrag<ops, frag, [{
874 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
// Same shape as ATOMIC_GLOBAL_CHK, but the predicate passes
// llvm::ADDRESS_SPACE_GENERIC to ChkMemSDNodeAddressSpace. Used for the
// "_gen" (generic) fragment variants below.
// NOTE(review): the closing "}]>;" of the predicate is not visible in this
// copy -- confirm it follows the return statement.
876 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
877 : PatFrag<ops, frag, [{
878 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
// Two-operand atomic (address + one value) for a given pointer register
// class. Produces two instruction records:
//   reg : value operand $b is a register of `regclass`
//   imm : value operand $b is an immediate (`IMMType` operand, matched with
//         the `IMM` node in the pattern)
// Both print their operands as " \t$dst, [$addr], $b;" and match
// (IntOp $addr, $b), returning the result in `regclass`.
// SpaceStr / TypeStr / OpcStr / Pred are declared as parameters, but the
// lines that consume them are not visible in this copy (presumably they
// build the mnemonic and a Requires<[Pred]> clause -- confirm upstream).
881 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
882 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
883 Operand IMMType, SDNode IMM, Predicate Pred> {
884 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
889 !strconcat(" \t$dst, [$addr], $b;", ""))))),
890 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
892 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
897 !strconcat(" \t$dst, [$addr], $b;", ""))))),
898 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Front end for F_ATOMIC_2_imp: instantiates it twice with every other
// argument forwarded unchanged -- once with Int32Regs as the pointer class
// (records prefixed "p32") and once with Int64Regs (prefixed "p64").
// NOTE(review): the closing brace of this multiclass is not visible in this
// copy.
901 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
902 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
903 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
904 IntOp, IMMType, IMM, Pred>;
905 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
906 IntOp, IMMType, IMM, Pred>;
// Two-operand atomic whose value operand is negated before use. Only a
// register form ("reg") is visible. The emitted text opens a "{{" scope,
// declares a scratch register named `temp` (".reg \t.s" followed by further
// pieces), writes a value derived from $b into `temp`, and then issues the
// atomic as " \t$dst, [$addr], temp;" before closing the scope with "}}".
// The selection pattern still matches (IntOp $addr, $b) on the original $b.
// The pieces concatenated between the visible fragments (type suffix,
// negate mnemonic, atom mnemonic) and the use of IMMType / Pred are not
// visible in this copy -- confirm upstream.
909 // has 2 operands, neg the second one
910 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
911 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
912 Operand IMMType, Predicate Pred> {
913 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
914 !strconcat("{{ \n\t",
915 !strconcat(".reg \t.s",
917 !strconcat(" temp; \n\t",
920 !strconcat(" \ttemp, $b; \n\t",
926 !strconcat(" \t$dst, [$addr], temp; \n\t",
927 !strconcat("}}", "")))))))))))))),
928 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Front end for F_ATOMIC_2_NEG_imp: instantiates it for Int32Regs pointers
// ("p32") and Int64Regs pointers ("p64"), forwarding all other arguments.
// NOTE(review): the end of the parameter list (the `Pred` parameter that the
// body forwards) and the closing brace are not visible in this copy.
931 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
932 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
934 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
935 IntOp, IMMType, Pred> ;
936 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
937 IntOp, IMMType, Pred> ;
// Three-operand atomic (address + two values $b and $c) for a given pointer
// register class. Four records cover every register/immediate combination:
//   reg  : $b register,  $c register
//   imm1 : $b immediate, $c register
//   imm2 : $b register,  $c immediate
//   imm3 : $b immediate, $c immediate
// All of them print operands as " \t$dst, [$addr], $b, $c;". Immediate
// operands use `IMMType` in the ins list and are matched with the generic
// `imm` node directly (there is no IMM parameter, unlike F_ATOMIC_2_imp).
// The mnemonic-building lines and the use of SpaceStr / TypeStr / OpcStr /
// Pred are not visible in this copy -- confirm upstream.
941 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
942 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
943 Operand IMMType, Predicate Pred> {
944 def reg : NVPTXInst<(outs regclass:$dst),
945 (ins ptrclass:$addr, regclass:$b, regclass:$c),
950 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
952 (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
954 def imm1 : NVPTXInst<(outs regclass:$dst),
955 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
960 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
961 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
963 def imm2 : NVPTXInst<(outs regclass:$dst),
964 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
969 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
970 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
972 def imm3 : NVPTXInst<(outs regclass:$dst),
973 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
978 !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
979 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Front end for F_ATOMIC_3_imp: instantiates it for Int32Regs pointers
// ("p32") and Int64Regs pointers ("p64"), forwarding all other arguments.
// NOTE(review): the closing brace of this multiclass is not visible in this
// copy.
982 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
983 string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
984 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
985 IntOp, IMMType, Pred>;
986 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
987 IntOp, IMMType, Pred>;
// Atomic add.
// Fragments: atomic_load_add_32, atomic_load_add_64 and the
// int_nvvm_atomic_load_add_f32 intrinsic are each wrapped three times, once
// per address-space check class (_g = ATOMIC_GLOBAL_CHK, _s =
// ATOMIC_SHARED_CHK, _gen = ATOMIC_GENERIC_CHK).
992 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
993 (atomic_load_add_32 node:$a, node:$b)>;
994 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
995 (atomic_load_add_32 node:$a, node:$b)>;
996 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
997 (atomic_load_add_32 node:$a, node:$b)>;
998 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
999 (atomic_load_add_64 node:$a, node:$b)>;
1000 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1001 (atomic_load_add_64 node:$a, node:$b)>;
1002 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1003 (atomic_load_add_64 node:$a, node:$b)>;
1004 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1005 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1006 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1007 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1008 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1009 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".add". SpaceStr is ".global",
// ".shared", or "" for the generic form. The *_GEN_*_USE_G records match the
// generic fragment but pass ".global" as SpaceStr, guarded by the
// useAtomRedG{32,64}forGen{32,64} predicate. Integer forms use TypeStr
// ".u32" / ".u64" with i32imm / i64imm and the `imm` node.
1011 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1012 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
1013 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1014 atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
1015 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1016 atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
1017 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1018 ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1020 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1021 atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
1022 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1023 atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
1024 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1025 atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
1026 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1027 ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// f32 forms: Float32Regs, TypeStr ".f32", f32imm operand matched with fpimm.
// All three address spaces share the hasAtomAddF32 predicate, and there is
// no *_USE_G record for f32.
1029 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1030 atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
1031 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1032 atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
1033 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1034 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
// Atomic sub.
// Fragments: atomic_load_sub_32 / atomic_load_sub_64 wrapped per address
// space (_g, _s, _gen).
1038 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1039 (atomic_load_sub_32 node:$a, node:$b)>;
1040 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1041 (atomic_load_sub_32 node:$a, node:$b)>;
1042 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1043 (atomic_load_sub_32 node:$a, node:$b)>;
1044 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1045 (atomic_load_sub_64 node:$a, node:$b)>;
1046 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1047 (atomic_load_sub_64 node:$a, node:$b)>;
1048 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1049 (atomic_load_sub_64 node:$a, node:$b)>;
// Instructions: built with F_ATOMIC_2_NEG and OpcStr ".add", i.e. the sub
// fragments are lowered through the negate-then-atomic multiclass rather
// than a dedicated sub opcode. TypeStr is passed as a bare "32" / "64" here
// (other atomics pass a dotted type such as ".u32"); F_ATOMIC_2_NEG_imp
// supplies its own ".s" prefix in the visible ".reg \t.s" fragment.
// No immediate node is passed, matching the register-only NEG multiclass.
1051 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1052 atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
1053 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1054 atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
1055 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1056 atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
1057 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1058 ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
1059 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1060 atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
1061 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1062 atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
1063 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1064 atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
1065 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1066 ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
// Atomic swap (exchange).
// Fragments: atomic_swap_32 / atomic_swap_64 wrapped per address space
// (_g, _s, _gen).
1070 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1071 (atomic_swap_32 node:$a, node:$b)>;
1072 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1073 (atomic_swap_32 node:$a, node:$b)>;
1074 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1075 (atomic_swap_32 node:$a, node:$b)>;
1076 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1077 (atomic_swap_64 node:$a, node:$b)>;
1078 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1079 (atomic_swap_64 node:$a, node:$b)>;
1080 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1081 (atomic_swap_64 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".exch" and untyped-bits TypeStr
// ".b32" / ".b64". The *_USE_G records match the generic fragment but pass
// ".global", guarded by useAtomRedG{32,64}forGen{32,64}.
1083 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1084 atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
1085 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1086 atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
1087 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1088 atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
1089 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1090 ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1091 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1092 atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
1093 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1094 atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
1095 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1096 atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
1097 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1098 ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic max, signed and unsigned.
// Fragments: atomic_load_max_{32,64} and atomic_load_umax_{32,64}, each
// wrapped per address space (_g, _s, _gen).
1102 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1103 , (atomic_load_max_32 node:$a, node:$b)>;
1104 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1105 (atomic_load_max_32 node:$a, node:$b)>;
1106 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1107 (atomic_load_max_32 node:$a, node:$b)>;
1108 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1109 , (atomic_load_max_64 node:$a, node:$b)>;
1110 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1111 (atomic_load_max_64 node:$a, node:$b)>;
1112 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1113 (atomic_load_max_64 node:$a, node:$b)>;
1114 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1115 (atomic_load_umax_32 node:$a, node:$b)>;
1116 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1117 (atomic_load_umax_32 node:$a, node:$b)>;
1118 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1119 (atomic_load_umax_32 node:$a, node:$b)>;
1120 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1121 (atomic_load_umax_64 node:$a, node:$b)>;
1122 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1123 (atomic_load_umax_64 node:$a, node:$b)>;
1124 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1125 (atomic_load_umax_64 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".max". The max fragments use TypeStr
// ".s32" / ".s64" and the umax fragments use ".u32" / ".u64"; only the type
// string distinguishes the two families. The *_USE_G records match the
// generic fragment but pass ".global".
1127 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1128 ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
1129 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1130 ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
1131 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1132 atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
1133 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1134 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1135 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1136 ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
1137 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1138 ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
1139 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1140 atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
1141 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1142 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1143 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1144 ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
1145 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1146 ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
1147 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1148 atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
1149 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1150 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1151 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1152 ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
1153 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1154 ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
1155 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1156 atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
1157 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1158 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic min, signed and unsigned.
// Fragments: atomic_load_min_{32,64} and atomic_load_umin_{32,64}, each
// wrapped per address space (_g, _s, _gen).
1162 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1163 (atomic_load_min_32 node:$a, node:$b)>;
1164 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1165 (atomic_load_min_32 node:$a, node:$b)>;
1166 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1167 (atomic_load_min_32 node:$a, node:$b)>;
1168 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1169 (atomic_load_min_64 node:$a, node:$b)>;
1170 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1171 (atomic_load_min_64 node:$a, node:$b)>;
1172 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1173 (atomic_load_min_64 node:$a, node:$b)>;
1174 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1175 (atomic_load_umin_32 node:$a, node:$b)>;
1176 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1177 (atomic_load_umin_32 node:$a, node:$b)>;
1178 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1179 (atomic_load_umin_32 node:$a, node:$b)>;
1180 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1181 (atomic_load_umin_64 node:$a, node:$b)>;
1182 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1183 (atomic_load_umin_64 node:$a, node:$b)>;
1184 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1185 (atomic_load_umin_64 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".min". The min fragments use TypeStr
// ".s32" / ".s64" and the umin fragments use ".u32" / ".u64". The *_USE_G
// records match the generic fragment but pass ".global".
1187 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1188 ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
1189 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1190 ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
1191 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1192 atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
1193 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1194 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1195 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1196 ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
1197 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1198 ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
1199 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1200 atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
1201 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1202 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1203 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1204 ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
1205 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1206 ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
1207 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1208 atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
1209 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1210 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1211 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1212 ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
1213 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1214 ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
1215 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1216 atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
1217 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1218 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic inc / dec. Unlike the other atomics in this section, the fragments
// wrap NVVM intrinsics (int_nvvm_atomic_load_inc_32 / _dec_32) rather than
// generic atomic_* nodes, and only 32-bit forms are defined.
1220 // atom_inc atom_dec
1222 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1223 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1224 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1225 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1226 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1227 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1228 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1229 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1230 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1231 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1232 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1233 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with TypeStr ".u32" and OpcStr ".inc" / ".dec".
// The *_USE_G records match the generic fragment but pass ".global", guarded
// by useAtomRedG32forGen32.
1235 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1236 atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
1237 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1238 atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
1239 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1240 atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
1241 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1242 ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1243 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1244 atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
1245 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1246 atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
1247 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1248 atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
1249 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1250 ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// Atomic and.
// Fragments: atomic_load_and_32 / atomic_load_and_64 wrapped per address
// space (_g, _s, _gen).
1254 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1255 (atomic_load_and_32 node:$a, node:$b)>;
1256 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1257 (atomic_load_and_32 node:$a, node:$b)>;
1258 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1259 (atomic_load_and_32 node:$a, node:$b)>;
1260 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1261 (atomic_load_and_64 node:$a, node:$b)>;
1262 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1263 (atomic_load_and_64 node:$a, node:$b)>;
1264 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1265 (atomic_load_and_64 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".and" and TypeStr ".b32" / ".b64".
// The *_USE_G records match the generic fragment but pass ".global".
1267 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1268 atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
1269 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1270 atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
1271 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1272 atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
1273 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1274 ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1275 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1276 atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
1277 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1278 atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
1279 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1280 atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
1281 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1282 ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic or.
// Fragments: atomic_load_or_32 / atomic_load_or_64 wrapped per address space
// (_g, _s, _gen).
1286 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1287 (atomic_load_or_32 node:$a, node:$b)>;
1288 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1289 (atomic_load_or_32 node:$a, node:$b)>;
1290 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1291 (atomic_load_or_32 node:$a, node:$b)>;
1292 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1293 (atomic_load_or_64 node:$a, node:$b)>;
1294 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1295 (atomic_load_or_64 node:$a, node:$b)>;
1296 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1297 (atomic_load_or_64 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".or" and TypeStr ".b32" / ".b64".
// The records are listed in G, GEN, GEN_USE_G, S order here (the other
// atomics list S second); the set of records is otherwise the same.
1299 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1300 atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
1301 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1302 atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
1303 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1304 ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1305 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1306 atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
1307 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1308 atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
1309 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1310 atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
1311 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1312 ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1313 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1314 atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
// Atomic xor.
// Fragments: atomic_load_xor_32 / atomic_load_xor_64 wrapped per address
// space (_g, _s, _gen).
1318 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1319 (atomic_load_xor_32 node:$a, node:$b)>;
1320 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1321 (atomic_load_xor_32 node:$a, node:$b)>;
1322 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1323 (atomic_load_xor_32 node:$a, node:$b)>;
1324 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1325 (atomic_load_xor_64 node:$a, node:$b)>;
1326 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1327 (atomic_load_xor_64 node:$a, node:$b)>;
1328 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1329 (atomic_load_xor_64 node:$a, node:$b)>;
// Instructions: F_ATOMIC_2 with OpcStr ".xor" and TypeStr ".b32" / ".b64".
// The *_USE_G records match the generic fragment but pass ".global".
1331 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1332 atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
1333 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1334 atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
1335 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1336 atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
1337 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1338 ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1339 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1340 atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
1341 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1342 atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
1343 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1344 atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
1345 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1346 ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Atomic compare-and-swap.
// Fragments: atomic_cmp_swap_32 / atomic_cmp_swap_64 wrapped per address
// space (_g, _s, _gen). These take three operands ($a, $b, $c) instead of
// the two used by the other atomics.
1350 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1351 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1352 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1353 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1354 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1355 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1356 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1357 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1358 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1359 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1360 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1361 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// Instructions: F_ATOMIC_3 (three-operand multiclass) with OpcStr ".cas" and
// TypeStr ".b32" / ".b64". No immediate node argument is passed because
// F_ATOMIC_3 takes none. The *_USE_G records match the generic fragment but
// pass ".global", guarded by useAtomRedG{32,64}forGen{32,64}.
1363 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1364 atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
1365 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1366 atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
1367 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1368 atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
1369 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1370 ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
1371 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1372 atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
1373 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1374 atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
1375 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1376 atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
1377 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1378 ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1383 //-----------------------------------
1384 // Support for ldu on sm_20 or later
1385 //-----------------------------------
1387 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1388 // read-only in a kernel.
// Scalar ldu.global instruction family for one result register class.
// TyStr supplies everything after "ldu.global." (type suffix plus operand
// text). Five records are produced, differing only in the address operand:
//   areg   : Int32Regs register
//   areg64 : Int64Regs register
//   avar   : imemAny operand
//   ari    : MEMri operand
//   ari64  : MEMri64 operand
// Every record has an empty pattern list ([]), so none is selected by a
// TableGen pattern here, and every record is guarded by Requires<[hasLDU]>.
// NOTE(review): the closing brace of this multiclass is not visible in this
// copy.
1392 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1393 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1394 !strconcat("ldu.global.", TyStr),
1395 []>, Requires<[hasLDU]>;
1396 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1397 !strconcat("ldu.global.", TyStr),
1398 []>, Requires<[hasLDU]>;
1399 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1400 !strconcat("ldu.global.", TyStr),
1401 []>, Requires<[hasLDU]>;
1402 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1403 !strconcat("ldu.global.", TyStr),
1404 []>, Requires<[hasLDU]>;
1405 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1406 !strconcat("ldu.global.", TyStr),
1407 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations.  The string supplies the PTX type suffix plus the
// operand list; the second argument is the destination register class.  Note
// that the i8 form places its result in Int16Regs, and the p32/p64 (pointer)
// forms reuse the u32/u64 strings.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1421 // Elementized vector ldu
1422 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1423 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1424 (ins Int32Regs:$src),
1425 !strconcat("ldu.global.", TyStr), []>;
1426 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1427 (ins Int64Regs:$src),
1428 !strconcat("ldu.global.", TyStr), []>;
1429 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1431 !strconcat("ldu.global.", TyStr), []>;
1432 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1434 !strconcat("ldu.global.", TyStr), []>;
1435 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1437 !strconcat("ldu.global.", TyStr), []>;
1440 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
// Four-element ldu.global forms.  Each def produces four results of the same
// register class, prints "ldu.global." followed by TyStr, and has no selection
// pattern ([]).  The defs differ only in the address operand.

// Address in a 32-bit register.
def _areg32
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins Int32Regs:$src),
              !strconcat("ldu.global.", TyStr), []>;

// Address in a 64-bit register.
def _areg64
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins Int64Regs:$src),
              !strconcat("ldu.global.", TyStr), []>;

// Address given as a MEMri operand.
def _ari32
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins MEMri:$src),
              !strconcat("ldu.global.", TyStr), []>;

// Address given as a MEMri64 operand.
def _ari64
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins MEMri64:$src),
              !strconcat("ldu.global.", TyStr), []>;

// Address given as an imemAny operand.
def _avar
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins imemAny:$src),
              !strconcat("ldu.global.", TyStr), []>;
// Vector ldu instantiations (v2 of every element type, then v4.u8).  As with
// the scalar forms, 8-bit elements are returned in Int16Regs.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
1472 defm INT_PTX_LDU_G_v4i16_ELE
1473 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1475 defm INT_PTX_LDU_G_v4i32_ELE
1476 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1478 defm INT_PTX_LDU_G_v4f32_ELE
1479 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1483 //-----------------------------------
1484 // Support for ldg on sm_35 or later
1485 //-----------------------------------
1487 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1488 // non-coherent texture cache, and therefore the values read must be read-only
1489 // during the lifetime of the kernel.
1491 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
// Scalar ld.global.nc forms.  Every def prints "ld.global.nc." followed by
// TyStr, carries no selection pattern ([]), and requires hasLDG.  They differ
// only in the operand used for the address.

// Address in a 32-bit register.
def areg
  : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
              !strconcat("ld.global.nc.", TyStr), []>,
    Requires<[hasLDG]>;

// Address in a 64-bit register.
def areg64
  : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
              !strconcat("ld.global.nc.", TyStr), []>,
    Requires<[hasLDG]>;

// Address given as an imemAny operand.
def avar
  : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
              !strconcat("ld.global.nc.", TyStr), []>,
    Requires<[hasLDG]>;

// Address given as a MEMri operand.
def ari
  : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
              !strconcat("ld.global.nc.", TyStr), []>,
    Requires<[hasLDG]>;

// Address given as a MEMri64 operand.
def ari64
  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
              !strconcat("ld.global.nc.", TyStr), []>,
    Requires<[hasLDG]>;
// Scalar ldg instantiations.  The string supplies the PTX type suffix plus the
// operand list; the second argument is the destination register class.  The
// i8 form places its result in Int16Regs, and the p32/p64 (pointer) forms
// reuse the u32/u64 strings.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1528 // Elementized vector ldg
1529 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1530 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1531 (ins Int32Regs:$src),
1532 !strconcat("ld.global.nc.", TyStr), []>;
1533 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1534 (ins Int64Regs:$src),
1535 !strconcat("ld.global.nc.", TyStr), []>;
1536 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1538 !strconcat("ld.global.nc.", TyStr), []>;
1539 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1541 !strconcat("ld.global.nc.", TyStr), []>;
1542 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1544 !strconcat("ld.global.nc.", TyStr), []>;
1547 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
// Four-element ld.global.nc forms.  Each def produces four results of the
// same register class, prints "ld.global.nc." followed by TyStr, and has no
// selection pattern ([]).  The defs differ only in the address operand.

// Address in a 32-bit register.
def _areg32
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins Int32Regs:$src),
              !strconcat("ld.global.nc.", TyStr), []>;

// Address in a 64-bit register.
def _areg64
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins Int64Regs:$src),
              !strconcat("ld.global.nc.", TyStr), []>;

// Address given as a MEMri operand.
def _ari32
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins MEMri:$src),
              !strconcat("ld.global.nc.", TyStr), []>;

// Address given as a MEMri64 operand.
def _ari64
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins MEMri64:$src),
              !strconcat("ld.global.nc.", TyStr), []>;

// Address given as an imemAny operand.
def _avar
  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins imemAny:$src),
              !strconcat("ld.global.nc.", TyStr), []>;
1565 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two-element vector ldg instantiations.  8-bit elements are returned in
// Int16Regs.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;

// Four-element vector ldg instantiations (no 64-bit element forms are
// defined here).
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
1588 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
// Forms guarded by hasGenericLdSt: print "cvta.<Str>.u32" / "cvta.<Str>.u64"
// and match Intrin on a 32-bit or 64-bit register operand respectively.
def _yes
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
              [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
    Requires<[hasGenericLdSt]>;
def _yes_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
              [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
    Requires<[hasGenericLdSt]>;
1598 // @TODO: Are these actually needed? I believe global addresses will be copied
1599 // to register values anyway.
1600 /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1601 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1602 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1603 Requires<[hasGenericLdSt]>;
1604 def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1605 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1606 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1607 Requires<[hasGenericLdSt]>;*/
// Unpredicated forms: match the same Intrin but print a plain register move
// (mov.u32 / mov.u64) instead of a cvta.
def _no
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
def _no_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1616 // @TODO: Are these actually needed? I believe global addresses will be copied
1617 // to register values anyway.
1618 /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1619 "mov.u32 \t$result, $src;",
1620 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1621 def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1622 "mov.u64 \t$result, $src;",
1623 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
1626 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
// Forms guarded by hasGenericLdSt: print "cvta.to.<Str>.u32" /
// "cvta.to.<Str>.u64" and match Intrin on a 32-bit or 64-bit register.
def _yes
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
              [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
    Requires<[hasGenericLdSt]>;
def _yes_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
              [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
    Requires<[hasGenericLdSt]>;

// Unpredicated forms: match the same Intrin but print a plain register move.
def _no
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
def _no_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
// <space> -> generic conversions, one per int_nvvm_ptr_*_to_gen intrinsic.
defm cvta_local
  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared
  : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global
  : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const
  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

// generic -> <space> conversions, one per int_nvvm_ptr_gen_to_* intrinsic.
defm cvta_to_local
  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared
  : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global
  : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const
  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1654 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move; there is a
// 32-bit (mov.u32) and a 64-bit (mov.u64) form.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
// nvvm.move intrinsic
1668 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1669 "mov.b16 \t$r, $s;",
1671 (int_nvvm_move_i16 Int16Regs:$s))]>;
1672 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1673 "mov.b32 \t$r, $s;",
1675 (int_nvvm_move_i32 Int32Regs:$s))]>;
1676 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1677 "mov.b64 \t$r, $s;",
1679 (int_nvvm_move_i64 Int64Regs:$s))]>;
// Floating-point nvvm.move forms: a register-to-register mov.f32 / mov.f64
// matched from int_nvvm_move_float / int_nvvm_move_double.
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
1688 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1689 "mov.u32 \t$r, $s;",
1691 (int_nvvm_move_ptr Int32Regs:$s))]>;
1692 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1693 "mov.u64 \t$r, $s;",
1695 (int_nvvm_move_ptr Int64Regs:$s))]>;
1697 // @TODO: Are these actually needed, or will we always just see symbols
1698 // copied to registers first?
1699 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1700 "mov.u32 \t$r, $s;",
1702 (int_nvvm_move_ptr texternalsym:$s))]>;
1703 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1704 "mov.u64 \t$r, $s;",
1706 (int_nvvm_move_ptr texternalsym:$s))]>;*/
1709 // MoveParam %r1, param
1710 // ptr_local_to_gen %r2, %r1
1711 // ptr_gen_to_local %r3, %r2
1715 // @TODO: Revisit this. There is a type
1716 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1717 // instructions are not currently defined. However, we can use the ptr
1718 // variants and the asm printer will do the right thing.
// Fold MoveParam -> ptr_local_to_gen -> ptr_gen_to_local on an external
// symbol into a single nvvm_move_ptr of the matching width.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
1727 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1728 "mov.u64 \t$result, $src;", []>;
1730 //-----------------------------------
1731 // Compiler Error Warn
1732 // - Just ignore them in codegen
1733 //-----------------------------------
// The compiler.warn / compiler.error intrinsics produce no result and are
// printed as a PTX comment only.  Each has a 32-bit and a 64-bit operand form.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep.<space> instructions.  Each def writes an Int1Regs result from the
// matching int_nvvm_isspacep_* intrinsic; the _32 / _64 suffix selects whether
// the address operand is an Int32Regs or an Int64Regs.  Only the .const forms
// carry a hasPTX31 requirement.

// .const
def ISSPACEP_CONST_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_CONST_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  Requires<[hasPTX31]>;

// .global
def ISSPACEP_GLOBAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;

// .local
def ISSPACEP_LOCAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;

// .shared
def ISSPACEP_SHARED_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1787 // Special register reads
// Copies a SpecialRegs operand into a 32-bit register with mov.b32.  It has no
// selection pattern of its own; standalone Pat records refer to it.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32\t$d, $r;", []>;
// Each int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) is selected to a
// MOV_SPECIAL of the correspondingly numbered ENVREG register.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0),  (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1),  (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2),  (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3),  (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4),  (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5),  (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6),  (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7),  (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8),  (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9),  (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
1826 // rotate builtin support
// 32-bit rotate by an immediate amount when hasHWROT32 holds: a wrapping left
// funnel shift with $src supplied as both data operands.
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  Requires<[hasHWROT32]> ;
// 32-bit rotate by a register amount when hasHWROT32 holds: a wrapping left
// funnel shift with $src supplied as both data operands.
def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  Requires<[hasHWROT32]> ;
// Software fallbacks for the 32-bit rotate, selected when noHWROT32 holds.
// Immediate amount: ROT32imm_sw is given the amount and SUB_FRM_32 applied to
// the same amount.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src,
                       imm:$amt,
                       (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]> ;

// Register amount: delegate to ROTL32reg_sw.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]> ;
1852 let hasSideEffects = 0 in {
1854 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1855 !strconcat("{{\n\t",
1856 !strconcat(".reg .b32 %dummy;\n\t",
1857 !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
1858 !strconcat("}}", "")))),
1862 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1863 !strconcat("{{\n\t",
1864 !strconcat(".reg .b32 %dummy;\n\t",
1865 !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
1866 !strconcat("}}", "")))),
1870 let hasSideEffects = 0 in {
1872 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
1873 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// swap_lo_hi_b64: re-pack the two 32-bit halves of $src in the opposite
// order (the high half is passed first, the low half second).
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
            (GET_HI_INT64 Int64Regs:$src),
            (GET_LO_INT64 Int64Regs:$src))> ;
1880 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
1882 let hasSideEffects = 0 in {
// Wrapping 32-bit funnel shifts over a ($lo, $hi) pair.  All four defs are
// pattern-less ([]) and require hasHWROT32; they differ in direction (l / r)
// and in whether the amount is an immediate or a register.

// Left shift, immediate amount.
def SHF_L_WRAP_B32_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  Requires<[hasHWROT32]>;

// Left shift, register amount.
def SHF_L_WRAP_B32_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  Requires<[hasHWROT32]>;

// Right shift, immediate amount.
def SHF_R_WRAP_B32_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
    "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  Requires<[hasHWROT32]>;

// Right shift, register amount.
def SHF_R_WRAP_B32_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
    "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
  Requires<[hasHWROT32]>;
1908 // HW version of rotate 64
1909 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
1911 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
1912 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
1913 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
1914 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
1915 Requires<[hasHWROT32]>;
1917 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
1919 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
1920 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
1921 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
1922 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
1923 Requires<[hasHWROT32]>;
1926 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
1928 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
1929 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
1930 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
1931 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
1932 Requires<[hasHWROT32]>;
1934 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
1936 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
1937 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
1938 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
1939 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
1940 Requires<[hasHWROT32]>;
1942 // SW version of rotate 64
// Software 64-bit rotate-left by an immediate, selected when noHWROT32 holds.
// ROT64imm_sw is given the rotate amount and a second, complementary amount.
// Because the value is 64 bits wide the complement has to come from
// SUB_FRM_64, exactly as in the 64-bit rotate-right pattern; SUB_FRM_32 is the
// transform used by the 32-bit rotate and would yield the wrong second shift
// here.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
// Rotate-left by a register amount: delegate to ROTL64reg_sw.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Rotate-right by an immediate: the same ROT64imm_sw instruction, with the
// SUB_FRM_64-transformed amount passed first and the raw amount second.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       (SUB_FRM_64 node:$amt),
                       imm:$amt)>,
      Requires<[noHWROT32]>;

// Rotate-right by a register amount: delegate to ROTR64reg_sw.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
1957 //-----------------------------------
1958 // Texture Intrinsics
1959 //-----------------------------------
1961 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
1962 // also defined in NVPTXReplaceImageHandles.cpp
1964 // texmode_independent
1965 let IsTex = 1, IsTexModeUnified = 0 in {
1966 // Texture fetch instructions using handles
1968 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1969 Float32Regs:$b, Float32Regs:$a),
1970 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
1971 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1974 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1975 Float32Regs:$b, Float32Regs:$a),
1976 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
1977 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1979 def TEX_1D_F32_F32_LEVEL
1980 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1981 Float32Regs:$b, Float32Regs:$a),
1982 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
1983 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
1984 "[$t, $s, \\{$x\\}], $lod;",
1986 def TEX_1D_F32_F32_GRAD
1987 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1988 Float32Regs:$b, Float32Regs:$a),
1989 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1990 Float32Regs:$gradx, Float32Regs:$grady),
1991 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
1992 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
1995 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1996 Int32Regs:$b, Int32Regs:$a),
1997 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
1998 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2001 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2002 Int32Regs:$b, Int32Regs:$a),
2003 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2004 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2006 def TEX_1D_S32_F32_LEVEL
2007 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2008 Int32Regs:$b, Int32Regs:$a),
2009 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2011 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2012 "[$t, $s, \\{$x\\}], $lod;",
2014 def TEX_1D_S32_F32_GRAD
2015 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2016 Int32Regs:$b, Int32Regs:$a),
2017 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2018 Float32Regs:$gradx, Float32Regs:$grady),
2019 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2020 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2023 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2024 Int32Regs:$b, Int32Regs:$a),
2025 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2026 "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2029 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2030 Int32Regs:$b, Int32Regs:$a),
2031 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2032 "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2034 def TEX_1D_U32_F32_LEVEL
2035 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2036 Int32Regs:$b, Int32Regs:$a),
2037 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2039 "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2040 "[$t, $s, \\{$x\\}], $lod;",
2042 def TEX_1D_U32_F32_GRAD
2043 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2044 Int32Regs:$b, Int32Regs:$a),
2045 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2046 Float32Regs:$gradx, Float32Regs:$grady),
2047 "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2048 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2051 def TEX_1D_ARRAY_F32_S32
2052 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2053 Float32Regs:$b, Float32Regs:$a),
2054 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2055 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2056 "[$t, $s, \\{$l, $x\\}];",
2058 def TEX_1D_ARRAY_F32_F32
2059 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2060 Float32Regs:$b, Float32Regs:$a),
2061 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2062 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2063 "[$t, $s, \\{$l, $x\\}];",
2065 def TEX_1D_ARRAY_F32_F32_LEVEL
2066 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2067 Float32Regs:$b, Float32Regs:$a),
2068 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2070 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2071 "[$t, $s, \\{$l, $x\\}], $lod;",
2073 def TEX_1D_ARRAY_F32_F32_GRAD
2074 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2075 Float32Regs:$b, Float32Regs:$a),
2076 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2077 Float32Regs:$gradx, Float32Regs:$grady),
2078 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2079 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2081 def TEX_1D_ARRAY_S32_S32
2082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2083 Int32Regs:$b, Int32Regs:$a),
2084 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2085 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2086 "[$t, $s, \\{$l, $x\\}];",
2088 def TEX_1D_ARRAY_S32_F32
2089 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2090 Int32Regs:$b, Int32Regs:$a),
2091 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2092 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2093 "[$t, $s, \\{$l, $x\\}];",
2095 def TEX_1D_ARRAY_S32_F32_LEVEL
2096 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2097 Int32Regs:$b, Int32Regs:$a),
2098 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2100 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2101 "[$t, $s, \\{$l, $x\\}], $lod;",
2103 def TEX_1D_ARRAY_S32_F32_GRAD
2104 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2105 Int32Regs:$b, Int32Regs:$a),
2106 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2107 Float32Regs:$gradx, Float32Regs:$grady),
2108 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2109 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2111 def TEX_1D_ARRAY_U32_S32
2112 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2113 Int32Regs:$b, Int32Regs:$a),
2114 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2115 "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2116 "[$t, $s, \\{$l, $x\\}];",
2118 def TEX_1D_ARRAY_U32_F32
2119 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2120 Int32Regs:$b, Int32Regs:$a),
2121 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2122 "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2123 "[$t, $s, \\{$l, $x\\}];",
2125 def TEX_1D_ARRAY_U32_F32_LEVEL
2126 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2127 Int32Regs:$b, Int32Regs:$a),
2128 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2130 "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2131 "[$t, $s, \\{$l, $x\\}], $lod;",
2133 def TEX_1D_ARRAY_U32_F32_GRAD
2134 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2135 Int32Regs:$b, Int32Regs:$a),
2136 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2137 Float32Regs:$gradx, Float32Regs:$grady),
2138 "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2139 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2143 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2144 Float32Regs:$b, Float32Regs:$a),
2145 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2146 "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2147 "[$t, $s, \\{$x, $y\\}];",
2150 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2151 Float32Regs:$b, Float32Regs:$a),
2152 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2153 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2154 "[$t, $s, \\{$x, $y\\}];",
2156 def TEX_2D_F32_F32_LEVEL
2157 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2158 Float32Regs:$b, Float32Regs:$a),
2159 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2161 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2162 "[$t, $s, \\{$x, $y\\}], $lod;",
2164 def TEX_2D_F32_F32_GRAD
2165 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2166 Float32Regs:$b, Float32Regs:$a),
2167 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2168 Float32Regs:$gradx0, Float32Regs:$gradx1,
2169 Float32Regs:$grady0, Float32Regs:$grady1),
2170 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2171 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2172 "\\{$grady0, $grady1\\};",
// Record for asm "tex.2d.v4.s32.s32"; four Int32Regs results, Int32Regs coordinates $x, $y.
// NOTE(review): no `def` line is visible for this record (presumably TEX_2D_S32_S32, by analogy
// with its neighbours) and no closing pattern/`>;` follows — lines look elided here; confirm.
2175 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2176 Int32Regs:$b, Int32Regs:$a),
2177 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2178 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2179 "[$t, $s, \\{$x, $y\\}];",
// Record for asm "tex.2d.v4.s32.f32"; four Int32Regs results, Float32Regs coordinates $x, $y.
// NOTE(review): no `def` line is visible for this record (presumably TEX_2D_S32_F32, by analogy
// with its neighbours) and no closing pattern/`>;` follows — lines look elided here; confirm.
2182 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2183 Int32Regs:$b, Int32Regs:$a),
2184 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2185 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2186 "[$t, $s, \\{$x, $y\\}];",
// TEX_2D_S32_F32_LEVEL: asm "tex.level.2d.v4.s32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2188 def TEX_2D_S32_F32_LEVEL
2189 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2190 Int32Regs:$b, Int32Regs:$a),
2191 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2193 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2194 "[$t, $s, \\{$x, $y\\}], $lod;",
// TEX_2D_S32_F32_GRAD: asm "tex.grad.2d.v4.s32.f32"; four Int32Regs results.
// The gradients are emitted as two two-element brace groups ($gradx0/1, $grady0/1).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2196 def TEX_2D_S32_F32_GRAD
2197 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2198 Int32Regs:$b, Int32Regs:$a),
2199 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2200 Float32Regs:$gradx0, Float32Regs:$gradx1,
2201 Float32Regs:$grady0, Float32Regs:$grady1),
2202 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2203 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2204 "\\{$grady0, $grady1\\};",
// Record for asm "tex.2d.v4.u32.s32"; four Int32Regs results, Int32Regs coordinates $x, $y.
// NOTE(review): no `def` line is visible for this record (presumably TEX_2D_U32_S32, by analogy
// with its neighbours) and no closing pattern/`>;` follows — lines look elided here; confirm.
2207 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2208 Int32Regs:$b, Int32Regs:$a),
2209 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2210 "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2211 "[$t, $s, \\{$x, $y\\}];",
// Record for asm "tex.2d.v4.u32.f32"; four Int32Regs results, Float32Regs coordinates $x, $y.
// NOTE(review): no `def` line is visible for this record (presumably TEX_2D_U32_F32, by analogy
// with its neighbours) and no closing pattern/`>;` follows — lines look elided here; confirm.
2214 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2215 Int32Regs:$b, Int32Regs:$a),
2216 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2217 "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2218 "[$t, $s, \\{$x, $y\\}];",
// TEX_2D_U32_F32_LEVEL: asm "tex.level.2d.v4.u32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2220 def TEX_2D_U32_F32_LEVEL
2221 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2222 Int32Regs:$b, Int32Regs:$a),
2223 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2225 "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2226 "[$t, $s, \\{$x, $y\\}], $lod;",
// TEX_2D_U32_F32_GRAD: asm "tex.grad.2d.v4.u32.f32"; four Int32Regs results.
// The gradients are emitted as two two-element brace groups ($gradx0/1, $grady0/1).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2228 def TEX_2D_U32_F32_GRAD
2229 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2230 Int32Regs:$b, Int32Regs:$a),
2231 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2232 Float32Regs:$gradx0, Float32Regs:$gradx1,
2233 Float32Regs:$grady0, Float32Regs:$grady1),
2234 "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2235 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2236 "\\{$grady0, $grady1\\};",
// TEX_2D_ARRAY_F32_S32: asm "tex.a2d.v4.f32.s32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2239 def TEX_2D_ARRAY_F32_S32
2240 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2241 Float32Regs:$b, Float32Regs:$a),
2242 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2244 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2245 "[$t, $s, \\{$l, $x, $y, $y\\}];",
// TEX_2D_ARRAY_F32_F32: asm "tex.a2d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2247 def TEX_2D_ARRAY_F32_F32
2248 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2249 Float32Regs:$b, Float32Regs:$a),
2250 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2252 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2253 "[$t, $s, \\{$l, $x, $y, $y\\}];",
// TEX_2D_ARRAY_F32_F32_LEVEL: asm "tex.level.a2d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2255 def TEX_2D_ARRAY_F32_F32_LEVEL
2256 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2257 Float32Regs:$b, Float32Regs:$a),
2258 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2259 Float32Regs:$y, Float32Regs:$lod),
2260 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2261 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
// TEX_2D_ARRAY_F32_F32_GRAD: asm "tex.grad.a2d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2263 def TEX_2D_ARRAY_F32_F32_GRAD
2264 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2265 Float32Regs:$b, Float32Regs:$a),
2266 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2267 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2268 Float32Regs:$grady0, Float32Regs:$grady1),
2269 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2270 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2271 "\\{$grady0, $grady1\\};",
// TEX_2D_ARRAY_S32_S32: asm "tex.a2d.v4.s32.s32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2273 def TEX_2D_ARRAY_S32_S32
2274 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2275 Int32Regs:$b, Int32Regs:$a),
2276 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2278 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2279 "[$t, $s, \\{$l, $x, $y, $y\\}];",
// TEX_2D_ARRAY_S32_F32: asm "tex.a2d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2281 def TEX_2D_ARRAY_S32_F32
2282 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2283 Int32Regs:$b, Int32Regs:$a),
2284 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2286 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2287 "[$t, $s, \\{$l, $x, $y, $y\\}];",
// TEX_2D_ARRAY_S32_F32_LEVEL: asm "tex.level.a2d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2289 def TEX_2D_ARRAY_S32_F32_LEVEL
2290 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2291 Int32Regs:$b, Int32Regs:$a),
2292 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2293 Float32Regs:$y, Float32Regs:$lod),
2294 "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2295 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
// TEX_2D_ARRAY_S32_F32_GRAD: asm "tex.grad.a2d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2297 def TEX_2D_ARRAY_S32_F32_GRAD
2298 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2299 Int32Regs:$b, Int32Regs:$a),
2300 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2302 Float32Regs:$gradx0, Float32Regs:$gradx1,
2303 Float32Regs:$grady0, Float32Regs:$grady1),
2304 "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2305 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2306 "\\{$grady0, $grady1\\};",
// TEX_2D_ARRAY_U32_S32: asm "tex.a2d.v4.u32.s32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2308 def TEX_2D_ARRAY_U32_S32
2309 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2310 Int32Regs:$b, Int32Regs:$a),
2311 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2313 "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2314 "[$t, $s, \\{$l, $x, $y, $y\\}];",
// TEX_2D_ARRAY_U32_F32: asm "tex.a2d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2316 def TEX_2D_ARRAY_U32_F32
2317 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2318 Int32Regs:$b, Int32Regs:$a),
2319 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2321 "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2322 "[$t, $s, \\{$l, $x, $y, $y\\}];",
// TEX_2D_ARRAY_U32_F32_LEVEL: asm "tex.level.a2d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2324 def TEX_2D_ARRAY_U32_F32_LEVEL
2325 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2326 Int32Regs:$b, Int32Regs:$a),
2327 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2328 Float32Regs:$y, Float32Regs:$lod),
2329 "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2330 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
// TEX_2D_ARRAY_U32_F32_GRAD: asm "tex.grad.a2d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2332 def TEX_2D_ARRAY_U32_F32_GRAD
2333 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2334 Int32Regs:$b, Int32Regs:$a),
2335 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2337 Float32Regs:$gradx0, Float32Regs:$gradx1,
2338 Float32Regs:$grady0, Float32Regs:$grady1),
2339 "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2340 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2341 "\\{$grady0, $grady1\\};",
// Record for asm "tex.3d.v4.f32.s32"; four Float32Regs results; the coordinate group lists $z twice.
// NOTE(review): no `def` line is visible (presumably TEX_3D_F32_S32), $z is used in the asm string
// but not declared in the visible (ins ...) list, and no closing pattern/`>;` follows — confirm.
2345 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2346 Float32Regs:$b, Float32Regs:$a),
2347 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2349 "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2350 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// Record for asm "tex.3d.v4.f32.f32"; four Float32Regs results; the coordinate group lists $z twice.
// NOTE(review): no `def` line is visible (presumably TEX_3D_F32_F32), $z is used in the asm string
// but not declared in the visible (ins ...) list, and no closing pattern/`>;` follows — confirm.
2353 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2354 Float32Regs:$b, Float32Regs:$a),
2355 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2357 "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2358 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// TEX_3D_F32_F32_LEVEL: asm "tex.level.3d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2360 def TEX_3D_F32_F32_LEVEL
2361 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2362 Float32Regs:$b, Float32Regs:$a),
2363 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2364 Float32Regs:$z, Float32Regs:$lod),
2365 "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2366 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_3D_F32_F32_GRAD: asm "tex.grad.3d.v4.f32.f32"; four Float32Regs results.
// The coordinate and both gradient groups repeat their third entry as the fourth
// ($z, $gradx2, $grady2) — presumably padding; confirm.
// NOTE(review): $z is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2368 def TEX_3D_F32_F32_GRAD
2369 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2370 Float32Regs:$b, Float32Regs:$a),
2371 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2373 Float32Regs:$gradx0, Float32Regs:$gradx1,
2374 Float32Regs:$gradx2, Float32Regs:$grady0,
2375 Float32Regs:$grady1, Float32Regs:$grady2),
2376 "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2377 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2378 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2379 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Record for asm "tex.3d.v4.s32.s32"; four Int32Regs results; the coordinate group lists $z twice.
// NOTE(review): no `def` line is visible (presumably TEX_3D_S32_S32), $z is used in the asm string
// but not declared in the visible (ins ...) list, and no closing pattern/`>;` follows — confirm.
2382 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2383 Int32Regs:$b, Int32Regs:$a),
2384 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2386 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2387 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// Record for asm "tex.3d.v4.s32.f32"; four Int32Regs results; the coordinate group lists $z twice.
// NOTE(review): no `def` line is visible (presumably TEX_3D_S32_F32), $z is used in the asm string
// but not declared in the visible (ins ...) list, and no closing pattern/`>;` follows — confirm.
2390 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2391 Int32Regs:$b, Int32Regs:$a),
2392 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2394 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2395 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// TEX_3D_S32_F32_LEVEL: asm "tex.level.3d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2397 def TEX_3D_S32_F32_LEVEL
2398 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2399 Int32Regs:$b, Int32Regs:$a),
2400 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2401 Float32Regs:$z, Float32Regs:$lod),
2402 "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2403 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_3D_S32_F32_GRAD: asm "tex.grad.3d.v4.s32.f32"; four Int32Regs results.
// The coordinate and both gradient groups repeat their third entry as the fourth
// ($z, $gradx2, $grady2) — presumably padding; confirm.
// NOTE(review): $z is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2405 def TEX_3D_S32_F32_GRAD
2406 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2407 Int32Regs:$b, Int32Regs:$a),
2408 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2410 Float32Regs:$gradx0, Float32Regs:$gradx1,
2411 Float32Regs:$gradx2, Float32Regs:$grady0,
2412 Float32Regs:$grady1, Float32Regs:$grady2),
2413 "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2414 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2415 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2416 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Record for asm "tex.3d.v4.u32.s32"; four Int32Regs results; the coordinate group lists $z twice.
// NOTE(review): no `def` line is visible (presumably TEX_3D_U32_S32), $z is used in the asm string
// but not declared in the visible (ins ...) list, and no closing pattern/`>;` follows — confirm.
2419 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2420 Int32Regs:$b, Int32Regs:$a),
2421 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2423 "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2424 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// Record for asm "tex.3d.v4.u32.f32"; four Int32Regs results; the coordinate group lists $z twice.
// NOTE(review): no `def` line is visible (presumably TEX_3D_U32_F32), $z is used in the asm string
// but not declared in the visible (ins ...) list, and no closing pattern/`>;` follows — confirm.
2427 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2428 Int32Regs:$b, Int32Regs:$a),
2429 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2431 "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2432 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// TEX_3D_U32_F32_LEVEL: asm "tex.level.3d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2434 def TEX_3D_U32_F32_LEVEL
2435 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2436 Int32Regs:$b, Int32Regs:$a),
2437 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2438 Float32Regs:$z, Float32Regs:$lod),
2439 "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2440 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_3D_U32_F32_GRAD: asm "tex.grad.3d.v4.u32.f32"; four Int32Regs results.
// The coordinate and both gradient groups repeat their third entry as the fourth
// ($z, $gradx2, $grady2) — presumably padding; confirm.
// NOTE(review): $z is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2442 def TEX_3D_U32_F32_GRAD
2443 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2444 Int32Regs:$b, Int32Regs:$a),
2445 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2447 Float32Regs:$gradx0, Float32Regs:$gradx1,
2448 Float32Regs:$gradx2, Float32Regs:$grady0,
2449 Float32Regs:$grady1, Float32Regs:$grady2),
2450 "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2451 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2452 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2453 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_CUBE_F32_F32: asm "tex.cube.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2456 def TEX_CUBE_F32_F32
2457 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2458 Float32Regs:$b, Float32Regs:$a),
2459 (ins Int64Regs:$t, Int64Regs:$s,
2460 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2461 "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2462 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// TEX_CUBE_F32_F32_LEVEL: asm "tex.level.cube.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2464 def TEX_CUBE_F32_F32_LEVEL
2465 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2466 Float32Regs:$b, Float32Regs:$a),
2467 (ins Int64Regs:$t, Int64Regs:$s,
2468 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2470 "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2471 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_CUBE_S32_F32: asm "tex.cube.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2473 def TEX_CUBE_S32_F32
2474 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2475 Int32Regs:$b, Int32Regs:$a),
2476 (ins Int64Regs:$t, Int64Regs:$s,
2477 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2478 "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2479 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// TEX_CUBE_S32_F32_LEVEL: asm "tex.level.cube.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2481 def TEX_CUBE_S32_F32_LEVEL
2482 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2483 Int32Regs:$b, Int32Regs:$a),
2484 (ins Int64Regs:$t, Int64Regs:$s,
2485 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2487 "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2488 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_CUBE_U32_F32: asm "tex.cube.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2490 def TEX_CUBE_U32_F32
2491 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2492 Int32Regs:$b, Int32Regs:$a),
2493 (ins Int64Regs:$t, Int64Regs:$s,
2494 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2495 "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2496 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// TEX_CUBE_U32_F32_LEVEL: asm "tex.level.cube.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2498 def TEX_CUBE_U32_F32_LEVEL
2499 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2500 Int32Regs:$b, Int32Regs:$a),
2501 (ins Int64Regs:$t, Int64Regs:$s,
2502 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2504 "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2505 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_CUBE_ARRAY_F32_F32: asm "tex.acube.v4.f32.f32"; four Float32Regs results.
// The coordinate group is {$l, $x, $y, $z}, with $l an Int32Regs operand and the rest Float32Regs.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2508 def TEX_CUBE_ARRAY_F32_F32
2509 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2510 Float32Regs:$b, Float32Regs:$a),
2511 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2512 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2513 "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2514 "[$t, $s, \\{$l, $x, $y, $z\\}];",
// TEX_CUBE_ARRAY_F32_F32_LEVEL: asm "tex.level.acube.v4.f32.f32"; four Float32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2516 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2517 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2518 Float32Regs:$b, Float32Regs:$a),
2519 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2520 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2522 "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2523 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// TEX_CUBE_ARRAY_S32_F32: asm "tex.acube.v4.s32.f32"; four Int32Regs results.
// The coordinate group is {$l, $x, $y, $z}, with $l an Int32Regs operand and the rest Float32Regs.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2525 def TEX_CUBE_ARRAY_S32_F32
2526 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2527 Int32Regs:$b, Int32Regs:$a),
2528 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2529 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2530 "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2531 "[$t, $s, \\{$l, $x, $y, $z\\}];",
// TEX_CUBE_ARRAY_S32_F32_LEVEL: asm "tex.level.acube.v4.s32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2533 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2534 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2535 Int32Regs:$b, Int32Regs:$a),
2536 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2537 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2539 "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2540 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// TEX_CUBE_ARRAY_U32_F32: asm "tex.acube.v4.u32.f32"; four Int32Regs results.
// The coordinate group is {$l, $x, $y, $z}, with $l an Int32Regs operand and the rest Float32Regs.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2542 def TEX_CUBE_ARRAY_U32_F32
2543 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2544 Int32Regs:$b, Int32Regs:$a),
2545 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2546 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2547 "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2548 "[$t, $s, \\{$l, $x, $y, $z\\}];",
// TEX_CUBE_ARRAY_U32_F32_LEVEL: asm "tex.level.acube.v4.u32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2550 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2551 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2552 Int32Regs:$b, Int32Regs:$a),
2553 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2554 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2556 "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2557 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// TLD4_R_2D_F32_F32: asm "tld4.r.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2560 def TLD4_R_2D_F32_F32
2561 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2562 Float32Regs:$v2, Float32Regs:$v3),
2563 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2564 "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2565 "[$t, $s, \\{$x, $y\\}];",
// TLD4_G_2D_F32_F32: asm "tld4.g.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2567 def TLD4_G_2D_F32_F32
2568 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2569 Float32Regs:$v2, Float32Regs:$v3),
2570 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2571 "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2572 "[$t, $s, \\{$x, $y\\}];",
// TLD4_B_2D_F32_F32: asm "tld4.b.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2574 def TLD4_B_2D_F32_F32
2575 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2576 Float32Regs:$v2, Float32Regs:$v3),
2577 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2578 "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2579 "[$t, $s, \\{$x, $y\\}];",
// TLD4_A_2D_F32_F32: asm "tld4.a.2d.v4.f32.f32"; four Float32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2581 def TLD4_A_2D_F32_F32
2582 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2583 Float32Regs:$v2, Float32Regs:$v3),
2584 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2585 "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2586 "[$t, $s, \\{$x, $y\\}];",
// TLD4_R_2D_S32_F32: asm "tld4.r.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2588 def TLD4_R_2D_S32_F32
2589 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2590 Int32Regs:$v2, Int32Regs:$v3),
2591 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2592 "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2593 "[$t, $s, \\{$x, $y\\}];",
// TLD4_G_2D_S32_F32: asm "tld4.g.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2595 def TLD4_G_2D_S32_F32
2596 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2597 Int32Regs:$v2, Int32Regs:$v3),
2598 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2599 "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2600 "[$t, $s, \\{$x, $y\\}];",
// TLD4_B_2D_S32_F32: asm "tld4.b.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2602 def TLD4_B_2D_S32_F32
2603 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2604 Int32Regs:$v2, Int32Regs:$v3),
2605 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2606 "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2607 "[$t, $s, \\{$x, $y\\}];",
// TLD4_A_2D_S32_F32: asm "tld4.a.2d.v4.s32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2609 def TLD4_A_2D_S32_F32
2610 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2611 Int32Regs:$v2, Int32Regs:$v3),
2612 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2613 "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2614 "[$t, $s, \\{$x, $y\\}];",
// TLD4_R_2D_U32_F32: asm "tld4.r.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2616 def TLD4_R_2D_U32_F32
2617 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2618 Int32Regs:$v2, Int32Regs:$v3),
2619 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2620 "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2621 "[$t, $s, \\{$x, $y\\}];",
// TLD4_G_2D_U32_F32: asm "tld4.g.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2623 def TLD4_G_2D_U32_F32
2624 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2625 Int32Regs:$v2, Int32Regs:$v3),
2626 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2627 "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2628 "[$t, $s, \\{$x, $y\\}];",
// TLD4_B_2D_U32_F32: asm "tld4.b.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2630 def TLD4_B_2D_U32_F32
2631 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2632 Int32Regs:$v2, Int32Regs:$v3),
2633 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2634 "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2635 "[$t, $s, \\{$x, $y\\}];",
// TLD4_A_2D_U32_F32: asm "tld4.a.2d.v4.u32.f32"; four Int32Regs results ($v0..$v3),
// operands $t, $s and Float32Regs coordinates $x, $y.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2637 def TLD4_A_2D_U32_F32
2638 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2639 Int32Regs:$v2, Int32Regs:$v3),
2640 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2641 "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2642 "[$t, $s, \\{$x, $y\\}];",
2648 let IsTex = 1, IsTexModeUnified = 1 in {
2649 // Texture fetch instructions using handles
// TEX_UNIFIED_1D_F32_S32: asm "tex.1d.v4.f32.s32" addressed as [$t, {$x}] (single handle $t,
// no $s operand); four Float32Regs results, Int32Regs coordinate.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2650 def TEX_UNIFIED_1D_F32_S32
2651 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2652 Float32Regs:$b, Float32Regs:$a),
2653 (ins Int64Regs:$t, Int32Regs:$x),
2654 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// TEX_UNIFIED_1D_F32_F32: asm "tex.1d.v4.f32.f32" addressed as [$t, {$x}] (single handle $t,
// no $s operand); four Float32Regs results, Float32Regs coordinate.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2656 def TEX_UNIFIED_1D_F32_F32
2657 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2658 Float32Regs:$b, Float32Regs:$a),
2659 (ins Int64Regs:$t, Float32Regs:$x),
2660 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// TEX_UNIFIED_1D_F32_F32_LEVEL: asm "tex.level.1d.v4.f32.f32" with a trailing $lod operand;
// four Float32Regs results, operands $t, $x, $lod.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2662 def TEX_UNIFIED_1D_F32_F32_LEVEL
2663 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2664 Float32Regs:$b, Float32Regs:$a),
2665 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2666 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2667 "[$t, \\{$x\\}], $lod;",
// TEX_UNIFIED_1D_F32_F32_GRAD: asm "tex.grad.1d.v4.f32.f32"; four Float32Regs results.
// $gradx and $grady are each emitted as a one-element brace group.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2669 def TEX_UNIFIED_1D_F32_F32_GRAD
2670 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2671 Float32Regs:$b, Float32Regs:$a),
2672 (ins Int64Regs:$t, Float32Regs:$x,
2673 Float32Regs:$gradx, Float32Regs:$grady),
2674 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2675 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_1D_S32_S32: asm "tex.1d.v4.s32.s32" addressed as [$t, {$x}] (single handle $t,
// no $s operand); four Int32Regs results, Int32Regs coordinate.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2677 def TEX_UNIFIED_1D_S32_S32
2678 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2679 Int32Regs:$b, Int32Regs:$a),
2680 (ins Int64Regs:$t, Int32Regs:$x),
2681 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// TEX_UNIFIED_1D_S32_F32: asm "tex.1d.v4.s32.f32" addressed as [$t, {$x}] (single handle $t,
// no $s operand); four Int32Regs results, Float32Regs coordinate.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2683 def TEX_UNIFIED_1D_S32_F32
2684 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2685 Int32Regs:$b, Int32Regs:$a),
2686 (ins Int64Regs:$t, Float32Regs:$x),
2687 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// TEX_UNIFIED_1D_S32_F32_LEVEL: asm "tex.level.1d.v4.s32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2689 def TEX_UNIFIED_1D_S32_F32_LEVEL
2690 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2691 Int32Regs:$b, Int32Regs:$a),
2692 (ins Int64Regs:$t, Float32Regs:$x,
2694 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2695 "[$t, \\{$x\\}], $lod;",
// TEX_UNIFIED_1D_S32_F32_GRAD: asm "tex.grad.1d.v4.s32.f32"; four Int32Regs results.
// $gradx and $grady are each emitted as a one-element brace group.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2697 def TEX_UNIFIED_1D_S32_F32_GRAD
2698 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2699 Int32Regs:$b, Int32Regs:$a),
2700 (ins Int64Regs:$t, Float32Regs:$x,
2701 Float32Regs:$gradx, Float32Regs:$grady),
2702 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2703 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_1D_U32_S32: asm "tex.1d.v4.u32.s32" addressed as [$t, {$x}] (single handle $t,
// no $s operand); four Int32Regs results, Int32Regs coordinate.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2705 def TEX_UNIFIED_1D_U32_S32
2706 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2707 Int32Regs:$b, Int32Regs:$a),
2708 (ins Int64Regs:$t, Int32Regs:$x),
2709 "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// TEX_UNIFIED_1D_U32_F32: asm "tex.1d.v4.u32.f32" addressed as [$t, {$x}] (single handle $t,
// no $s operand); four Int32Regs results, Float32Regs coordinate.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2711 def TEX_UNIFIED_1D_U32_F32
2712 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2713 Int32Regs:$b, Int32Regs:$a),
2714 (ins Int64Regs:$t, Float32Regs:$x),
2715 "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
// TEX_UNIFIED_1D_U32_F32_LEVEL: asm "tex.level.1d.v4.u32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2717 def TEX_UNIFIED_1D_U32_F32_LEVEL
2718 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2719 Int32Regs:$b, Int32Regs:$a),
2720 (ins Int64Regs:$t, Float32Regs:$x,
2722 "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2723 "[$t, \\{$x\\}], $lod;",
// TEX_UNIFIED_1D_U32_F32_GRAD: asm "tex.grad.1d.v4.u32.f32"; four Int32Regs results.
// $gradx and $grady are each emitted as a one-element brace group.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2725 def TEX_UNIFIED_1D_U32_F32_GRAD
2726 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2727 Int32Regs:$b, Int32Regs:$a),
2728 (ins Int64Regs:$t, Float32Regs:$x,
2729 Float32Regs:$gradx, Float32Regs:$grady),
2730 "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2731 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_1D_ARRAY_F32_S32: asm "tex.a1d.v4.f32.s32" addressed as [$t, {$l, $x}];
// four Float32Regs results, Int32Regs $l and $x.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2734 def TEX_UNIFIED_1D_ARRAY_F32_S32
2735 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2736 Float32Regs:$b, Float32Regs:$a),
2737 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2738 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2739 "[$t, \\{$l, $x\\}];",
// TEX_UNIFIED_1D_ARRAY_F32_F32: asm "tex.a1d.v4.f32.f32" addressed as [$t, {$l, $x}];
// four Float32Regs results, Int32Regs $l and Float32Regs $x.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2741 def TEX_UNIFIED_1D_ARRAY_F32_F32
2742 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2743 Float32Regs:$b, Float32Regs:$a),
2744 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2745 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2746 "[$t, \\{$l, $x\\}];",
// TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL: asm "tex.level.a1d.v4.f32.f32"; four Float32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2748 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
2749 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2750 Float32Regs:$b, Float32Regs:$a),
2751 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2753 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2754 "[$t, \\{$l, $x\\}], $lod;",
// TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD: asm "tex.grad.a1d.v4.f32.f32"; four Float32Regs results.
// $gradx and $grady are each emitted as a one-element brace group.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2756 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
2757 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2758 Float32Regs:$b, Float32Regs:$a),
2759 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2760 Float32Regs:$gradx, Float32Regs:$grady),
2761 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2762 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_1D_ARRAY_S32_S32: asm "tex.a1d.v4.s32.s32" addressed as [$t, {$l, $x}];
// four Int32Regs results, Int32Regs $l and $x.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2764 def TEX_UNIFIED_1D_ARRAY_S32_S32
2765 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2766 Int32Regs:$b, Int32Regs:$a),
2767 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2768 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2769 "[$t, \\{$l, $x\\}];",
// TEX_UNIFIED_1D_ARRAY_S32_F32: asm "tex.a1d.v4.s32.f32" addressed as [$t, {$l, $x}];
// four Int32Regs results, Int32Regs $l and Float32Regs $x.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2771 def TEX_UNIFIED_1D_ARRAY_S32_F32
2772 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2773 Int32Regs:$b, Int32Regs:$a),
2774 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2775 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2776 "[$t, \\{$l, $x\\}];",
// TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL: asm "tex.level.a1d.v4.s32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2778 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
2779 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2780 Int32Regs:$b, Int32Regs:$a),
2781 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2783 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2784 "[$t, \\{$l, $x\\}], $lod;",
// TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD: asm "tex.grad.a1d.v4.s32.f32"; four Int32Regs results.
// $gradx and $grady are each emitted as a one-element brace group.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2786 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
2787 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2788 Int32Regs:$b, Int32Regs:$a),
2789 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2790 Float32Regs:$gradx, Float32Regs:$grady),
2791 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2792 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_1D_ARRAY_U32_S32: asm "tex.a1d.v4.u32.s32" addressed as [$t, {$l, $x}];
// four Int32Regs results, Int32Regs $l and $x.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2794 def TEX_UNIFIED_1D_ARRAY_U32_S32
2795 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2796 Int32Regs:$b, Int32Regs:$a),
2797 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2798 "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2799 "[$t, \\{$l, $x\\}];",
// TEX_UNIFIED_1D_ARRAY_U32_F32: asm "tex.a1d.v4.u32.f32" addressed as [$t, {$l, $x}];
// four Int32Regs results, Int32Regs $l and Float32Regs $x.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2801 def TEX_UNIFIED_1D_ARRAY_U32_F32
2802 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2803 Int32Regs:$b, Int32Regs:$a),
2804 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2805 "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2806 "[$t, \\{$l, $x\\}];",
// TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL: asm "tex.level.a1d.v4.u32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2808 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
2809 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2810 Int32Regs:$b, Int32Regs:$a),
2811 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2813 "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2814 "[$t, \\{$l, $x\\}], $lod;",
// TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD: asm "tex.grad.a1d.v4.u32.f32"; four Int32Regs results.
// $gradx and $grady are each emitted as a one-element brace group.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2816 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
2817 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2818 Int32Regs:$b, Int32Regs:$a),
2819 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2820 Float32Regs:$gradx, Float32Regs:$grady),
2821 "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2822 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// TEX_UNIFIED_2D_F32_S32: asm "tex.2d.v4.f32.s32" addressed as [$t, {$x, $y}];
// four Float32Regs results, Int32Regs coordinates.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2825 def TEX_UNIFIED_2D_F32_S32
2826 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2827 Float32Regs:$b, Float32Regs:$a),
2828 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2829 "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2830 "[$t, \\{$x, $y\\}];",
// TEX_UNIFIED_2D_F32_F32: asm "tex.2d.v4.f32.f32" addressed as [$t, {$x, $y}];
// four Float32Regs results, Float32Regs coordinates.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2832 def TEX_UNIFIED_2D_F32_F32
2833 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2834 Float32Regs:$b, Float32Regs:$a),
2835 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2836 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2837 "[$t, \\{$x, $y\\}];",
// TEX_UNIFIED_2D_F32_F32_LEVEL: asm "tex.level.2d.v4.f32.f32"; four Float32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2839 def TEX_UNIFIED_2D_F32_F32_LEVEL
2840 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2841 Float32Regs:$b, Float32Regs:$a),
2842 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2844 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2845 "[$t, \\{$x, $y\\}], $lod;",
// TEX_UNIFIED_2D_F32_F32_GRAD: asm "tex.grad.2d.v4.f32.f32"; four Float32Regs results.
// The gradients are emitted as two two-element brace groups ($gradx0/1, $grady0/1).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2847 def TEX_UNIFIED_2D_F32_F32_GRAD
2848 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2849 Float32Regs:$b, Float32Regs:$a),
2850 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2851 Float32Regs:$gradx0, Float32Regs:$gradx1,
2852 Float32Regs:$grady0, Float32Regs:$grady1),
2853 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2854 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2855 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_S32_S32: asm "tex.2d.v4.s32.s32" addressed as [$t, {$x, $y}];
// four Int32Regs results, Int32Regs coordinates.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2857 def TEX_UNIFIED_2D_S32_S32
2858 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2859 Int32Regs:$b, Int32Regs:$a),
2860 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2861 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2862 "[$t, \\{$x, $y\\}];",
// TEX_UNIFIED_2D_S32_F32: asm "tex.2d.v4.s32.f32" addressed as [$t, {$x, $y}];
// four Int32Regs results, Float32Regs coordinates.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2864 def TEX_UNIFIED_2D_S32_F32
2865 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2866 Int32Regs:$b, Int32Regs:$a),
2867 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2868 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2869 "[$t, \\{$x, $y\\}];",
// TEX_UNIFIED_2D_S32_F32_LEVEL: asm "tex.level.2d.v4.s32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2871 def TEX_UNIFIED_2D_S32_F32_LEVEL
2872 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2873 Int32Regs:$b, Int32Regs:$a),
2874 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2876 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2877 "[$t, \\{$x, $y\\}], $lod;",
// TEX_UNIFIED_2D_S32_F32_GRAD: asm "tex.grad.2d.v4.s32.f32"; four Int32Regs results.
// The gradients are emitted as two two-element brace groups ($gradx0/1, $grady0/1).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2879 def TEX_UNIFIED_2D_S32_F32_GRAD
2880 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2881 Int32Regs:$b, Int32Regs:$a),
2882 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2883 Float32Regs:$gradx0, Float32Regs:$gradx1,
2884 Float32Regs:$grady0, Float32Regs:$grady1),
2885 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2886 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2887 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_U32_S32: asm "tex.2d.v4.u32.s32" addressed as [$t, {$x, $y}];
// four Int32Regs results, Int32Regs coordinates.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2889 def TEX_UNIFIED_2D_U32_S32
2890 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2891 Int32Regs:$b, Int32Regs:$a),
2892 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2893 "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2894 "[$t, \\{$x, $y\\}];",
// TEX_UNIFIED_2D_U32_F32: asm "tex.2d.v4.u32.f32" addressed as [$t, {$x, $y}];
// four Int32Regs results, Float32Regs coordinates.
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2896 def TEX_UNIFIED_2D_U32_F32
2897 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2898 Int32Regs:$b, Int32Regs:$a),
2899 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2900 "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2901 "[$t, \\{$x, $y\\}];",
// TEX_UNIFIED_2D_U32_F32_LEVEL: asm "tex.level.2d.v4.u32.f32"; four Int32Regs results.
// NOTE(review): $lod is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2903 def TEX_UNIFIED_2D_U32_F32_LEVEL
2904 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2905 Int32Regs:$b, Int32Regs:$a),
2906 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2908 "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2909 "[$t, \\{$x, $y\\}], $lod;",
// TEX_UNIFIED_2D_U32_F32_GRAD: asm "tex.grad.2d.v4.u32.f32"; four Int32Regs results.
// The gradients are emitted as two two-element brace groups ($gradx0/1, $grady0/1).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2911 def TEX_UNIFIED_2D_U32_F32_GRAD
2912 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2913 Int32Regs:$b, Int32Regs:$a),
2914 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2915 Float32Regs:$gradx0, Float32Regs:$gradx1,
2916 Float32Regs:$grady0, Float32Regs:$grady1),
2917 "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2918 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2919 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_ARRAY_F32_S32: asm "tex.a2d.v4.f32.s32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2922 def TEX_UNIFIED_2D_ARRAY_F32_S32
2923 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2924 Float32Regs:$b, Float32Regs:$a),
2925 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
2927 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2928 "[$t, \\{$l, $x, $y, $y\\}];",
// TEX_UNIFIED_2D_ARRAY_F32_F32: asm "tex.a2d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2930 def TEX_UNIFIED_2D_ARRAY_F32_F32
2931 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2932 Float32Regs:$b, Float32Regs:$a),
2933 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2935 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2936 "[$t, \\{$l, $x, $y, $y\\}];",
// TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL: asm "tex.level.a2d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2938 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
2939 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2940 Float32Regs:$b, Float32Regs:$a),
2941 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2942 Float32Regs:$y, Float32Regs:$lod),
2943 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2944 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD: asm "tex.grad.a2d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2946 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
2947 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2948 Float32Regs:$b, Float32Regs:$a),
2949 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2950 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2951 Float32Regs:$grady0, Float32Regs:$grady1),
2952 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2953 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2954 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_ARRAY_S32_S32: asm "tex.a2d.v4.s32.s32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2956 def TEX_UNIFIED_2D_ARRAY_S32_S32
2957 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2958 Int32Regs:$b, Int32Regs:$a),
2959 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
2961 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2962 "[$t, \\{$l, $x, $y, $y\\}];",
// TEX_UNIFIED_2D_ARRAY_S32_F32: asm "tex.a2d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2964 def TEX_UNIFIED_2D_ARRAY_S32_F32
2965 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2966 Int32Regs:$b, Int32Regs:$a),
2967 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2969 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2970 "[$t, \\{$l, $x, $y, $y\\}];",
// TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL: asm "tex.level.a2d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
2972 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
2973 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2974 Int32Regs:$b, Int32Regs:$a),
2975 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2976 Float32Regs:$y, Float32Regs:$lod),
2977 "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2978 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD: asm "tex.grad.a2d.v4.s32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2980 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
2981 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2982 Int32Regs:$b, Int32Regs:$a),
2983 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2985 Float32Regs:$gradx0, Float32Regs:$gradx1,
2986 Float32Regs:$grady0, Float32Regs:$grady1),
2987 "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2988 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2989 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_2D_ARRAY_U32_S32: asm "tex.a2d.v4.u32.s32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2991 def TEX_UNIFIED_2D_ARRAY_U32_S32
2992 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2993 Int32Regs:$b, Int32Regs:$a),
2994 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
2996 "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2997 "[$t, \\{$l, $x, $y, $y\\}];",
// TEX_UNIFIED_2D_ARRAY_U32_F32: asm "tex.a2d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
2999 def TEX_UNIFIED_2D_ARRAY_U32_F32
3000 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3001 Int32Regs:$b, Int32Regs:$a),
3002 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3004 "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3005 "[$t, \\{$l, $x, $y, $y\\}];",
// TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL: asm "tex.level.a2d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
3007 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3008 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3009 Int32Regs:$b, Int32Regs:$a),
3010 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3011 Float32Regs:$y, Float32Regs:$lod),
3012 "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3013 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD: asm "tex.grad.a2d.v4.u32.f32"; four Int32Regs results.
// The coordinate group lists $y twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $y is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
3015 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3016 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3017 Int32Regs:$b, Int32Regs:$a),
3018 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3020 Float32Regs:$gradx0, Float32Regs:$gradx1,
3021 Float32Regs:$grady0, Float32Regs:$grady1),
3022 "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3023 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3024 "\\{$grady0, $grady1\\};",
// TEX_UNIFIED_3D_F32_S32: asm "tex.3d.v4.f32.s32"; four Float32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $z is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
3027 def TEX_UNIFIED_3D_F32_S32
3028 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3029 Float32Regs:$b, Float32Regs:$a),
3030 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3032 "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3033 "[$t, \\{$x, $y, $z, $z\\}];",
// TEX_UNIFIED_3D_F32_F32: asm "tex.3d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): $z is used in the asm string but not declared in the visible (ins ...) list,
// and no closing pattern/`>;` is visible — lines look elided here; confirm.
3035 def TEX_UNIFIED_3D_F32_F32
3036 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3037 Float32Regs:$b, Float32Regs:$a),
3038 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3040 "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3041 "[$t, \\{$x, $y, $z, $z\\}];",
// TEX_UNIFIED_3D_F32_F32_LEVEL: asm "tex.level.3d.v4.f32.f32"; four Float32Regs results.
// The coordinate group lists $z twice as its 3rd and 4th entries (presumably padding — confirm).
// NOTE(review): no pattern list or closing `>;` is visible after the asm string — confirm.
3043 def TEX_UNIFIED_3D_F32_F32_LEVEL
3044 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3045 Float32Regs:$b, Float32Regs:$a),
3046 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3047 Float32Regs:$z, Float32Regs:$lod),
3048 "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3049 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3051 def TEX_UNIFIED_3D_F32_F32_GRAD
3052 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3053 Float32Regs:$b, Float32Regs:$a),
3054 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3056 Float32Regs:$gradx0, Float32Regs:$gradx1,
3057 Float32Regs:$gradx2, Float32Regs:$grady0,
3058 Float32Regs:$grady1, Float32Regs:$grady2),
3059 "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3060 "[$t, \\{$x, $y, $z, $z\\}], "
3061 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3062 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_UNIFIED_3D_S32_{S32,F32,F32_LEVEL,F32_GRAD}: emit `tex.3d.v4.s32.*`
// forms returning four Int32Regs values ($r, $g, $b, $a) through
// Int64Regs $t. The trailing S32/F32 in the name matches the coordinate type
// in the asm string (Int32Regs vs Float32Regs coordinates).
// The coordinate vector is written {$x, $y, $z, $z}: $z is listed twice
// (presumably to fill a 4-element vector -- confirm against the PTX ISA).
// NOTE(review): in the _S32, _F32 and _GRAD records the line declaring $z is
// not visible in the (ins ...) list here although the asm string uses it.
3064 def TEX_UNIFIED_3D_S32_S32
3065 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3066 Int32Regs:$b, Int32Regs:$a),
3067 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3069 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3070 "[$t, \\{$x, $y, $z, $z\\}];",
3072 def TEX_UNIFIED_3D_S32_F32
3073 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3074 Int32Regs:$b, Int32Regs:$a),
3075 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3077 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3078 "[$t, \\{$x, $y, $z, $z\\}];",
// _LEVEL variant: extra Float32Regs $lod operand after the address.
3080 def TEX_UNIFIED_3D_S32_F32_LEVEL
3081 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3082 Int32Regs:$b, Int32Regs:$a),
3083 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3084 Float32Regs:$z, Float32Regs:$lod),
3085 "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3086 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// _GRAD variant: six gradient operands; each gradient vector in the asm
// string repeats its third component ($gradx2 / $grady2) as a fourth element.
3088 def TEX_UNIFIED_3D_S32_F32_GRAD
3089 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3090 Int32Regs:$b, Int32Regs:$a),
3091 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3093 Float32Regs:$gradx0, Float32Regs:$gradx1,
3094 Float32Regs:$gradx2, Float32Regs:$grady0,
3095 Float32Regs:$grady1, Float32Regs:$grady2),
3096 "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3097 "[$t, \\{$x, $y, $z, $z\\}], "
3098 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3099 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_UNIFIED_3D_U32_{S32,F32,F32_LEVEL,F32_GRAD}: emit `tex.3d.v4.u32.*`
// forms returning four Int32Regs values ($r, $g, $b, $a) through
// Int64Regs $t. The trailing S32/F32 in the name matches the coordinate type
// in the asm string (Int32Regs vs Float32Regs coordinates).
// The coordinate vector is written {$x, $y, $z, $z}: $z is listed twice
// (presumably to fill a 4-element vector -- confirm against the PTX ISA).
// NOTE(review): in the _S32, _F32 and _GRAD records the line declaring $z is
// not visible in the (ins ...) list here although the asm string uses it.
3101 def TEX_UNIFIED_3D_U32_S32
3102 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3103 Int32Regs:$b, Int32Regs:$a),
3104 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3106 "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3107 "[$t, \\{$x, $y, $z, $z\\}];",
3109 def TEX_UNIFIED_3D_U32_F32
3110 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3111 Int32Regs:$b, Int32Regs:$a),
3112 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3114 "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3115 "[$t, \\{$x, $y, $z, $z\\}];",
// _LEVEL variant: extra Float32Regs $lod operand after the address.
3117 def TEX_UNIFIED_3D_U32_F32_LEVEL
3118 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3119 Int32Regs:$b, Int32Regs:$a),
3120 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3121 Float32Regs:$z, Float32Regs:$lod),
3122 "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3123 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// _GRAD variant: six gradient operands; each gradient vector in the asm
// string repeats its third component ($gradx2 / $grady2) as a fourth element.
3125 def TEX_UNIFIED_3D_U32_F32_GRAD
3126 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3127 Int32Regs:$b, Int32Regs:$a),
3128 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3130 Float32Regs:$gradx0, Float32Regs:$gradx1,
3131 Float32Regs:$gradx2, Float32Regs:$grady0,
3132 Float32Regs:$grady1, Float32Regs:$grady2),
3133 "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3134 "[$t, \\{$x, $y, $z, $z\\}], "
3135 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3136 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// TEX_UNIFIED_CUBE_{F32,S32,U32}_F32{,_LEVEL}: emit `tex.cube.v4.<ty>.f32`
// and `tex.level.cube.v4.<ty>.f32`. The F32 records return four Float32Regs
// values, the S32/U32 records four Int32Regs values ($r, $g, $b, $a).
// Coordinates are Float32Regs $x, $y, $z; the asm string writes the vector
// as {$x, $y, $z, $z}, listing $z twice (presumably 4-element padding --
// confirm against the PTX ISA).
// NOTE(review): the asm strings use $t (and $lod in the _LEVEL records), but
// the lines declaring those operands are not visible in the operand lists
// shown here -- confirm the (ins ...) lists are complete.
3139 def TEX_UNIFIED_CUBE_F32_F32
3140 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3141 Float32Regs:$b, Float32Regs:$a),
3143 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3144 "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3145 "[$t, \\{$x, $y, $z, $z\\}];",
3147 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3148 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3149 Float32Regs:$b, Float32Regs:$a),
3151 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3153 "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3154 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3156 def TEX_UNIFIED_CUBE_S32_F32
3157 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3158 Int32Regs:$b, Int32Regs:$a),
3160 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3161 "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3162 "[$t, \\{$x, $y, $z, $z\\}];",
3164 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3165 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3166 Int32Regs:$b, Int32Regs:$a),
3168 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3170 "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3171 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3173 def TEX_UNIFIED_CUBE_U32_F32
3174 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3175 Int32Regs:$b, Int32Regs:$a),
3177 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3178 "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3179 "[$t, \\{$x, $y, $z, $z\\}];",
3181 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3182 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3183 Int32Regs:$b, Int32Regs:$a),
3185 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3187 "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3188 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// TEX_UNIFIED_CUBE_ARRAY_{F32,S32,U32}_F32{,_LEVEL}: emit
// `tex.acube.v4.<ty>.f32` and `tex.level.acube.v4.<ty>.f32`. The F32 records
// return four Float32Regs values, the S32/U32 records four Int32Regs values
// ($r, $g, $b, $a). Operands are Int64Regs $t, Int32Regs $l and Float32Regs
// $x, $y, $z; the asm string passes them as the 4-element vector
// {$l, $x, $y, $z} (no repeated component, unlike the non-array cube forms).
// NOTE(review): the _LEVEL asm strings use $lod, but the line declaring it
// is not visible in the (ins ...) lists shown here -- confirm.
3191 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3192 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3193 Float32Regs:$b, Float32Regs:$a),
3194 (ins Int64Regs:$t, Int32Regs:$l,
3195 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3196 "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3197 "[$t, \\{$l, $x, $y, $z\\}];",
3199 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3200 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3201 Float32Regs:$b, Float32Regs:$a),
3202 (ins Int64Regs:$t, Int32Regs:$l,
3203 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3205 "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3206 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3208 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3209 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3210 Int32Regs:$b, Int32Regs:$a),
3211 (ins Int64Regs:$t, Int32Regs:$l,
3212 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3213 "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3214 "[$t, \\{$l, $x, $y, $z\\}];",
3216 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3217 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3218 Int32Regs:$b, Int32Regs:$a),
3219 (ins Int64Regs:$t, Int32Regs:$l,
3220 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3222 "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3223 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3225 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3226 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3227 Int32Regs:$b, Int32Regs:$a),
3228 (ins Int64Regs:$t, Int32Regs:$l,
3229 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3230 "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3231 "[$t, \\{$l, $x, $y, $z\\}];",
3233 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3234 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3235 Int32Regs:$b, Int32Regs:$a),
3236 (ins Int64Regs:$t, Int32Regs:$l,
3237 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3239 "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3240 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// TLD4_UNIFIED_{R,G,B,A}_2D_F32_F32: emit `tld4.<r|g|b|a>.2d.v4.f32.f32`.
// Each record returns four Float32Regs values ($v0..$v3) and takes
// Int64Regs $t plus Float32Regs coordinates ($x, $y). The four records
// differ only in the component selector letter in the asm string.
3243 def TLD4_UNIFIED_R_2D_F32_F32
3244 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3245 Float32Regs:$v2, Float32Regs:$v3),
3246 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3247 "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3248 "[$t, \\{$x, $y\\}];",
3250 def TLD4_UNIFIED_G_2D_F32_F32
3251 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3252 Float32Regs:$v2, Float32Regs:$v3),
3253 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3254 "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3255 "[$t, \\{$x, $y\\}];",
3257 def TLD4_UNIFIED_B_2D_F32_F32
3258 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3259 Float32Regs:$v2, Float32Regs:$v3),
3260 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3261 "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3262 "[$t, \\{$x, $y\\}];",
3264 def TLD4_UNIFIED_A_2D_F32_F32
3265 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3266 Float32Regs:$v2, Float32Regs:$v3),
3267 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3268 "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3269 "[$t, \\{$x, $y\\}];",
// TLD4_UNIFIED_{R,G,B,A}_2D_S32_F32: emit `tld4.<r|g|b|a>.2d.v4.s32.f32`.
// Each record returns four Int32Regs values ($v0..$v3) and takes
// Int64Regs $t plus Float32Regs coordinates ($x, $y). The four records
// differ only in the component selector letter in the asm string.
3271 def TLD4_UNIFIED_R_2D_S32_F32
3272 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3273 Int32Regs:$v2, Int32Regs:$v3),
3274 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3275 "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3276 "[$t, \\{$x, $y\\}];",
3278 def TLD4_UNIFIED_G_2D_S32_F32
3279 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3280 Int32Regs:$v2, Int32Regs:$v3),
3281 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3282 "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3283 "[$t, \\{$x, $y\\}];",
3285 def TLD4_UNIFIED_B_2D_S32_F32
3286 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3287 Int32Regs:$v2, Int32Regs:$v3),
3288 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3289 "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3290 "[$t, \\{$x, $y\\}];",
3292 def TLD4_UNIFIED_A_2D_S32_F32
3293 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3294 Int32Regs:$v2, Int32Regs:$v3),
3295 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3296 "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3297 "[$t, \\{$x, $y\\}];",
// TLD4_UNIFIED_{R,G,B,A}_2D_U32_F32: emit `tld4.<r|g|b|a>.2d.v4.u32.f32`.
// Each record returns four Int32Regs values ($v0..$v3) and takes
// Int64Regs $t plus Float32Regs coordinates ($x, $y). The four records
// differ only in the component selector letter in the asm string.
3299 def TLD4_UNIFIED_R_2D_U32_F32
3300 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3301 Int32Regs:$v2, Int32Regs:$v3),
3302 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3303 "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3304 "[$t, \\{$x, $y\\}];",
3306 def TLD4_UNIFIED_G_2D_U32_F32
3307 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3308 Int32Regs:$v2, Int32Regs:$v3),
3309 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3310 "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3311 "[$t, \\{$x, $y\\}];",
3313 def TLD4_UNIFIED_B_2D_U32_F32
3314 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3315 Int32Regs:$v2, Int32Regs:$v3),
3316 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3317 "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3318 "[$t, \\{$x, $y\\}];",
3320 def TLD4_UNIFIED_A_2D_U32_F32
3321 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3322 Int32Regs:$v2, Int32Regs:$v3),
3323 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3324 "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3325 "[$t, \\{$x, $y\\}];",
3331 //=== Surface load instructions
// SULD_1D_{I8,I16,I32,I64}_CLAMP: emit `suld.b.1d.<b8|b16|b32|b64>.clamp`,
// producing one result $r from Int64Regs $s at Int32Regs coordinate $x.
// The b8 and b16 forms both place $r in Int16Regs; b32 uses Int32Regs and
// b64 uses Int64Regs.
3334 def SULD_1D_I8_CLAMP
3335 : NVPTXInst<(outs Int16Regs:$r),
3336 (ins Int64Regs:$s, Int32Regs:$x),
3337 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3339 def SULD_1D_I16_CLAMP
3340 : NVPTXInst<(outs Int16Regs:$r),
3341 (ins Int64Regs:$s, Int32Regs:$x),
3342 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3344 def SULD_1D_I32_CLAMP
3345 : NVPTXInst<(outs Int32Regs:$r),
3346 (ins Int64Regs:$s, Int32Regs:$x),
3347 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3349 def SULD_1D_I64_CLAMP
3350 : NVPTXInst<(outs Int64Regs:$r),
3351 (ins Int64Regs:$s, Int32Regs:$x),
3352 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.a1d.<b8|b16|b32|b64>.clamp`, producing one result $r from
// Int64Regs $s addressed by the Int32Regs pair {$l, $x}.
// The b8 and b16 forms both place $r in Int16Regs; b32 uses Int32Regs and
// b64 uses Int64Regs.
3355 def SULD_1D_ARRAY_I8_CLAMP
3356 : NVPTXInst<(outs Int16Regs:$r),
3357 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3358 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3360 def SULD_1D_ARRAY_I16_CLAMP
3361 : NVPTXInst<(outs Int16Regs:$r),
3362 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3363 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3365 def SULD_1D_ARRAY_I32_CLAMP
3366 : NVPTXInst<(outs Int32Regs:$r),
3367 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3368 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3370 def SULD_1D_ARRAY_I64_CLAMP
3371 : NVPTXInst<(outs Int64Regs:$r),
3372 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3373 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// SULD_2D_{I8,I16,I32,I64}_CLAMP: emit `suld.b.2d.<b8|b16|b32|b64>.clamp`,
// producing one result $r from Int64Regs $s at Int32Regs coordinates
// {$x, $y}. The b8 and b16 forms both place $r in Int16Regs; b32 uses
// Int32Regs and b64 uses Int64Regs.
3376 def SULD_2D_I8_CLAMP
3377 : NVPTXInst<(outs Int16Regs:$r),
3378 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3379 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3381 def SULD_2D_I16_CLAMP
3382 : NVPTXInst<(outs Int16Regs:$r),
3383 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3384 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3386 def SULD_2D_I32_CLAMP
3387 : NVPTXInst<(outs Int32Regs:$r),
3388 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3389 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3391 def SULD_2D_I64_CLAMP
3392 : NVPTXInst<(outs Int64Regs:$r),
3393 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3394 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.a2d.<b8|b16|b32|b64>.clamp`, producing one result $r from
// Int64Regs $s with Int32Regs operands $l, $x, $y. The asm string writes the
// coordinate vector as {$l, $x, $y, $y}: $y is listed twice (presumably to
// fill a 4-element vector -- confirm against the PTX ISA).
// The b8 and b16 forms both place $r in Int16Regs.
3397 def SULD_2D_ARRAY_I8_CLAMP
3398 : NVPTXInst<(outs Int16Regs:$r),
3399 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3400 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3402 def SULD_2D_ARRAY_I16_CLAMP
3403 : NVPTXInst<(outs Int16Regs:$r),
3404 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3405 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3407 def SULD_2D_ARRAY_I32_CLAMP
3408 : NVPTXInst<(outs Int32Regs:$r),
3409 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3410 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3412 def SULD_2D_ARRAY_I64_CLAMP
3413 : NVPTXInst<(outs Int64Regs:$r),
3414 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3415 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_{I8,I16,I32,I64}_CLAMP: emit `suld.b.3d.<b8|b16|b32|b64>.clamp`,
// producing one result $r from Int64Regs $s with Int32Regs coordinates
// $x, $y, $z. The asm string writes the coordinate vector as
// {$x, $y, $z, $z}: $z is listed twice (presumably to fill a 4-element
// vector -- confirm against the PTX ISA).
// The b8 and b16 forms both place $r in Int16Regs.
3418 def SULD_3D_I8_CLAMP
3419 : NVPTXInst<(outs Int16Regs:$r),
3420 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3421 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3423 def SULD_3D_I16_CLAMP
3424 : NVPTXInst<(outs Int16Regs:$r),
3425 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3426 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3428 def SULD_3D_I32_CLAMP
3429 : NVPTXInst<(outs Int32Regs:$r),
3430 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3431 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3433 def SULD_3D_I64_CLAMP
3434 : NVPTXInst<(outs Int64Regs:$r),
3435 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3436 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// SULD_1D_V2{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.1d.v2.<b8|b16|b32|b64>.clamp`, producing two results {$r, $g}
// from Int64Regs $s at Int32Regs coordinate $x. The b8 and b16 forms both
// use Int16Regs results; b32 uses Int32Regs and b64 uses Int64Regs.
3441 def SULD_1D_V2I8_CLAMP
3442 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3443 (ins Int64Regs:$s, Int32Regs:$x),
3444 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3446 def SULD_1D_V2I16_CLAMP
3447 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3448 (ins Int64Regs:$s, Int32Regs:$x),
3449 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3451 def SULD_1D_V2I32_CLAMP
3452 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3453 (ins Int64Regs:$s, Int32Regs:$x),
3454 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3456 def SULD_1D_V2I64_CLAMP
3457 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3458 (ins Int64Regs:$s, Int32Regs:$x),
3459 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_V2{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.a1d.v2.<b8|b16|b32|b64>.clamp`, producing two results {$r, $g}
// from Int64Regs $s addressed by the Int32Regs pair {$l, $x}. The b8 and
// b16 forms both use Int16Regs results; b32 uses Int32Regs, b64 Int64Regs.
3462 def SULD_1D_ARRAY_V2I8_CLAMP
3463 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3464 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3465 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3467 def SULD_1D_ARRAY_V2I16_CLAMP
3468 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3469 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3470 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3472 def SULD_1D_ARRAY_V2I32_CLAMP
3473 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3474 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3475 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3477 def SULD_1D_ARRAY_V2I64_CLAMP
3478 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3479 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3480 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// SULD_2D_V2{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.2d.v2.<b8|b16|b32|b64>.clamp`, producing two results {$r, $g}
// from Int64Regs $s at Int32Regs coordinates {$x, $y}. The b8 and b16 forms
// both use Int16Regs results; b32 uses Int32Regs and b64 uses Int64Regs.
3483 def SULD_2D_V2I8_CLAMP
3484 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3485 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3486 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3488 def SULD_2D_V2I16_CLAMP
3489 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3490 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3491 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3493 def SULD_2D_V2I32_CLAMP
3494 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3495 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3496 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3498 def SULD_2D_V2I64_CLAMP
3499 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3500 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3501 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_V2{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.a2d.v2.<b8|b16|b32|b64>.clamp`, producing two results {$r, $g}
// from Int64Regs $s with Int32Regs operands $l, $x, $y. The asm string
// writes the coordinate vector as {$l, $x, $y, $y}: $y is listed twice
// (presumably to fill a 4-element vector -- confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results.
3504 def SULD_2D_ARRAY_V2I8_CLAMP
3505 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3506 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3507 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3508 "[$s, \\{$l, $x, $y, $y\\}];",
3510 def SULD_2D_ARRAY_V2I16_CLAMP
3511 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3512 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3513 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3514 "[$s, \\{$l, $x, $y, $y\\}];",
3516 def SULD_2D_ARRAY_V2I32_CLAMP
3517 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3518 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3519 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3520 "[$s, \\{$l, $x, $y, $y\\}];",
3522 def SULD_2D_ARRAY_V2I64_CLAMP
3523 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3524 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3525 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3526 "[$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_V2{I8,I16,I32,I64}_CLAMP: emit
// `suld.b.3d.v2.<b8|b16|b32|b64>.clamp`, producing two results {$r, $g}
// from Int64Regs $s with Int32Regs coordinates $x, $y, $z. The asm string
// writes the coordinate vector as {$x, $y, $z, $z}: $z is listed twice
// (presumably to fill a 4-element vector -- confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results.
3529 def SULD_3D_V2I8_CLAMP
3530 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3531 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3532 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3534 def SULD_3D_V2I16_CLAMP
3535 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3536 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3537 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3539 def SULD_3D_V2I32_CLAMP
3540 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3541 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3542 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3544 def SULD_3D_V2I64_CLAMP
3545 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3547 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// SULD_1D_V4{I8,I16,I32}_CLAMP: emit `suld.b.1d.v4.<b8|b16|b32>.clamp`,
// producing four results {$r, $g, $b, $a} from Int64Regs $s at Int32Regs
// coordinate $x. The b8 and b16 forms both use Int16Regs results; b32 uses
// Int32Regs. No 64-bit V4 record appears in this group.
3552 def SULD_1D_V4I8_CLAMP
3553 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3554 (ins Int64Regs:$s, Int32Regs:$x),
3555 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3557 def SULD_1D_V4I16_CLAMP
3558 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3559 (ins Int64Regs:$s, Int32Regs:$x),
3560 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3562 def SULD_1D_V4I32_CLAMP
3563 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3564 (ins Int64Regs:$s, Int32Regs:$x),
3565 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_V4{I8,I16,I32}_CLAMP: emit
// `suld.b.a1d.v4.<b8|b16|b32>.clamp`, producing four results
// {$r, $g, $b, $a} from Int64Regs $s addressed by the Int32Regs pair
// {$l, $x}. The b8 and b16 forms both use Int16Regs results; b32 uses
// Int32Regs.
3568 def SULD_1D_ARRAY_V4I8_CLAMP
3569 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3570 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3571 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3572 "[$s, \\{$l, $x\\}];",
3574 def SULD_1D_ARRAY_V4I16_CLAMP
3575 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3576 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3577 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3578 "[$s, \\{$l, $x\\}];",
3580 def SULD_1D_ARRAY_V4I32_CLAMP
3581 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3582 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3583 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3584 "[$s, \\{$l, $x\\}];",
// SULD_2D_V4{I8,I16,I32}_CLAMP: emit `suld.b.2d.v4.<b8|b16|b32>.clamp`,
// producing four results {$r, $g, $b, $a} from Int64Regs $s at Int32Regs
// coordinates {$x, $y}. The b8 and b16 forms both use Int16Regs results;
// b32 uses Int32Regs.
3587 def SULD_2D_V4I8_CLAMP
3588 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3589 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3590 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3592 def SULD_2D_V4I16_CLAMP
3593 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3594 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3595 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3597 def SULD_2D_V4I32_CLAMP
3598 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3599 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3600 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_V4{I8,I16,I32}_CLAMP: emit
// `suld.b.a2d.v4.<b8|b16|b32>.clamp`, producing four results
// {$r, $g, $b, $a} from Int64Regs $s with Int32Regs operands $l, $x, $y.
// The asm string writes the coordinate vector as {$l, $x, $y, $y}: $y is
// listed twice (presumably 4-element padding -- confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results; b32 uses Int32Regs.
3603 def SULD_2D_ARRAY_V4I8_CLAMP
3604 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3605 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3606 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3607 "[$s, \\{$l, $x, $y, $y\\}];",
3609 def SULD_2D_ARRAY_V4I16_CLAMP
3610 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3611 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3612 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3613 "[$s, \\{$l, $x, $y, $y\\}];",
3615 def SULD_2D_ARRAY_V4I32_CLAMP
3616 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3617 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3618 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3619 "[$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_V4{I8,I16,I32}_CLAMP: emit `suld.b.3d.v4.<b8|b16|b32>.clamp`,
// producing four results {$r, $g, $b, $a} from Int64Regs $s with Int32Regs
// coordinates $x, $y, $z. The asm string writes the coordinate vector as
// {$x, $y, $z, $z}: $z is listed twice (presumably 4-element padding --
// confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results; b32 uses Int32Regs.
3623 def SULD_3D_V4I8_CLAMP
3624 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3625 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3626 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3627 "[$s, \\{$x, $y, $z, $z\\}];",
3629 def SULD_3D_V4I16_CLAMP
3630 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3631 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3632 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3633 "[$s, \\{$x, $y, $z, $z\\}];",
3635 def SULD_3D_V4I32_CLAMP
3636 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3637 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3638 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3639 "[$s, \\{$x, $y, $z, $z\\}];",
// 1D scalar surface loads with the `.trap` modifier:
// `suld.b.1d.<b8|b16|b32|b64>.trap`, one result $r from Int64Regs $s at
// Int32Regs coordinate $x. The b8 and b16 forms both place $r in Int16Regs.
// NOTE(review): the `def` line naming the first (b8) record is not visible
// here; by analogy with its siblings it is presumably SULD_1D_I8_TRAP --
// confirm.
3647 : NVPTXInst<(outs Int16Regs:$r),
3648 (ins Int64Regs:$s, Int32Regs:$x),
3649 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3651 def SULD_1D_I16_TRAP
3652 : NVPTXInst<(outs Int16Regs:$r),
3653 (ins Int64Regs:$s, Int32Regs:$x),
3654 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3656 def SULD_1D_I32_TRAP
3657 : NVPTXInst<(outs Int32Regs:$r),
3658 (ins Int64Regs:$s, Int32Regs:$x),
3659 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3661 def SULD_1D_I64_TRAP
3662 : NVPTXInst<(outs Int64Regs:$r),
3663 (ins Int64Regs:$s, Int32Regs:$x),
3664 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_{I8,I16,I32,I64}_TRAP: emit
// `suld.b.a1d.<b8|b16|b32|b64>.trap`, producing one result $r from
// Int64Regs $s addressed by the Int32Regs pair {$l, $x}.
// The b8 and b16 forms both place $r in Int16Regs; b32 uses Int32Regs and
// b64 uses Int64Regs.
3667 def SULD_1D_ARRAY_I8_TRAP
3668 : NVPTXInst<(outs Int16Regs:$r),
3669 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3670 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3672 def SULD_1D_ARRAY_I16_TRAP
3673 : NVPTXInst<(outs Int16Regs:$r),
3674 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3675 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3677 def SULD_1D_ARRAY_I32_TRAP
3678 : NVPTXInst<(outs Int32Regs:$r),
3679 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3680 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3682 def SULD_1D_ARRAY_I64_TRAP
3683 : NVPTXInst<(outs Int64Regs:$r),
3684 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3685 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D scalar surface loads with the `.trap` modifier:
// `suld.b.2d.<b8|b16|b32|b64>.trap`, one result $r from Int64Regs $s at
// Int32Regs coordinates {$x, $y}. The b8 and b16 forms both place $r in
// Int16Regs.
// NOTE(review): the `def` line naming the first (b8) record is not visible
// here; by analogy with its siblings it is presumably SULD_2D_I8_TRAP --
// confirm.
3689 : NVPTXInst<(outs Int16Regs:$r),
3690 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3691 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3693 def SULD_2D_I16_TRAP
3694 : NVPTXInst<(outs Int16Regs:$r),
3695 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3696 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3698 def SULD_2D_I32_TRAP
3699 : NVPTXInst<(outs Int32Regs:$r),
3700 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3701 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3703 def SULD_2D_I64_TRAP
3704 : NVPTXInst<(outs Int64Regs:$r),
3705 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3706 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_{I8,I16,I32,I64}_TRAP: emit
// `suld.b.a2d.<b8|b16|b32|b64>.trap`, producing one result $r from
// Int64Regs $s with Int32Regs operands $l, $x, $y. The asm string writes the
// coordinate vector as {$l, $x, $y, $y}: $y is listed twice (presumably to
// fill a 4-element vector -- confirm against the PTX ISA).
// The b8 and b16 forms both place $r in Int16Regs.
3709 def SULD_2D_ARRAY_I8_TRAP
3710 : NVPTXInst<(outs Int16Regs:$r),
3711 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3712 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3714 def SULD_2D_ARRAY_I16_TRAP
3715 : NVPTXInst<(outs Int16Regs:$r),
3716 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3717 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3719 def SULD_2D_ARRAY_I32_TRAP
3720 : NVPTXInst<(outs Int32Regs:$r),
3721 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3722 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3724 def SULD_2D_ARRAY_I64_TRAP
3725 : NVPTXInst<(outs Int64Regs:$r),
3726 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3727 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D scalar surface loads with the `.trap` modifier:
// `suld.b.3d.<b8|b16|b32|b64>.trap`, one result $r from Int64Regs $s with
// Int32Regs coordinates $x, $y, $z. The asm string writes the coordinate
// vector as {$x, $y, $z, $z}: $z is listed twice (presumably 4-element
// padding -- confirm against the PTX ISA). The b8 and b16 forms both place
// $r in Int16Regs.
// NOTE(review): the `def` line naming the first (b8) record is not visible
// here; by analogy with its siblings it is presumably SULD_3D_I8_TRAP --
// confirm.
3731 : NVPTXInst<(outs Int16Regs:$r),
3732 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3733 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3735 def SULD_3D_I16_TRAP
3736 : NVPTXInst<(outs Int16Regs:$r),
3737 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3738 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3740 def SULD_3D_I32_TRAP
3741 : NVPTXInst<(outs Int32Regs:$r),
3742 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3743 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3745 def SULD_3D_I64_TRAP
3746 : NVPTXInst<(outs Int64Regs:$r),
3747 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3748 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// SULD_1D_V2{I8,I16,I32,I64}_TRAP: emit
// `suld.b.1d.v2.<b8|b16|b32|b64>.trap`, producing two results {$r, $g}
// from Int64Regs $s at Int32Regs coordinate $x. The b8 and b16 forms both
// use Int16Regs results; b32 uses Int32Regs and b64 uses Int64Regs.
3753 def SULD_1D_V2I8_TRAP
3754 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3755 (ins Int64Regs:$s, Int32Regs:$x),
3756 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3758 def SULD_1D_V2I16_TRAP
3759 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3760 (ins Int64Regs:$s, Int32Regs:$x),
3761 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3763 def SULD_1D_V2I32_TRAP
3764 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3765 (ins Int64Regs:$s, Int32Regs:$x),
3766 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3768 def SULD_1D_V2I64_TRAP
3769 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3770 (ins Int64Regs:$s, Int32Regs:$x),
3771 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_V2{I8,I16,I32,I64}_TRAP: emit
// `suld.b.a1d.v2.<b8|b16|b32|b64>.trap`, producing two results {$r, $g}
// from Int64Regs $s addressed by the Int32Regs pair {$l, $x}. The b8 and
// b16 forms both use Int16Regs results; b32 uses Int32Regs, b64 Int64Regs.
3774 def SULD_1D_ARRAY_V2I8_TRAP
3775 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3776 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3777 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3779 def SULD_1D_ARRAY_V2I16_TRAP
3780 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3781 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3782 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3784 def SULD_1D_ARRAY_V2I32_TRAP
3785 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3786 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3787 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3789 def SULD_1D_ARRAY_V2I64_TRAP
3790 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3791 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3792 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// SULD_2D_V2{I8,I16,I32,I64}_TRAP: emit
// `suld.b.2d.v2.<b8|b16|b32|b64>.trap`, producing two results {$r, $g}
// from Int64Regs $s at Int32Regs coordinates {$x, $y}. The b8 and b16 forms
// both use Int16Regs results; b32 uses Int32Regs and b64 uses Int64Regs.
3795 def SULD_2D_V2I8_TRAP
3796 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3797 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3798 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3800 def SULD_2D_V2I16_TRAP
3801 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3802 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3803 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3805 def SULD_2D_V2I32_TRAP
3806 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3807 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3808 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3810 def SULD_2D_V2I64_TRAP
3811 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3812 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3813 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_V2{I8,I16,I32,I64}_TRAP: emit
// `suld.b.a2d.v2.<b8|b16|b32|b64>.trap`, producing two results {$r, $g}
// from Int64Regs $s with Int32Regs operands $l, $x, $y. The asm string
// writes the coordinate vector as {$l, $x, $y, $y}: $y is listed twice
// (presumably to fill a 4-element vector -- confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results.
3816 def SULD_2D_ARRAY_V2I8_TRAP
3817 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3818 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3819 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
3820 "[$s, \\{$l, $x, $y, $y\\}];",
3822 def SULD_2D_ARRAY_V2I16_TRAP
3823 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3824 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3825 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
3826 "[$s, \\{$l, $x, $y, $y\\}];",
3828 def SULD_2D_ARRAY_V2I32_TRAP
3829 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3830 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3831 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
3832 "[$s, \\{$l, $x, $y, $y\\}];",
3834 def SULD_2D_ARRAY_V2I64_TRAP
3835 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3836 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3837 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
3838 "[$s, \\{$l, $x, $y, $y\\}];",
// SULD_3D_V2{I8,I16,I32,I64}_TRAP: emit
// `suld.b.3d.v2.<b8|b16|b32|b64>.trap`, producing two results {$r, $g}
// from Int64Regs $s with Int32Regs coordinates $x, $y, $z. The asm string
// writes the coordinate vector as {$x, $y, $z, $z}: $z is listed twice
// (presumably to fill a 4-element vector -- confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results.
3841 def SULD_3D_V2I8_TRAP
3842 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3843 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3844 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3846 def SULD_3D_V2I16_TRAP
3847 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3848 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3849 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3851 def SULD_3D_V2I32_TRAP
3852 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3853 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3854 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3856 def SULD_3D_V2I64_TRAP
3857 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3858 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3859 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// SULD_1D_V4{I8,I16,I32}_TRAP: emit `suld.b.1d.v4.<b8|b16|b32>.trap`,
// producing four results {$r, $g, $b, $a} from Int64Regs $s at Int32Regs
// coordinate $x. The b8 and b16 forms both use Int16Regs results; b32 uses
// Int32Regs. No 64-bit V4 record appears in this group.
3864 def SULD_1D_V4I8_TRAP
3865 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3866 (ins Int64Regs:$s, Int32Regs:$x),
3867 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3869 def SULD_1D_V4I16_TRAP
3870 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3871 (ins Int64Regs:$s, Int32Regs:$x),
3872 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3874 def SULD_1D_V4I32_TRAP
3875 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3876 (ins Int64Regs:$s, Int32Regs:$x),
3877 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// SULD_1D_ARRAY_V4{I8,I16,I32}_TRAP: emit
// `suld.b.a1d.v4.<b8|b16|b32>.trap`, producing four results
// {$r, $g, $b, $a} from Int64Regs $s addressed by the Int32Regs pair
// {$l, $x}. The b8 and b16 forms both use Int16Regs results; b32 uses
// Int32Regs.
3880 def SULD_1D_ARRAY_V4I8_TRAP
3881 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3882 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3883 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3884 "[$s, \\{$l, $x\\}];",
3886 def SULD_1D_ARRAY_V4I16_TRAP
3887 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3888 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3889 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3890 "[$s, \\{$l, $x\\}];",
3892 def SULD_1D_ARRAY_V4I32_TRAP
3893 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3894 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3895 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3896 "[$s, \\{$l, $x\\}];",
// SULD_2D_V4{I8,I16,I32}_TRAP: emit `suld.b.2d.v4.<b8|b16|b32>.trap`,
// producing four results {$r, $g, $b, $a} from Int64Regs $s at Int32Regs
// coordinates {$x, $y}. The b8 and b16 forms both use Int16Regs results;
// b32 uses Int32Regs.
3899 def SULD_2D_V4I8_TRAP
3900 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3901 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3902 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3904 def SULD_2D_V4I16_TRAP
3905 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3906 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3907 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3909 def SULD_2D_V4I32_TRAP
3910 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3911 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3912 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// SULD_2D_ARRAY_V4{I8,I16,I32}_TRAP: emit
// `suld.b.a2d.v4.<b8|b16|b32>.trap`, producing four results
// {$r, $g, $b, $a} from Int64Regs $s with Int32Regs operands $l, $x, $y.
// The asm string writes the coordinate vector as {$l, $x, $y, $y}: $y is
// listed twice (presumably 4-element padding -- confirm against the PTX ISA).
// The b8 and b16 forms both use Int16Regs results; b32 uses Int32Regs.
3915 def SULD_2D_ARRAY_V4I8_TRAP
3916 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3917 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3918 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3919 "[$s, \\{$l, $x, $y, $y\\}];",
3921 def SULD_2D_ARRAY_V4I16_TRAP
3922 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3923 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3924 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3925 "[$s, \\{$l, $x, $y, $y\\}];",
3927 def SULD_2D_ARRAY_V4I32_TRAP
3928 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3929 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3930 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3931 "[$s, \\{$l, $x, $y, $y\\}];",
// 3-D surface loads, four channels, `.trap` mode.
// The three coordinates ($x, $y, $z) are printed as a four-element address
// vector by repeating $z in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.3d`.
3935 def SULD_3D_V4I8_TRAP
3936 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3937 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3938 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3939 "[$s, \\{$x, $y, $z, $z\\}];",
3941 def SULD_3D_V4I16_TRAP
3942 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3943 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3944 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3945 "[$s, \\{$x, $y, $z, $z\\}];",
3947 def SULD_3D_V4I32_TRAP
3948 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3949 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3950 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3951 "[$s, \\{$x, $y, $z, $z\\}];",
// 1-D surface loads, single channel $r, `.zero` mode: b8, b16, b32 and b64.
// Result register class follows the element size (Int16Regs for both b8 and
// b16, Int32Regs for b32, Int64Regs for b64); the address is $s plus {$x}.
// See the PTX ISA `suld` entry for the exact meaning of the `.zero` suffix.
3958 : NVPTXInst<(outs Int16Regs:$r),
3959 (ins Int64Regs:$s, Int32Regs:$x),
3960 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
3962 def SULD_1D_I16_ZERO
3963 : NVPTXInst<(outs Int16Regs:$r),
3964 (ins Int64Regs:$s, Int32Regs:$x),
3965 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
3967 def SULD_1D_I32_ZERO
3968 : NVPTXInst<(outs Int32Regs:$r),
3969 (ins Int64Regs:$s, Int32Regs:$x),
3970 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
3972 def SULD_1D_I64_ZERO
3973 : NVPTXInst<(outs Int64Regs:$r),
3974 (ins Int64Regs:$s, Int32Regs:$x),
3975 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
// 1-D array surface loads, single channel $r, `.zero` mode: b8 through b64.
// The address vector is {$l, $x}, both Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// PTX ISA description of `suld.b.a1d`.
3978 def SULD_1D_ARRAY_I8_ZERO
3979 : NVPTXInst<(outs Int16Regs:$r),
3980 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3981 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3983 def SULD_1D_ARRAY_I16_ZERO
3984 : NVPTXInst<(outs Int16Regs:$r),
3985 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3986 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3988 def SULD_1D_ARRAY_I32_ZERO
3989 : NVPTXInst<(outs Int32Regs:$r),
3990 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3991 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3993 def SULD_1D_ARRAY_I64_ZERO
3994 : NVPTXInst<(outs Int64Regs:$r),
3995 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3996 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2-D surface loads, single channel $r, `.zero` mode: b8, b16, b32 and b64.
// The address is the surface operand $s plus the vector {$x, $y}; b8 and b16
// results both use Int16Regs.
4000 : NVPTXInst<(outs Int16Regs:$r),
4001 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4002 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4004 def SULD_2D_I16_ZERO
4005 : NVPTXInst<(outs Int16Regs:$r),
4006 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4007 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4009 def SULD_2D_I32_ZERO
4010 : NVPTXInst<(outs Int32Regs:$r),
4011 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4012 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4014 def SULD_2D_I64_ZERO
4015 : NVPTXInst<(outs Int64Regs:$r),
4016 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4017 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2-D array surface loads, single channel $r, `.zero` mode: b8 through b64.
// The three inputs ($l, $x, $y) are printed as a four-element address vector
// with $y repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.a2d`.
4020 def SULD_2D_ARRAY_I8_ZERO
4021 : NVPTXInst<(outs Int16Regs:$r),
4022 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4023 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4025 def SULD_2D_ARRAY_I16_ZERO
4026 : NVPTXInst<(outs Int16Regs:$r),
4027 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4028 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4030 def SULD_2D_ARRAY_I32_ZERO
4031 : NVPTXInst<(outs Int32Regs:$r),
4032 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4033 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4035 def SULD_2D_ARRAY_I64_ZERO
4036 : NVPTXInst<(outs Int64Regs:$r),
4037 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4038 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3-D surface loads, single channel $r, `.zero` mode: b8, b16, b32 and b64.
// The three coordinates are printed as a four-element address vector with $z
// repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.3d`.
4042 : NVPTXInst<(outs Int16Regs:$r),
4043 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4044 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4046 def SULD_3D_I16_ZERO
4047 : NVPTXInst<(outs Int16Regs:$r),
4048 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4049 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4051 def SULD_3D_I32_ZERO
4052 : NVPTXInst<(outs Int32Regs:$r),
4053 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4054 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4056 def SULD_3D_I64_ZERO
4057 : NVPTXInst<(outs Int64Regs:$r),
4058 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4059 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 1-D surface loads, two channels ($r, $g), `.zero` mode: b8 through b64.
// Both results share one register class per variant (Int16Regs for b8 and
// b16, Int32Regs for b32, Int64Regs for b64); the address is $s plus {$x}.
4064 def SULD_1D_V2I8_ZERO
4065 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4066 (ins Int64Regs:$s, Int32Regs:$x),
4067 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4069 def SULD_1D_V2I16_ZERO
4070 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4071 (ins Int64Regs:$s, Int32Regs:$x),
4072 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4074 def SULD_1D_V2I32_ZERO
4075 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4076 (ins Int64Regs:$s, Int32Regs:$x),
4077 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4079 def SULD_1D_V2I64_ZERO
4080 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4081 (ins Int64Regs:$s, Int32Regs:$x),
4082 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1-D array surface loads, two channels ($r, $g), `.zero` mode: b8 through b64.
// The address vector is {$l, $x}, both Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// PTX ISA description of `suld.b.a1d`.
4085 def SULD_1D_ARRAY_V2I8_ZERO
4086 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4087 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4088 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4090 def SULD_1D_ARRAY_V2I16_ZERO
4091 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4092 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4093 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4095 def SULD_1D_ARRAY_V2I32_ZERO
4096 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4097 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4098 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4100 def SULD_1D_ARRAY_V2I64_ZERO
4101 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4102 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4103 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2-D surface loads, two channels ($r, $g), `.zero` mode: b8 through b64.
// The address is the surface operand $s plus the vector {$x, $y}.
4106 def SULD_2D_V2I8_ZERO
4107 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4108 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4109 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4111 def SULD_2D_V2I16_ZERO
4112 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4113 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4114 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4116 def SULD_2D_V2I32_ZERO
4117 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4118 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4119 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4121 def SULD_2D_V2I64_ZERO
4122 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4123 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4124 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2-D array surface loads, two channels ($r, $g), `.zero` mode: b8 through b64.
// The three inputs ($l, $x, $y) are printed as a four-element address vector
// with $y repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.a2d`.
4127 def SULD_2D_ARRAY_V2I8_ZERO
4128 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4129 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4130 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4131 "[$s, \\{$l, $x, $y, $y\\}];",
4133 def SULD_2D_ARRAY_V2I16_ZERO
4134 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4135 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4136 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4137 "[$s, \\{$l, $x, $y, $y\\}];",
4139 def SULD_2D_ARRAY_V2I32_ZERO
4140 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4141 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4142 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4143 "[$s, \\{$l, $x, $y, $y\\}];",
4145 def SULD_2D_ARRAY_V2I64_ZERO
4146 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4147 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4148 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4149 "[$s, \\{$l, $x, $y, $y\\}];",
// 3-D surface loads, two channels ($r, $g), `.zero` mode: b8 through b64.
// The three coordinates are printed as a four-element address vector with $z
// repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.3d`.
4152 def SULD_3D_V2I8_ZERO
4153 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4154 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4155 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4157 def SULD_3D_V2I16_ZERO
4158 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4159 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4160 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4162 def SULD_3D_V2I32_ZERO
4163 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4164 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4165 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4167 def SULD_3D_V2I64_ZERO
4168 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4169 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4170 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// 1-D surface loads, four channels ($r, $g, $b, $a), `.zero` mode.
// Only b8, b16 and b32 variants appear in this family; b8 and b16 both return
// their channels in Int16Regs.
4175 def SULD_1D_V4I8_ZERO
4176 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4177 (ins Int64Regs:$s, Int32Regs:$x),
4178 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4180 def SULD_1D_V4I16_ZERO
4181 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4182 (ins Int64Regs:$s, Int32Regs:$x),
4183 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4185 def SULD_1D_V4I32_ZERO
4186 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4187 (ins Int64Regs:$s, Int32Regs:$x),
4188 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1-D array surface loads, four channels, `.zero` mode (b8, b16, b32).
// The address vector is {$l, $x}, both Int32Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// PTX ISA description of `suld.b.a1d`.
4191 def SULD_1D_ARRAY_V4I8_ZERO
4192 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4193 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4194 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4195 "[$s, \\{$l, $x\\}];",
4197 def SULD_1D_ARRAY_V4I16_ZERO
4198 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4199 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4200 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4201 "[$s, \\{$l, $x\\}];",
4203 def SULD_1D_ARRAY_V4I32_ZERO
4204 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4205 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4206 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4207 "[$s, \\{$l, $x\\}];",
// 2-D surface loads, four channels ($r, $g, $b, $a), `.zero` mode.
// The address is the surface operand $s plus the vector {$x, $y}.
4210 def SULD_2D_V4I8_ZERO
4211 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4212 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4213 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4215 def SULD_2D_V4I16_ZERO
4216 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4217 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4218 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4220 def SULD_2D_V4I32_ZERO
4221 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4222 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4223 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2-D array surface loads, four channels, `.zero` mode (b8, b16, b32).
// The three inputs ($l, $x, $y) are printed as a four-element address vector
// with $y repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.a2d`.
4226 def SULD_2D_ARRAY_V4I8_ZERO
4227 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4228 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4229 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4230 "[$s, \\{$l, $x, $y, $y\\}];",
4232 def SULD_2D_ARRAY_V4I16_ZERO
4233 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4234 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4235 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4236 "[$s, \\{$l, $x, $y, $y\\}];",
4238 def SULD_2D_ARRAY_V4I32_ZERO
4239 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4240 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4241 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4242 "[$s, \\{$l, $x, $y, $y\\}];",
// 3-D surface loads, four channels, `.zero` mode (b8, b16, b32).
// The three coordinates are printed as a four-element address vector with $z
// repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `suld.b.3d`.
4246 def SULD_3D_V4I8_ZERO
4247 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4248 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4249 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4250 "[$s, \\{$x, $y, $z, $z\\}];",
4252 def SULD_3D_V4I16_ZERO
4253 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4254 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4255 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4256 "[$s, \\{$x, $y, $z, $z\\}];",
4258 def SULD_3D_V4I32_ZERO
4259 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4260 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4261 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4262 "[$s, \\{$x, $y, $z, $z\\}];",
4266 //-----------------------------------
4267 // Texture Query Intrinsics
4268 //-----------------------------------
// Texture attribute query instructions. Every record in this `let` scope gets
// IsSurfTexQuery = 1. Each one takes the operand $a in Int64Regs and writes its
// result to $d in Int32Regs, printing `txq.<attribute>.b32 $d, [$a];`.
// Attributes covered, in order: channel_order, channel_data_type, width,
// height, depth, array_size, num_samples, num_mipmap_levels.
4270 let IsSurfTexQuery = 1 in {
4271 def TXQ_CHANNEL_ORDER
4272 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4273 "txq.channel_order.b32 \t$d, [$a];",
4275 def TXQ_CHANNEL_DATA_TYPE
4276 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4277 "txq.channel_data_type.b32 \t$d, [$a];",
4280 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4281 "txq.width.b32 \t$d, [$a];",
4284 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4285 "txq.height.b32 \t$d, [$a];",
4288 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4289 "txq.depth.b32 \t$d, [$a];",
4292 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4293 "txq.array_size.b32 \t$d, [$a];",
4296 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4297 "txq.num_samples.b32 \t$d, [$a];",
4299 def TXQ_NUM_MIPMAP_LEVELS
4300 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4301 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to an Int64Regs operand is rewritten to the TXQ_* instruction of the
// same attribute name, passing $a through unchanged.
4305 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4306 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4307 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4308 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4309 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4310 (TXQ_WIDTH Int64Regs:$a)>;
4311 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4312 (TXQ_HEIGHT Int64Regs:$a)>;
4313 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4314 (TXQ_DEPTH Int64Regs:$a)>;
4315 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4316 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4317 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4318 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4319 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4320 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4323 //-----------------------------------
4324 // Surface Query Intrinsics
4325 //-----------------------------------
// Surface attribute query instructions. Every record in this `let` scope gets
// IsSurfTexQuery = 1. Each one takes the operand $a in Int64Regs and writes its
// result to $d in Int32Regs, printing `suq.<attribute>.b32 $d, [$a];`.
// Attributes covered, in order: channel_order, channel_data_type, width,
// height, depth, array_size.
4327 let IsSurfTexQuery = 1 in {
4328 def SUQ_CHANNEL_ORDER
4329 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4330 "suq.channel_order.b32 \t$d, [$a];",
4332 def SUQ_CHANNEL_DATA_TYPE
4333 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4334 "suq.channel_data_type.b32 \t$d, [$a];",
4337 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4338 "suq.width.b32 \t$d, [$a];",
4341 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4342 "suq.height.b32 \t$d, [$a];",
4345 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4346 "suq.depth.b32 \t$d, [$a];",
4349 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4350 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to an Int64Regs operand is rewritten to the SUQ_* instruction of the
// same attribute name, passing $a through unchanged.
4354 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4355 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4356 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4357 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4358 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4359 (SUQ_WIDTH Int64Regs:$a)>;
4360 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4361 (SUQ_HEIGHT Int64Regs:$a)>;
4362 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4363 (SUQ_DEPTH Int64Regs:$a)>;
4364 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4365 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4368 //===- Handle Query -------------------------------------------------------===//
4370 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicates. Each instruction reads the Int64Regs operand $a and
// writes a result to $d in Int1Regs, printing `istypep.<kind> $d, $a;` for
// kind = samplerref, surfref, texref. The inline pattern selects each one
// directly from the matching int_nvvm_istypep_{sampler,surface,texture}
// intrinsic.
4372 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4373 "istypep.samplerref \t$d, $a;",
4374 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4376 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4377 "istypep.surfref \t$d, $a;",
4378 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4380 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4381 "istypep.texref \t$d, $a;",
4382 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4384 //===- Surface Stores -----------------------------------------------------===//
// 1-D surface stores, single channel, `.clamp` mode: b8, b16, b32 and b64.
// Input order is surface operand $s, coordinate $x, then the data register $r.
// The b8 and b16 variants both take their data in Int16Regs.
// See the PTX ISA `sust` entry for the exact meaning of the `.clamp` suffix.
4389 def SUST_B_1D_B8_CLAMP
4391 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4392 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4394 def SUST_B_1D_B16_CLAMP
4396 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4397 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4399 def SUST_B_1D_B32_CLAMP
4401 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4402 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4404 def SUST_B_1D_B64_CLAMP
4406 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4407 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
// 1-D surface stores, two channels ($r, $g), `.clamp` mode: b8 through b64.
// Both data registers share one class per variant (Int16Regs for b8 and b16,
// Int32Regs for b32, Int64Regs for b64).
4409 def SUST_B_1D_V2B8_CLAMP
4411 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4412 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4414 def SUST_B_1D_V2B16_CLAMP
4416 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4417 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4419 def SUST_B_1D_V2B32_CLAMP
4421 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4422 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4424 def SUST_B_1D_V2B64_CLAMP
4426 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4427 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
// 1-D surface stores, four channels ($r, $g, $b, $a), `.clamp` mode.
// Only b8, b16 and b32 variants appear in this family; b8 and b16 both take
// their data in Int16Regs.
4429 def SUST_B_1D_V4B8_CLAMP
4431 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4432 Int16Regs:$b, Int16Regs:$a),
4433 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4435 def SUST_B_1D_V4B16_CLAMP
4437 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4438 Int16Regs:$b, Int16Regs:$a),
4439 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4441 def SUST_B_1D_V4B32_CLAMP
4443 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4444 Int32Regs:$b, Int32Regs:$a),
4445 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// 1-D array surface stores, single channel, `.clamp` mode: b8 through b64.
// The address vector is {$idx, $x}, both Int32Regs, followed by the data $r.
// NOTE(review): $idx is presumably the array layer index -- confirm against
// the PTX ISA description of `sust.b.a1d`.
4449 def SUST_B_1D_ARRAY_B8_CLAMP
4451 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4452 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4454 def SUST_B_1D_ARRAY_B16_CLAMP
4456 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4457 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4459 def SUST_B_1D_ARRAY_B32_CLAMP
4461 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4462 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4464 def SUST_B_1D_ARRAY_B64_CLAMP
4466 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4467 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// 1-D array surface stores, two channels, `.clamp` mode: b8 through b64.
// The asm string stores {$r, $g} at address [$s, {$idx, $x}].
4469 def SUST_B_1D_ARRAY_V2B8_CLAMP
4471 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4473 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4475 def SUST_B_1D_ARRAY_V2B16_CLAMP
4477 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4479 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4481 def SUST_B_1D_ARRAY_V2B32_CLAMP
4483 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4485 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4487 def SUST_B_1D_ARRAY_V2B64_CLAMP
4489 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4491 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// 1-D array surface stores, four channels, `.clamp` mode (b8, b16, b32).
// The asm string stores {$r, $g, $b, $a} at address [$s, {$idx, $x}]; b8 and
// b16 data both use Int16Regs.
4493 def SUST_B_1D_ARRAY_V4B8_CLAMP
4495 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4496 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4497 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4498 "\\{$r, $g, $b, $a\\};",
4500 def SUST_B_1D_ARRAY_V4B16_CLAMP
4502 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4503 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4504 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4505 "\\{$r, $g, $b, $a\\};",
4507 def SUST_B_1D_ARRAY_V4B32_CLAMP
4509 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4510 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4511 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4512 "\\{$r, $g, $b, $a\\};",
// 2-D surface stores, single channel, `.clamp` mode: b8, b16, b32 and b64.
// Input order is surface operand $s, coordinates $x and $y, then the data $r.
4516 def SUST_B_2D_B8_CLAMP
4518 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4519 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4521 def SUST_B_2D_B16_CLAMP
4523 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4524 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4526 def SUST_B_2D_B32_CLAMP
4528 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4529 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4531 def SUST_B_2D_B64_CLAMP
4533 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4534 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
// 2-D surface stores, two channels, `.clamp` mode: b8 through b64.
// The asm string stores {$r, $g} at address [$s, {$x, $y}].
4536 def SUST_B_2D_V2B8_CLAMP
4538 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4540 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4542 def SUST_B_2D_V2B16_CLAMP
4544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4546 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4548 def SUST_B_2D_V2B32_CLAMP
4550 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4552 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4554 def SUST_B_2D_V2B64_CLAMP
4556 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4558 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// 2-D surface stores, four channels, `.clamp` mode (b8, b16, b32).
// The asm string stores {$r, $g, $b, $a} at address [$s, {$x, $y}]; b8 and b16
// data both use Int16Regs.
4560 def SUST_B_2D_V4B8_CLAMP
4562 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4563 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4564 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4565 "\\{$r, $g, $b, $a\\};",
4567 def SUST_B_2D_V4B16_CLAMP
4569 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4570 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4571 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4572 "\\{$r, $g, $b, $a\\};",
4574 def SUST_B_2D_V4B32_CLAMP
4576 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4577 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4578 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4579 "\\{$r, $g, $b, $a\\};",
// 2-D array surface stores, single channel, `.clamp` mode: b8 through b64.
// The inputs $idx, $x, $y are printed as a four-element address vector with $y
// repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `sust.b.a2d`.
4583 def SUST_B_2D_ARRAY_B8_CLAMP
4585 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4587 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4589 def SUST_B_2D_ARRAY_B16_CLAMP
4591 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4593 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4595 def SUST_B_2D_ARRAY_B32_CLAMP
4597 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4599 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4601 def SUST_B_2D_ARRAY_B64_CLAMP
4603 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4605 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// 2-D array surface stores, two channels ($r, $g), `.clamp` mode: b8 to b64.
// The inputs $idx, $x, $y are printed as a four-element address vector with $y
// repeated in the last slot; b8 and b16 data both use Int16Regs.
4607 def SUST_B_2D_ARRAY_V2B8_CLAMP
4609 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4610 Int16Regs:$r, Int16Regs:$g),
4611 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4614 def SUST_B_2D_ARRAY_V2B16_CLAMP
4616 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4617 Int16Regs:$r, Int16Regs:$g),
4618 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4621 def SUST_B_2D_ARRAY_V2B32_CLAMP
4623 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4624 Int32Regs:$r, Int32Regs:$g),
4625 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4628 def SUST_B_2D_ARRAY_V2B64_CLAMP
4630 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4631 Int64Regs:$r, Int64Regs:$g),
4632 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
// 2-D array surface stores, four channels, `.clamp` mode (b8, b16, b32).
// The asm string stores {$r, $g, $b, $a} at [$s, {$idx, $x, $y, $y}], with $y
// repeated to fill the four-element address vector.
4635 def SUST_B_2D_ARRAY_V4B8_CLAMP
4637 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4638 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4639 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4640 "\\{$r, $g, $b, $a\\};",
4642 def SUST_B_2D_ARRAY_V4B16_CLAMP
4644 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4645 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4646 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4647 "\\{$r, $g, $b, $a\\};",
4649 def SUST_B_2D_ARRAY_V4B32_CLAMP
4651 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4652 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4653 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4654 "\\{$r, $g, $b, $a\\};",
// 3-D surface stores, single channel, `.clamp` mode: b8, b16, b32 and b64.
// The coordinates $x, $y, $z are printed as a four-element address vector with
// $z repeated in the last slot.
// NOTE(review): the fourth element is presumably ignored by the instruction --
// confirm against the PTX ISA description of `sust.b.3d`.
4658 def SUST_B_3D_B8_CLAMP
4660 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4662 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4664 def SUST_B_3D_B16_CLAMP
4666 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4668 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4670 def SUST_B_3D_B32_CLAMP
4672 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4674 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4676 def SUST_B_3D_B64_CLAMP
4678 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4680 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// 3-D surface stores, two channels ($r, $g), `.clamp` mode: b8 through b64.
// The coordinates $x, $y, $z are printed as a four-element address vector with
// $z repeated in the last slot; b8 and b16 data both use Int16Regs.
4682 def SUST_B_3D_V2B8_CLAMP
4684 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4685 Int16Regs:$r, Int16Regs:$g),
4686 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4689 def SUST_B_3D_V2B16_CLAMP
4691 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4692 Int16Regs:$r, Int16Regs:$g),
4693 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4696 def SUST_B_3D_V2B32_CLAMP
4698 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4699 Int32Regs:$r, Int32Regs:$g),
4700 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4703 def SUST_B_3D_V2B64_CLAMP
4705 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4706 Int64Regs:$r, Int64Regs:$g),
4707 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
// 3-D surface stores, four channels, `.clamp` mode (b8, b16, b32).
// The asm string stores {$r, $g, $b, $a} at [$s, {$x, $y, $z, $z}], with $z
// repeated to fill the four-element address vector.
4710 def SUST_B_3D_V4B8_CLAMP
4712 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4713 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4714 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4715 "\\{$r, $g, $b, $a\\};",
4717 def SUST_B_3D_V4B16_CLAMP
4719 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4720 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4721 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4722 "\\{$r, $g, $b, $a\\};",
4724 def SUST_B_3D_V4B32_CLAMP
4726 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4727 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4728 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4729 "\\{$r, $g, $b, $a\\};",
// 1-D surface stores, single channel, `.trap` mode: b8, b16, b32 and b64.
// Same operand layout as the `.clamp` records of this shape: surface operand
// $s, coordinate $x, then the data register $r.
// See the PTX ISA `sust` entry for the exact meaning of the `.trap` suffix.
4734 def SUST_B_1D_B8_TRAP
4736 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4737 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
4739 def SUST_B_1D_B16_TRAP
4741 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4742 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
4744 def SUST_B_1D_B32_TRAP
4746 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4747 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
4749 def SUST_B_1D_B64_TRAP
4751 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4752 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
// 1-D surface stores, two channels ($r, $g), `.trap` mode: b8 through b64.
// Both data registers share one class per variant (Int16Regs for b8 and b16,
// Int32Regs for b32, Int64Regs for b64).
4754 def SUST_B_1D_V2B8_TRAP
4756 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4757 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4759 def SUST_B_1D_V2B16_TRAP
4761 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4762 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4764 def SUST_B_1D_V2B32_TRAP
4766 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4767 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4769 def SUST_B_1D_V2B64_TRAP
4771 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4772 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
// 1-D surface stores, four channels ($r, $g, $b, $a), `.trap` mode.
// Only b8, b16 and b32 variants appear in this family; b8 and b16 both take
// their data in Int16Regs.
4774 def SUST_B_1D_V4B8_TRAP
4776 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4777 Int16Regs:$b, Int16Regs:$a),
4778 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4780 def SUST_B_1D_V4B16_TRAP
4782 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4783 Int16Regs:$b, Int16Regs:$a),
4784 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4786 def SUST_B_1D_V4B32_TRAP
4788 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4789 Int32Regs:$b, Int32Regs:$a),
4790 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// 1-D array surface stores, single channel, `.trap` mode: b8 through b64.
// The address vector is {$idx, $x}, both Int32Regs, followed by the data $r.
// NOTE(review): $idx is presumably the array layer index -- confirm against
// the PTX ISA description of `sust.b.a1d`.
4794 def SUST_B_1D_ARRAY_B8_TRAP
4796 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4797 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4799 def SUST_B_1D_ARRAY_B16_TRAP
4801 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4802 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4804 def SUST_B_1D_ARRAY_B32_TRAP
4806 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4807 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4809 def SUST_B_1D_ARRAY_B64_TRAP
4811 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4812 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// 1-D array surface stores, two channels, `.trap` mode: b8 through b64.
// The asm string stores {$r, $g} at address [$s, {$idx, $x}].
4814 def SUST_B_1D_ARRAY_V2B8_TRAP
4816 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4818 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4820 def SUST_B_1D_ARRAY_V2B16_TRAP
4822 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4824 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4826 def SUST_B_1D_ARRAY_V2B32_TRAP
4828 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4830 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4832 def SUST_B_1D_ARRAY_V2B64_TRAP
4834 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4836 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// 1-D array surface stores, four channels, `.trap` mode (b8, b16, b32).
// The asm string stores {$r, $g, $b, $a} at address [$s, {$idx, $x}]; b8 and
// b16 data both use Int16Regs.
4838 def SUST_B_1D_ARRAY_V4B8_TRAP
4840 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4841 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4842 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
4843 "\\{$r, $g, $b, $a\\};",
4845 def SUST_B_1D_ARRAY_V4B16_TRAP
4847 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4848 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4849 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
4850 "\\{$r, $g, $b, $a\\};",
4852 def SUST_B_1D_ARRAY_V4B32_TRAP
4854 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4855 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4856 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
4857 "\\{$r, $g, $b, $a\\};",
// 2-D surface stores, single channel, `.trap` mode: b8, b16, b32 and b64.
// Input order is surface operand $s, coordinates $x and $y, then the data $r.
4861 def SUST_B_2D_B8_TRAP
4863 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4864 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4866 def SUST_B_2D_B16_TRAP
4868 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4869 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4871 def SUST_B_2D_B32_TRAP
4873 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4874 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4876 def SUST_B_2D_B64_TRAP
4878 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4879 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// 2-D surface stores, two channels, `.trap` mode: b8 through b64.
// The asm string stores {$r, $g} at address [$s, {$x, $y}].
4881 def SUST_B_2D_V2B8_TRAP
4883 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4885 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4887 def SUST_B_2D_V2B16_TRAP
4889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4891 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4893 def SUST_B_2D_V2B32_TRAP
4895 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4897 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4899 def SUST_B_2D_V2B64_TRAP
4901 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4903 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// 2-D surface stores, four channels, `.trap` mode (b8, b16, b32).
// The asm string stores {$r, $g, $b, $a} at address [$s, {$x, $y}]; b8 and b16
// data both use Int16Regs.
4905 def SUST_B_2D_V4B8_TRAP
4907 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4908 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4909 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
4910 "\\{$r, $g, $b, $a\\};",
4912 def SUST_B_2D_V4B16_TRAP
4914 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4915 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4916 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
4917 "\\{$r, $g, $b, $a\\};",
4919 def SUST_B_2D_V4B32_TRAP
4921 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4922 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4923 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
4924 "\\{$r, $g, $b, $a\\};",
// 2D array surface stores emitted as "sust.b.a2d.<shape>.trap".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $idx        - Int32Regs value printed first in the coordinate vector.
//   $x, $y      - Int32Regs coordinates.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y};
// $y is deliberately repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element coordinate vector for
// a2d and ignores the last element; confirm against the PTX ISA.
4928 def SUST_B_2D_ARRAY_B8_TRAP
4930 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4932 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4934 def SUST_B_2D_ARRAY_B16_TRAP
4936 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4938 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4940 def SUST_B_2D_ARRAY_B32_TRAP
4942 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4944 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4946 def SUST_B_2D_ARRAY_B64_TRAP
4948 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4950 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4952 def SUST_B_2D_ARRAY_V2B8_TRAP
4954 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4955 Int16Regs:$r, Int16Regs:$g),
4956 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4959 def SUST_B_2D_ARRAY_V2B16_TRAP
4961 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4962 Int16Regs:$r, Int16Regs:$g),
4963 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4966 def SUST_B_2D_ARRAY_V2B32_TRAP
4968 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4969 Int32Regs:$r, Int32Regs:$g),
4970 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4973 def SUST_B_2D_ARRAY_V2B64_TRAP
4975 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4976 Int64Regs:$r, Int64Regs:$g),
4977 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4980 def SUST_B_2D_ARRAY_V4B8_TRAP
4982 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4983 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4984 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4985 "\\{$r, $g, $b, $a\\};",
4987 def SUST_B_2D_ARRAY_V4B16_TRAP
4989 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4990 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4991 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4992 "\\{$r, $g, $b, $a\\};",
4994 def SUST_B_2D_ARRAY_V4B32_TRAP
4996 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4997 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4998 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
4999 "\\{$r, $g, $b, $a\\};",
// 3D surface stores emitted as "sust.b.3d.<shape>.trap".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x, $y, $z  - Int32Regs coordinates.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z};
// $z is deliberately repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element coordinate vector for
// 3d and ignores the last element; confirm against the PTX ISA.
5003 def SUST_B_3D_B8_TRAP
5005 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5007 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5009 def SUST_B_3D_B16_TRAP
5011 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5013 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5015 def SUST_B_3D_B32_TRAP
5017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5019 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5021 def SUST_B_3D_B64_TRAP
5023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5025 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5027 def SUST_B_3D_V2B8_TRAP
5029 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5030 Int16Regs:$r, Int16Regs:$g),
5031 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5034 def SUST_B_3D_V2B16_TRAP
5036 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5037 Int16Regs:$r, Int16Regs:$g),
5038 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5041 def SUST_B_3D_V2B32_TRAP
5043 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5044 Int32Regs:$r, Int32Regs:$g),
5045 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5048 def SUST_B_3D_V2B64_TRAP
5050 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5051 Int64Regs:$r, Int64Regs:$g),
5052 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5055 def SUST_B_3D_V4B8_TRAP
5057 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5058 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5059 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5060 "\\{$r, $g, $b, $a\\};",
5062 def SUST_B_3D_V4B16_TRAP
5064 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5065 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5066 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5067 "\\{$r, $g, $b, $a\\};",
5069 def SUST_B_3D_V4B32_TRAP
5071 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5072 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5073 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5074 "\\{$r, $g, $b, $a\\};",
// 1D surface stores emitted as "sust.b.1d.<shape>.zero".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x          - Int32Regs coordinate, printed as the one-element vector {$x}.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store. b8 and b16 variants take
//                 Int16Regs, b32 takes Int32Regs, b64 takes Int64Regs.
// NOTE(review): ".zero" presumably selects the PTX out-of-range handling
// mode; confirm against the PTX ISA description of sust.
5079 def SUST_B_1D_B8_ZERO
5081 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5082 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5084 def SUST_B_1D_B16_ZERO
5086 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5087 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5089 def SUST_B_1D_B32_ZERO
5091 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5092 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5094 def SUST_B_1D_B64_ZERO
5096 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5097 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5099 def SUST_B_1D_V2B8_ZERO
5101 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5102 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5104 def SUST_B_1D_V2B16_ZERO
5106 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5107 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5109 def SUST_B_1D_V2B32_ZERO
5111 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5112 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5114 def SUST_B_1D_V2B64_ZERO
5116 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5117 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5119 def SUST_B_1D_V4B8_ZERO
5121 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5122 Int16Regs:$b, Int16Regs:$a),
5123 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5125 def SUST_B_1D_V4B16_ZERO
5127 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5128 Int16Regs:$b, Int16Regs:$a),
5129 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5131 def SUST_B_1D_V4B32_ZERO
5133 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5134 Int32Regs:$b, Int32Regs:$a),
5135 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// 1D array surface stores emitted as "sust.b.a1d.<shape>.zero".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $idx        - Int32Regs value printed first in the coordinate vector.
//   $x          - Int32Regs coordinate; the vector is printed as {$idx, $x}.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// NOTE(review): ".zero" presumably selects the PTX out-of-range handling
// mode; confirm against the PTX ISA description of sust.
5139 def SUST_B_1D_ARRAY_B8_ZERO
5141 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5142 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5144 def SUST_B_1D_ARRAY_B16_ZERO
5146 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5147 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5149 def SUST_B_1D_ARRAY_B32_ZERO
5151 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5152 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5154 def SUST_B_1D_ARRAY_B64_ZERO
5156 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5157 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5159 def SUST_B_1D_ARRAY_V2B8_ZERO
5161 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5163 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5165 def SUST_B_1D_ARRAY_V2B16_ZERO
5167 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5169 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5171 def SUST_B_1D_ARRAY_V2B32_ZERO
5173 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5175 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5177 def SUST_B_1D_ARRAY_V2B64_ZERO
5179 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5181 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5183 def SUST_B_1D_ARRAY_V4B8_ZERO
5185 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5186 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5187 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5188 "\\{$r, $g, $b, $a\\};",
5190 def SUST_B_1D_ARRAY_V4B16_ZERO
5192 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5193 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5194 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5195 "\\{$r, $g, $b, $a\\};",
5197 def SUST_B_1D_ARRAY_V4B32_ZERO
5199 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5200 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5201 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5202 "\\{$r, $g, $b, $a\\};",
// 2D surface stores emitted as "sust.b.2d.<shape>.zero".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x, $y      - Int32Regs coordinates, printed as the vector {$x, $y}.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store. b8 and b16 variants take
//                 Int16Regs, b32 takes Int32Regs, b64 takes Int64Regs.
// NOTE(review): ".zero" presumably selects the PTX out-of-range handling
// mode; confirm against the PTX ISA description of sust.
5206 def SUST_B_2D_B8_ZERO
5208 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5209 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5211 def SUST_B_2D_B16_ZERO
5213 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5214 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5216 def SUST_B_2D_B32_ZERO
5218 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5219 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5221 def SUST_B_2D_B64_ZERO
5223 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5224 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5226 def SUST_B_2D_V2B8_ZERO
5228 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5230 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5232 def SUST_B_2D_V2B16_ZERO
5234 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5236 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5238 def SUST_B_2D_V2B32_ZERO
5240 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5242 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5244 def SUST_B_2D_V2B64_ZERO
5246 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5248 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5250 def SUST_B_2D_V4B8_ZERO
5252 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5253 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5254 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5255 "\\{$r, $g, $b, $a\\};",
5257 def SUST_B_2D_V4B16_ZERO
5259 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5260 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5261 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5262 "\\{$r, $g, $b, $a\\};",
5264 def SUST_B_2D_V4B32_ZERO
5266 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5267 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5268 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5269 "\\{$r, $g, $b, $a\\};",
// 2D array surface stores emitted as "sust.b.a2d.<shape>.zero".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $idx        - Int32Regs value printed first in the coordinate vector.
//   $x, $y      - Int32Regs coordinates.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y};
// $y is deliberately repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element coordinate vector for
// a2d and ignores the last element; confirm against the PTX ISA.
5273 def SUST_B_2D_ARRAY_B8_ZERO
5275 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5277 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5279 def SUST_B_2D_ARRAY_B16_ZERO
5281 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5283 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5285 def SUST_B_2D_ARRAY_B32_ZERO
5287 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5289 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5291 def SUST_B_2D_ARRAY_B64_ZERO
5293 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5295 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5297 def SUST_B_2D_ARRAY_V2B8_ZERO
5299 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5300 Int16Regs:$r, Int16Regs:$g),
5301 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5304 def SUST_B_2D_ARRAY_V2B16_ZERO
5306 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5307 Int16Regs:$r, Int16Regs:$g),
5308 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5311 def SUST_B_2D_ARRAY_V2B32_ZERO
5313 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5314 Int32Regs:$r, Int32Regs:$g),
5315 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5318 def SUST_B_2D_ARRAY_V2B64_ZERO
5320 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5321 Int64Regs:$r, Int64Regs:$g),
5322 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5325 def SUST_B_2D_ARRAY_V4B8_ZERO
5327 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5328 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5329 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5330 "\\{$r, $g, $b, $a\\};",
5332 def SUST_B_2D_ARRAY_V4B16_ZERO
5334 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5335 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5336 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5337 "\\{$r, $g, $b, $a\\};",
5339 def SUST_B_2D_ARRAY_V4B32_ZERO
5341 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5342 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5343 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5344 "\\{$r, $g, $b, $a\\};",
// 3D surface stores emitted as "sust.b.3d.<shape>.zero".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x, $y, $z  - Int32Regs coordinates.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z};
// $z is deliberately repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element coordinate vector for
// 3d and ignores the last element; confirm against the PTX ISA.
5348 def SUST_B_3D_B8_ZERO
5350 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5352 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5354 def SUST_B_3D_B16_ZERO
5356 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5358 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5360 def SUST_B_3D_B32_ZERO
5362 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5364 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5366 def SUST_B_3D_B64_ZERO
5368 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5370 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5372 def SUST_B_3D_V2B8_ZERO
5374 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5375 Int16Regs:$r, Int16Regs:$g),
5376 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5379 def SUST_B_3D_V2B16_ZERO
5381 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5382 Int16Regs:$r, Int16Regs:$g),
5383 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5386 def SUST_B_3D_V2B32_ZERO
5388 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5389 Int32Regs:$r, Int32Regs:$g),
5390 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5393 def SUST_B_3D_V2B64_ZERO
5395 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5396 Int64Regs:$r, Int64Regs:$g),
5397 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5400 def SUST_B_3D_V4B8_ZERO
5402 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5403 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5404 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5405 "\\{$r, $g, $b, $a\\};",
5407 def SUST_B_3D_V4B16_ZERO
5409 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5410 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5411 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5412 "\\{$r, $g, $b, $a\\};",
5414 def SUST_B_3D_V4B32_ZERO
5416 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5417 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5418 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5419 "\\{$r, $g, $b, $a\\};",
// 1D surface stores emitted as "sust.p.1d.<shape>.trap" (the ".p" form, as
// opposed to the ".b" form used by the SUST_B_* records).
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x          - Int32Regs coordinate, printed as the one-element vector {$x}.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store. b8 and b16 variants take
//                 Int16Regs, b32 takes Int32Regs.
// Only b8, b16 and b32 element sizes are defined in this group.
5426 def SUST_P_1D_B8_TRAP
5428 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5429 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5431 def SUST_P_1D_B16_TRAP
5433 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5434 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5436 def SUST_P_1D_B32_TRAP
5438 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5439 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5441 def SUST_P_1D_V2B8_TRAP
5443 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5444 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5446 def SUST_P_1D_V2B16_TRAP
5448 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5449 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5451 def SUST_P_1D_V2B32_TRAP
5453 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5454 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5456 def SUST_P_1D_V4B8_TRAP
5458 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5459 Int16Regs:$b, Int16Regs:$a),
5460 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5462 def SUST_P_1D_V4B16_TRAP
5464 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5465 Int16Regs:$b, Int16Regs:$a),
5466 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5468 def SUST_P_1D_V4B32_TRAP
5470 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5471 Int32Regs:$b, Int32Regs:$a),
5472 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// 1D array surface stores emitted as "sust.p.a1d.<shape>.trap".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $idx        - Int32Regs value printed first in the coordinate vector.
//   $x          - Int32Regs coordinate; the vector is printed as {$idx, $x}.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// Only b8, b16 and b32 element sizes are defined in this group.
5476 def SUST_P_1D_ARRAY_B8_TRAP
5478 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5479 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5481 def SUST_P_1D_ARRAY_B16_TRAP
5483 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5484 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5486 def SUST_P_1D_ARRAY_B32_TRAP
5488 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5489 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5491 def SUST_P_1D_ARRAY_V2B8_TRAP
5493 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5495 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5497 def SUST_P_1D_ARRAY_V2B16_TRAP
5499 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5501 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5503 def SUST_P_1D_ARRAY_V2B32_TRAP
5505 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5507 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5509 def SUST_P_1D_ARRAY_V4B8_TRAP
5511 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5512 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5513 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5514 "\\{$r, $g, $b, $a\\};",
5516 def SUST_P_1D_ARRAY_V4B16_TRAP
5518 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5519 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5520 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5521 "\\{$r, $g, $b, $a\\};",
5523 def SUST_P_1D_ARRAY_V4B32_TRAP
5525 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5526 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5527 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5528 "\\{$r, $g, $b, $a\\};",
// 2D surface stores emitted as "sust.p.2d.<shape>.trap".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x, $y      - Int32Regs coordinates, printed as the vector {$x, $y}.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store. b8 and b16 variants take
//                 Int16Regs, b32 takes Int32Regs.
// Only b8, b16 and b32 element sizes are defined in this group.
5532 def SUST_P_2D_B8_TRAP
5534 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5535 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5537 def SUST_P_2D_B16_TRAP
5539 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5540 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5542 def SUST_P_2D_B32_TRAP
5544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5545 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5547 def SUST_P_2D_V2B8_TRAP
5549 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5551 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5553 def SUST_P_2D_V2B16_TRAP
5555 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5557 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5559 def SUST_P_2D_V2B32_TRAP
5561 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5563 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5565 def SUST_P_2D_V4B8_TRAP
5567 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5568 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5569 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5570 "\\{$r, $g, $b, $a\\};",
5572 def SUST_P_2D_V4B16_TRAP
5574 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5575 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5576 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5577 "\\{$r, $g, $b, $a\\};",
5579 def SUST_P_2D_V4B32_TRAP
5581 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5582 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5583 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5584 "\\{$r, $g, $b, $a\\};",
// 2D array surface stores emitted as "sust.p.a2d.<shape>.trap".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $idx        - Int32Regs value printed first in the coordinate vector.
//   $x, $y      - Int32Regs coordinates.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y};
// $y is deliberately repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element coordinate vector for
// a2d and ignores the last element; confirm against the PTX ISA.
// Only b8, b16 and b32 element sizes are defined in this group.
5588 def SUST_P_2D_ARRAY_B8_TRAP
5590 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5592 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5594 def SUST_P_2D_ARRAY_B16_TRAP
5596 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5598 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5600 def SUST_P_2D_ARRAY_B32_TRAP
5602 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5604 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5606 def SUST_P_2D_ARRAY_V2B8_TRAP
5608 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5609 Int16Regs:$r, Int16Regs:$g),
5610 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5613 def SUST_P_2D_ARRAY_V2B16_TRAP
5615 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5616 Int16Regs:$r, Int16Regs:$g),
5617 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5620 def SUST_P_2D_ARRAY_V2B32_TRAP
5622 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5623 Int32Regs:$r, Int32Regs:$g),
5624 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5627 def SUST_P_2D_ARRAY_V4B8_TRAP
5629 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5630 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5631 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5632 "\\{$r, $g, $b, $a\\};",
5634 def SUST_P_2D_ARRAY_V4B16_TRAP
5636 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5637 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5638 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5639 "\\{$r, $g, $b, $a\\};",
5641 def SUST_P_2D_ARRAY_V4B32_TRAP
5643 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5644 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5645 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5646 "\\{$r, $g, $b, $a\\};",
// 3D surface stores emitted as "sust.p.3d.<shape>.trap".
// Operands, as used by the asm strings below:
//   $s          - Int64Regs operand placed first inside the [...] address
//                 (presumably the surface handle; confirm against callers).
//   $x, $y, $z  - Int32Regs coordinates.
//   $r/$g/$b/$a - the 1, 2 or 4 values to store.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z};
// $z is deliberately repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element coordinate vector for
// 3d and ignores the last element; confirm against the PTX ISA.
// Only b8, b16 and b32 element sizes are defined in this group.
5650 def SUST_P_3D_B8_TRAP
5652 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5654 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5656 def SUST_P_3D_B16_TRAP
5658 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5660 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5662 def SUST_P_3D_B32_TRAP
5664 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5666 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5668 def SUST_P_3D_V2B8_TRAP
5670 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5671 Int16Regs:$r, Int16Regs:$g),
5672 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5675 def SUST_P_3D_V2B16_TRAP
5677 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5678 Int16Regs:$r, Int16Regs:$g),
5679 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5682 def SUST_P_3D_V2B32_TRAP
5684 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5685 Int32Regs:$r, Int32Regs:$g),
5686 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5689 def SUST_P_3D_V4B8_TRAP
5691 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5692 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5693 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5694 "\\{$r, $g, $b, $a\\};",
5696 def SUST_P_3D_V4B16_TRAP
5698 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5699 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5700 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5701 "\\{$r, $g, $b, $a\\};",
5703 def SUST_P_3D_V4B32_TRAP
5705 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5706 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5707 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5708 "\\{$r, $g, $b, $a\\};",
5712 // Surface store instruction patterns
5713 // It is unclear why these patterns cannot be attached directly to the
5714 // instruction definitions; TableGen reports type errors when they are.
// Selection patterns for the 1D "clamp" surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_<shape>_clamp onto the
// SUST_B_1D_<SHAPE>_CLAMP instruction of the same shape, forwarding the
// operands unchanged and in the same order: $s (Int64Regs), the coordinate $x
// (Int32Regs), then the 1, 2 or 4 values $r/$g/$b/$a. i8 and i16 shapes bind
// the values as Int16Regs, i32 as Int32Regs, i64 as Int64Regs.
5717 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5718 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5719 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5721 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5722 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5723 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5725 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5726 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5727 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5729 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5730 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5731 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5733 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5734 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5735 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5736 Int16Regs:$r, Int16Regs:$g)>;
5738 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5739 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5740 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5741 Int16Regs:$r, Int16Regs:$g)>;
5743 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5744 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5745 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5746 Int32Regs:$r, Int32Regs:$g)>;
5748 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5749 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5750 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5751 Int64Regs:$r, Int64Regs:$g)>;
5753 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5754 Int64Regs:$s, Int32Regs:$x,
5755 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5756 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5757 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5759 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5760 Int64Regs:$s, Int32Regs:$x,
5761 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5762 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5763 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5765 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5766 Int64Regs:$s, Int32Regs:$x,
5767 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5768 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5769 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 1D array "clamp" surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_1d_array_<shape>_clamp onto the
// SUST_B_1D_ARRAY_<SHAPE>_CLAMP instruction of the same shape, forwarding the
// operands unchanged and in the same order: $s (Int64Regs), $l and $x
// (Int32Regs), then the 1, 2 or 4 values $r/$g/$b/$a.
// $l is the pattern-local name bound to the operand that follows $s.
5773 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5774 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5775 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5778 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5779 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5780 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5783 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5784 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5785 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5788 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5789 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5790 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5793 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5794 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5795 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5796 Int16Regs:$r, Int16Regs:$g)>;
5798 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5799 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5800 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5801 Int16Regs:$r, Int16Regs:$g)>;
5803 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5804 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5805 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5806 Int32Regs:$r, Int32Regs:$g)>;
5808 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5809 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5810 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5811 Int64Regs:$r, Int64Regs:$g)>;
5813 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5814 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5815 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5816 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5817 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5819 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5820 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5821 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5822 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5823 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5825 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5826 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5827 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5828 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5829 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D "clamp" surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_<shape>_clamp onto the
// SUST_B_2D_<SHAPE>_CLAMP instruction of the same shape, forwarding the
// operands unchanged and in the same order: $s (Int64Regs), the coordinates
// $x and $y (Int32Regs), then the 1, 2 or 4 values $r/$g/$b/$a.
5833 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5834 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5835 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5838 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5839 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5840 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5843 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5844 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5845 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5848 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5849 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5850 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5853 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5854 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5855 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5856 Int16Regs:$r, Int16Regs:$g)>;
5858 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5859 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5860 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5861 Int16Regs:$r, Int16Regs:$g)>;
5863 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5864 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5865 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5866 Int32Regs:$r, Int32Regs:$g)>;
5868 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5869 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5870 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5871 Int64Regs:$r, Int64Regs:$g)>;
5873 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5874 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5875 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5876 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5877 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5879 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5880 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5881 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5882 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5883 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5885 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5886 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5887 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5888 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5889 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the 2D array "clamp" surface-store intrinsics.
// Each anonymous Pat maps int_nvvm_sust_b_2d_array_<shape>_clamp onto the
// SUST_B_2D_ARRAY_<SHAPE>_CLAMP instruction of the same shape, forwarding the
// operands unchanged and in the same order: $s (Int64Regs), $l, $x and $y
// (Int32Regs), then the 1, 2 or 4 values $r/$g/$b/$a.
// $l is the pattern-local name bound to the operand that follows $s.
5893 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5894 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5895 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
5896 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5899 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5900 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5901 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
5902 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5905 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5906 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5907 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
5908 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5911 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5912 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5913 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
5914 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5917 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5918 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5919 Int16Regs:$r, Int16Regs:$g),
5920 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
5921 Int32Regs:$x, Int32Regs:$y,
5922 Int16Regs:$r, Int16Regs:$g)>;
5924 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5925 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5926 Int16Regs:$r, Int16Regs:$g),
5927 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
5928 Int32Regs:$x, Int32Regs:$y,
5929 Int16Regs:$r, Int16Regs:$g)>;
5931 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5932 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5934 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
5935 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5937 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5938 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5940 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
5941 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5943 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5944 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5945 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5946 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
5947 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5948 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5950 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5951 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5952 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5953 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
5954 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5955 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5957 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5958 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5959 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5960 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
5961 Int32Regs:$x, Int32Regs:$y,
5962 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_clamp intrinsics.
// Every intrinsic maps onto the SUST_B_3D_*_CLAMP instruction of the matching
// element type and vector width; the operands ($s, $x, $y, $z, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics.
// Every intrinsic maps onto the SUST_B_1D_*_TRAP instruction of the matching
// element type and vector width; the operands ($s, $x, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_TRAP
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_trap intrinsics.
// Every intrinsic maps onto the SUST_B_1D_ARRAY_*_TRAP instruction of the
// matching element type and vector width; the operands ($s, $l, $x, then the
// value registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics.
// Every intrinsic maps onto the SUST_B_2D_*_TRAP instruction of the matching
// element type and vector width; the operands ($s, $x, $y, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_trap intrinsics.
// Every intrinsic maps onto the SUST_B_2D_ARRAY_*_TRAP instruction of the
// matching element type and vector width; the operands ($s, $l, $x, $y, then
// the value registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_trap intrinsics.
// Every intrinsic maps onto the SUST_B_3D_*_TRAP instruction of the matching
// element type and vector width; the operands ($s, $x, $y, $z, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_3d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Every intrinsic maps onto the SUST_B_1D_*_ZERO instruction of the matching
// element type and vector width; the operands ($s, $x, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_1d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Every intrinsic maps onto the SUST_B_1D_ARRAY_*_ZERO instruction of the
// matching element type and vector width; the operands ($s, $l, $x, then the
// value registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics.
// Every intrinsic maps onto the SUST_B_2D_*_ZERO instruction of the matching
// element type and vector width; the operands ($s, $x, $y, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_2d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Every intrinsic maps onto the SUST_B_2D_ARRAY_*_ZERO instruction of the
// matching element type and vector width; the operands ($s, $l, $x, $y, then
// the value registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics.
// Every intrinsic maps onto the SUST_B_3D_*_ZERO instruction of the matching
// element type and vector width; the operands ($s, $x, $y, $z, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_b_3d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics.
// Every intrinsic maps onto the SUST_P_1D_*_TRAP instruction of the matching
// element type and vector width; the operands ($s, $x, then the value
// registers) are passed through unchanged and in the same order.
// The i8 and i16 forms both take their values in Int16Regs.
def : Pat<(int_nvvm_sust_p_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_P_1D_B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_1d_array_i8_trap -> SUST_P_1D_ARRAY_B8_TRAP.
// Operands $s, $l, $x and the value operand $r are forwarded unchanged and
// in the same order. The i8 value is carried in an Int16Regs operand, as in
// the other i8 patterns of this family.
def : Pat<
  (int_nvvm_sust_p_1d_array_i8_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_P_1D_ARRAY_B8_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
// int_nvvm_sust_p_1d_array_i16_trap -> SUST_P_1D_ARRAY_B16_TRAP.
// Operands $s, $l, $x and the Int16Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_1d_array_i16_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_P_1D_ARRAY_B16_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
// int_nvvm_sust_p_1d_array_i32_trap -> SUST_P_1D_ARRAY_B32_TRAP.
// Operands $s, $l, $x and the Int32Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_1d_array_i32_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_P_1D_ARRAY_B32_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;
// Selection patterns for the two- and four-element
// int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each one maps to the matching SUST_P_1D_ARRAY_V{2,4}B{8,16,32}_TRAP
// instruction, forwarding $s, $l, $x and the value operands unchanged and in
// the same order. i8/i16 values use Int16Regs, i32 values use Int32Regs.

// Two-element forms ($r, $g).
def : Pat<
  (int_nvvm_sust_p_1d_array_v2i8_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B8_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i16_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B16_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i32_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_ARRAY_V2B32_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g)>;

// Four-element forms ($r, $g, $b, $a).
def : Pat<
  (int_nvvm_sust_p_1d_array_v4i8_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B8_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i16_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B16_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i32_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_ARRAY_V4B32_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_2d_i8_trap -> SUST_P_2D_B8_TRAP.
// Operands $s, $x, $y and the value operand $r are forwarded unchanged and
// in the same order. The i8 value is carried in an Int16Regs operand, as in
// the other i8 patterns of this family.
def : Pat<
  (int_nvvm_sust_p_2d_i8_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_B8_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
// int_nvvm_sust_p_2d_i16_trap -> SUST_P_2D_B16_TRAP.
// Operands $s, $x, $y and the Int16Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_2d_i16_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_B16_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
// int_nvvm_sust_p_2d_i32_trap -> SUST_P_2D_B32_TRAP.
// Operands $s, $x, $y and the Int32Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_2d_i32_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_P_2D_B32_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;
// Selection patterns for the two- and four-element
// int_nvvm_sust_p_2d_*_trap intrinsics.
// Each one maps to the matching SUST_P_2D_V{2,4}B{8,16,32}_TRAP instruction,
// forwarding $s, $x, $y and the value operands unchanged and in the same
// order. i8/i16 values use Int16Regs, i32 values use Int32Regs.

// Two-element forms ($r, $g).
def : Pat<
  (int_nvvm_sust_p_2d_v2i8_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B8_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i16_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B16_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i32_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_V2B32_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g)>;

// Four-element forms ($r, $g, $b, $a).
def : Pat<
  (int_nvvm_sust_p_2d_v4i8_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B8_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i16_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B16_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i32_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_V4B32_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_2d_array_i8_trap -> SUST_P_2D_ARRAY_B8_TRAP.
// Operands $s, $l, $x, $y and the value operand $r are forwarded unchanged
// and in the same order. The i8 value is carried in an Int16Regs operand, as
// in the other i8 patterns of this family.
def : Pat<
  (int_nvvm_sust_p_2d_array_i8_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_ARRAY_B8_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
// int_nvvm_sust_p_2d_array_i16_trap -> SUST_P_2D_ARRAY_B16_TRAP.
// Operands $s, $l, $x, $y and the Int16Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_2d_array_i16_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_P_2D_ARRAY_B16_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
// int_nvvm_sust_p_2d_array_i32_trap -> SUST_P_2D_ARRAY_B32_TRAP.
// Operands $s, $l, $x, $y and the Int32Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_2d_array_i32_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_P_2D_ARRAY_B32_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;
// Two-element int_nvvm_sust_p_2d_array_v2i{8,16}_trap intrinsics, selected to
// SUST_P_2D_ARRAY_V2B{8,16}_TRAP. Operands $s, $l, $x, $y, $r, $g are
// forwarded unchanged and in the same order; both value operands are
// Int16Regs.
def : Pat<
  (int_nvvm_sust_p_2d_array_v2i8_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B8_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i16_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B16_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g)>;
// int_nvvm_sust_p_2d_array_v2i32_trap -> SUST_P_2D_ARRAY_V2B32_TRAP.
// Operands $s, $l, $x, $y and the two Int32Regs value operands $r, $g are
// forwarded unchanged and in the same order. The source dag must bind $g
// because the result dag uses it.
def : Pat<
  (int_nvvm_sust_p_2d_array_v2i32_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_ARRAY_V2B32_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g)>;
// Four-element int_nvvm_sust_p_2d_array_v4i{8,16,32}_trap intrinsics,
// selected to SUST_P_2D_ARRAY_V4B{8,16,32}_TRAP. Operands $s, $l, $x, $y and
// the value operands $r, $g, $b, $a are forwarded unchanged and in the same
// order. i8/i16 values use Int16Regs, i32 values use Int32Regs.
def : Pat<
  (int_nvvm_sust_p_2d_array_v4i8_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B8_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i16_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B16_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i32_trap
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_ARRAY_V4B32_TRAP
    Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// int_nvvm_sust_p_3d_i8_trap -> SUST_P_3D_B8_TRAP.
// Operands $s, $x, $y, $z and the value operand $r are forwarded unchanged
// and in the same order. The i8 value is carried in an Int16Regs operand, as
// in the other i8 patterns of this family.
def : Pat<
  (int_nvvm_sust_p_3d_i8_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r),
  (SUST_P_3D_B8_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r)>;
// int_nvvm_sust_p_3d_i16_trap -> SUST_P_3D_B16_TRAP.
// Operands $s, $x, $y, $z and the Int16Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_3d_i16_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r),
  (SUST_P_3D_B16_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r)>;
// int_nvvm_sust_p_3d_i32_trap -> SUST_P_3D_B32_TRAP.
// Operands $s, $x, $y, $z and the Int32Regs value operand $r are forwarded
// unchanged and in the same order.
def : Pat<
  (int_nvvm_sust_p_3d_i32_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r),
  (SUST_P_3D_B32_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r)>;
// Selection patterns for the two- and four-element
// int_nvvm_sust_p_3d_*_trap intrinsics.
// Each one maps to the matching SUST_P_3D_V{2,4}B{8,16,32}_TRAP instruction,
// forwarding $s, $x, $y, $z and the value operands unchanged and in the same
// order. i8/i16 values use Int16Regs, i32 values use Int32Regs.

// Two-element forms ($r, $g).
def : Pat<
  (int_nvvm_sust_p_3d_v2i8_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_3D_V2B8_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i16_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g),
  (SUST_P_3D_V2B16_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i32_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g),
  (SUST_P_3D_V2B32_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g)>;

// Four-element forms ($r, $g, $b, $a).
def : Pat<
  (int_nvvm_sust_p_3d_v4i8_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_3D_V4B8_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i16_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_3D_V4B16_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_3d_v4i32_trap
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_3D_V4B32_TRAP
    Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6973 //-----------------------------------
6974 // Read Special Registers
6975 //-----------------------------------
// Instruction template: read a special register into a 64-bit register.
//   regname - register name without the leading '%' (the template adds it).
//   intop   - zero-operand intrinsic that this instruction is selected for.
// The assembly string is "mov.u64", a tab, "$d, %<regname>;", and the result
// is written to the Int64Regs output $d.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              !strconcat("mov.u64\t$d, %", regname, ";"),
              [(set Int64Regs:$d, (intop))]>;
// Instruction template: read a special register into a 32-bit register.
//   regname - register name without the leading '%' (the template adds it).
//   intop   - zero-operand intrinsic that this instruction is selected for.
// The assembly string is "mov.u32", a tab, "$d, %<regname>;", and the result
// is written to the Int32Regs output $d.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              !strconcat("mov.u32\t$d, %", regname, ";"),
              [(set Int32Regs:$d, (intop))]>;
// TODO: Add vector versions of the special-register reads.
// 32-bit reads of %tid.x, %tid.y, %tid.z and %tid.w, one instruction per
// component, each selected for its int_nvvm_read_ptx_sreg_tid_* intrinsic.
def INT_PTX_SREG_TID_X
  : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
  : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
  : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
  : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// 32-bit reads of %ntid.x, %ntid.y, %ntid.z and %ntid.w, one instruction per
// component, each selected for its int_nvvm_read_ptx_sreg_ntid_* intrinsic.
def INT_PTX_SREG_NTID_X
  : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
  : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
  : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
  : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// 32-bit reads of %laneid, %warpid and %nwarpid.
def INT_PTX_SREG_LANEID
  : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
  : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
  : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// 32-bit reads of %ctaid.x, %ctaid.y, %ctaid.z and %ctaid.w, one instruction
// per component, each selected for its int_nvvm_read_ptx_sreg_ctaid_*
// intrinsic.
def INT_PTX_SREG_CTAID_X
  : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
  : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
  : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
  : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// 32-bit reads of %nctaid.x, %nctaid.y, %nctaid.z and %nctaid.w, one
// instruction per component, each selected for its
// int_nvvm_read_ptx_sreg_nctaid_* intrinsic.
def INT_PTX_SREG_NCTAID_X
  : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
  : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
  : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
  : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// 32-bit reads of %smid, %nsmid and %gridid.
def INT_PTX_SREG_SMID
  : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
  : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
  : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// 32-bit reads of the %lanemask_{eq,le,lt,ge,gt} special registers, each
// selected for its int_nvvm_read_ptx_sreg_lanemask_* intrinsic.
def INT_PTX_SREG_LANEMASK_EQ
  : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
  : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
  : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
  : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
  : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// Clock reads: %clock uses the 32-bit template, %clock64 the 64-bit one.
def INT_PTX_SREG_CLOCK
  : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
  : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// 32-bit reads of %pm0 through %pm3.
def INT_PTX_SREG_PM0
  : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
  : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
  : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
  : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// Warp-size read, selected for int_nvvm_read_ptx_sreg_warpsize.
// The operand is the constant WARP_SZ, written without a '%' prefix, whereas
// the PTX_READ_SREG_* templates always emit "%<regname>".
// TODO: It would be nice to use PTX_READ_SREG here once it can handle the
// constant.
def INT_PTX_SREG_WARPSIZE
  : NVPTXInst<(outs Int32Regs:$dst), (ins),
              "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;