1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 def immFloat0 : PatLeaf<(fpimm), [{
11 float f = (float)N->getValueAPF().convertToFloat();
15 def immFloat1 : PatLeaf<(fpimm), [{
16 float f = (float)N->getValueAPF().convertToFloat();
20 def immDouble0 : PatLeaf<(fpimm), [{
21 double d = (double)N->getValueAPF().convertToDouble();
25 def immDouble1 : PatLeaf<(fpimm), [{
26 double d = (double)N->getValueAPF().convertToDouble();
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
35 let isConvergent = 1 in {
36 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
38 [(int_nvvm_barrier0)]>;
39 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
41 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Two-register-operand barrier: prints `bar.sync` with $src1 and $src2 and is
// selected for the int_nvvm_barrier intrinsic. It produces no result value.
42 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
43 "bar.sync \t$src1, $src2;",
44 [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
45 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
47 ".reg .pred \t%p1; \n\t",
48 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
49 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
51 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
52 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
54 ".reg .pred \t%p1; \n\t",
55 ".reg .pred \t%p2; \n\t",
56 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
57 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
58 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
60 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
61 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
63 ".reg .pred \t%p1; \n\t",
64 ".reg .pred \t%p2; \n\t",
65 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
66 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
67 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
69 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Immediate-operand barrier: the i32 immediate $i is printed directly as the
// single operand of `bar.sync`; selected for int_nvvm_bar_sync.
71 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
72 [(int_nvvm_bar_sync imm:$i)]>;
74 // shfl.{up,down,bfly,idx}.b32
75 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
76 // The last two parameters to shfl can be regs or imms. ptxas is smart
77 // enough to inline constant registers, so strictly speaking we don't need to
78 // handle immediates here. But it's easy enough, and it makes our ptx more
82 (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
83 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
84 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
88 (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
89 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
90 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
94 (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
95 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
96 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
100 (ins regclass:$src, i32imm:$offset, i32imm:$mask),
101 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
102 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
// Instantiate the SHFL multiclass once per shuffle mode ("down", "up", "bfly",
// "idx") and per data register class (Int32Regs / Float32Regs), each bound to
// the matching int_nvvm_shfl_<mode>_<type> intrinsic.
105 defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
106 defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
107 defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
108 defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
109 defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
110 defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
111 defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
112 defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
114 } // isConvergent = 1
117 //-----------------------------------
118 // Explicit Memory Fence Functions
119 //-----------------------------------
120 class MEMBAR<string StrOp, Intrinsic IntOP> :
121 NVPTXInst<(outs), (ins),
// One fence record per membar variant (cta, gl, sys). Each passes the complete
// assembly text as StrOp and the matching int_nvvm_membar_* intrinsic as IntOP
// to the MEMBAR class.
124 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
125 def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
126 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
129 //-----------------------------------
131 //-----------------------------------
133 // Map min(1.0, max(0.0, x)) to sat(x)
134 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
136 // NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
137 // Same story for fmax, fmin.
// f32: fold fmin(1.0, fmax(0.0, $a)) into a single CVT_f32_f32 with the CvtSAT
// mode. The four patterns enumerate both operand orders of the outer fmin and
// of the inner fmax; all four produce the same replacement.
139 def : Pat<(int_nvvm_fmin_f immFloat1,
140 (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
141 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
142 def : Pat<(int_nvvm_fmin_f immFloat1,
143 (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
144 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
145 def : Pat<(int_nvvm_fmin_f
146 (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
147 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
148 def : Pat<(int_nvvm_fmin_f
149 (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
150 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: same folding as the f32 patterns, using the immDouble0/immDouble1
// leaves and rewriting to CVT_f64_f64 with CvtSAT. Again all four operand
// orderings of the fmin/fmax pair are listed explicitly.
152 def : Pat<(int_nvvm_fmin_d immDouble1,
153 (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
154 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
155 def : Pat<(int_nvvm_fmin_d immDouble1,
156 (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
157 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
158 def : Pat<(int_nvvm_fmin_d
159 (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
160 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
161 def : Pat<(int_nvvm_fmin_d
162 (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
163 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
166 // We need a full string for OpcStr here because we need to deal with case like
168 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
169 NVPTXRegClass src_regclass, Intrinsic IntOP>
170 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
172 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
174 // We need a full string for OpcStr here because we need to deal with the case
175 // like INT_PTX_NATIVE_POWR_F.
176 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
177 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
178 : NVPTXInst<(outs t_regclass:$dst),
179 (ins s0_regclass:$src0, s1_regclass:$src1),
181 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
183 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
184 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
185 NVPTXRegClass s2_regclass, Intrinsic IntOP>
186 : NVPTXInst<(outs t_regclass:$dst),
187 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
189 [(set t_regclass:$dst,
190 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
// prmt.b32: three Int32Regs sources and an Int32Regs result, built from
// F_MATH_3 and selected for int_nvvm_prmt.
196 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
197 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
// Two-operand min/max instructions built from F_MATH_2. The f32 forms come in
// plain and .ftz variants; the f64 forms have only the plain variant. Result
// and both sources use the same register class.
203 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
204 Float32Regs, Float32Regs, int_nvvm_fmin_f>;
205 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
206 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
208 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
209 Float32Regs, Float32Regs, int_nvvm_fmax_f>;
210 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
211 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
// f64 variants.
213 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
214 Float64Regs, Float64Regs, int_nvvm_fmin_d>;
215 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
216 Float64Regs, Float64Regs, int_nvvm_fmax_d>;
// mul.hi in signed/unsigned 32-bit (_I / _UI) and 64-bit (_LL / _ULL) forms.
// The intrinsic suffix selects the PTX type suffix: i->s32, ui->u32, ll->s64,
// ull->u64.
223 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
224 Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
225 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
226 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
228 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
229 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
230 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
231 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
// Floating-point multiply with an explicit rounding modifier (rn, rz, rm, rp).
// f32: every rounding mode is provided both with and without .ftz.
233 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
234 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
235 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
236 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
237 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
238 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
239 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
240 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
241 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
242 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
243 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
244 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
245 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
246 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
247 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
248 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
// f64: one record per rounding mode; no .ftz forms are defined here.
250 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
251 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
252 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
253 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
254 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
255 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
256 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
257 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
// mul24.lo in signed (s32) and unsigned (u32) forms; all operands and the
// result are Int32Regs.
259 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
260 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
261 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
262 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
// Floating-point divide.
// f32 div.approx, with and without .ftz.
268 def INT_NVVM_DIV_APPROX_FTZ_F
269 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
270 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
271 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
272 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
// f32 divide with an explicit rounding modifier (rn, rz, rm, rp), each with
// and without .ftz.
274 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
275 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
276 def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
277 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
278 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
279 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
280 def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
281 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
282 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
283 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
284 def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
285 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
286 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
287 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
288 def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
289 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
// f64 divide: one record per rounding modifier.
291 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
292 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
293 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
294 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
295 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
296 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
297 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
298 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
// sad (three-source) in signed and unsigned 32-bit forms, built from F_MATH_3
// with Int32Regs for the result and every source.
304 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
305 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
306 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
307 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
// floor and ceil need no dedicated instruction records: they are rewritten to
// a same-type CVT with an integer-rounding mode operand.
//   floor -> CvtRMI (CvtRMI_FTZ for the ftz f32 intrinsic)
//   ceil  -> CvtRPI (CvtRPI_FTZ for the ftz f32 intrinsic)
313 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
314 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
315 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
316 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
317 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
318 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
320 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
321 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
322 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
323 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
324 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
325 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// Absolute value: single-source F_MATH_1 records for abs.ftz.f32, abs.f32 and
// abs.f64.
331 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
332 Float32Regs, int_nvvm_fabs_ftz_f>;
333 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
334 Float32Regs, int_nvvm_fabs_f>;
336 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
337 Float64Regs, int_nvvm_fabs_d>;
// round is rewritten to a same-type CVT using the CvtRNI mode (CvtRNI_FTZ for
// the ftz f32 intrinsic).
343 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
344 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
345 def : Pat<(int_nvvm_round_f Float32Regs:$a),
346 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
347 def : Pat<(int_nvvm_round_d Float64Regs:$a),
348 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
// trunc is rewritten to a same-type CVT using the CvtRZI mode (CvtRZI_FTZ for
// the ftz f32 intrinsic).
354 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
355 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
356 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
357 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
358 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
359 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
// saturate is rewritten to a same-type CVT using the CvtSAT mode (CvtSAT_FTZ
// for the ftz f32 intrinsic).
365 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
366 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
367 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
368 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
369 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
370 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx and lg2.approx: single-source F_MATH_1 records in ftz-f32, f32
// and f64 flavours, each bound to the identically named NVVM intrinsic.
376 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
377 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
378 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
379 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
380 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
381 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
383 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
384 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
385 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
386 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
387 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
388 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
// sin.approx and cos.approx: f32 only, each with and without .ftz.
394 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
395 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
396 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
397 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
399 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
400 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
401 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
402 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
// Fused multiply-add with an explicit rounding modifier, built from F_MATH_3
// (three sources, one result, all in the same register class).
// f32: each rounding mode (rn, rz, rm, rp) with and without .ftz.
408 def INT_NVVM_FMA_RN_FTZ_F
409 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
410 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
411 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
412 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
413 def INT_NVVM_FMA_RZ_FTZ_F
414 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
415 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
416 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
417 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
418 def INT_NVVM_FMA_RM_FTZ_F
419 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
420 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
421 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
422 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
423 def INT_NVVM_FMA_RP_FTZ_F
424 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
425 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
426 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
427 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
// f64: one record per rounding mode.
429 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
430 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
431 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
432 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
433 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
434 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
435 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
436 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
// Reciprocal (rcp), single-source F_MATH_1 records.
// f32: each rounding mode (rn, rz, rm, rp) with and without .ftz.
442 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
443 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
444 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
445 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
446 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
447 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
448 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
449 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
450 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
451 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
452 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
453 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
454 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
455 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
456 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
457 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
// f64: one record per rounding mode.
459 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
460 Float64Regs, int_nvvm_rcp_rn_d>;
461 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
462 Float64Regs, int_nvvm_rcp_rz_d>;
463 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
464 Float64Regs, int_nvvm_rcp_rm_d>;
465 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
466 Float64Regs, int_nvvm_rcp_rp_d>;
// f64 approximate reciprocal; only the .ftz form is defined here.
468 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
469 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
// Square root, single-source F_MATH_1 records.
// f32: each rounding mode (rn, rz, rm, rp) plus .approx, all with and without
// .ftz.
475 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
476 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
477 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
478 Float32Regs, int_nvvm_sqrt_rn_f>;
479 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
480 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
481 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
482 Float32Regs, int_nvvm_sqrt_rz_f>;
483 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
484 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
485 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
486 Float32Regs, int_nvvm_sqrt_rm_f>;
487 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
488 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
489 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
490 Float32Regs, int_nvvm_sqrt_rp_f>;
491 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
492 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
493 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
494 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
// f64: one record per rounding mode; no approx form is defined here.
496 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
497 Float64Regs, int_nvvm_sqrt_rn_d>;
498 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
499 Float64Regs, int_nvvm_sqrt_rz_d>;
500 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
501 Float64Regs, int_nvvm_sqrt_rm_d>;
502 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
503 Float64Regs, int_nvvm_sqrt_rp_d>;
505 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic has no instruction of its own; it is
// rewritten to one of the four f32 sqrt records above depending on which
// predicates hold:
//   doF32FTZ + do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN            -> sqrt.rn.f32
//   doF32FTZ                 -> sqrt.approx.ftz.f32
//   (no predicate)           -> sqrt.approx.f32
// The patterns are listed from most to least constrained.
// NOTE(review): how overlapping predicates are prioritised is decided by the
// instruction selector and is not visible in this file - confirm.
506 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
507 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
508 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
509 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
510 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
511 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
512 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
513 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx: single-source F_MATH_1 records in ftz-f32, f32 and f64
// flavours.
519 def INT_NVVM_RSQRT_APPROX_FTZ_F
520 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
521 int_nvvm_rsqrt_approx_ftz_f>;
522 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
523 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
524 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
525 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
// Floating-point add with an explicit rounding modifier (rn, rz, rm, rp).
// f32: every rounding mode is provided both with and without .ftz.
531 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
532 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
533 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
534 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
535 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
536 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
537 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
538 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
539 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
540 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
541 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
542 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
543 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
544 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
545 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
546 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
// f64: one record per rounding mode.
548 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
549 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
550 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
551 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
552 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
553 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
554 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
555 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
// f64 -> f32 conversions. Each d2f intrinsic is rewritten to CVT_f32_f64 with
// the mode operand named after the intrinsic's suffix:
//   rn -> CvtRN, rz -> CvtRZ, rm -> CvtRM, rp -> CvtRP (plus the _FTZ twins).
561 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
562 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
563 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
564 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
565 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
566 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
567 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
568 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
569 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
570 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
571 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
572 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
573 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
574 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
575 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
576 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// f64 -> 32-bit integer conversions. The float-to-integer direction uses the
// integer-rounding mode operands (CvtRNI / CvtRZI / CvtRMI / CvtRPI).
// Signed results (d2i) go through CVT_s32_f64.
578 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
579 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
580 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
581 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
582 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
583 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
584 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
585 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
// Unsigned results (d2ui) go through CVT_u32_f64.
587 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
588 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
589 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
590 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
591 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
592 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
593 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
594 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// 32-bit integer -> f64 conversions. The integer-to-float direction uses the
// plain rounding mode operands (CvtRN / CvtRZ / CvtRM / CvtRP).
// Signed sources (i2d) go through CVT_f64_s32.
596 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
597 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
598 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
599 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
600 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
601 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
602 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
603 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
// Unsigned sources (ui2d) go through CVT_f64_u32.
605 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
606 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
607 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
608 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
609 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
610 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
611 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
612 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// f32 -> 32-bit integer conversions, using the integer-rounding mode operands
// (CvtRNI / CvtRZI / CvtRMI / CvtRPI) and their _FTZ twins.
// Signed results (f2i) go through CVT_s32_f32.
614 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
615 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
616 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
617 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
618 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
619 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
620 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
621 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
622 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
623 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
624 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
625 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
626 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
627 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
628 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
629 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
// Unsigned results (f2ui) go through CVT_u32_f32.
631 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
632 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
633 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
634 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
635 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
636 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
637 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
638 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
639 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
640 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
641 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
642 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
643 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
644 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
645 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
646 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// 32-bit integer -> f32 conversions, using the plain rounding mode operands
// (CvtRN / CvtRZ / CvtRM / CvtRP).
// Signed sources (i2f) go through CVT_f32_s32.
648 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
649 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
650 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
651 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
652 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
653 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
654 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
655 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
// Unsigned sources (ui2f) go through CVT_f32_u32.
657 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
658 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
659 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
660 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
661 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
662 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
663 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
664 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Builds a Float64Regs value from two Int32Regs sources with a single mov.b64
// whose source is the vector operand {$src0, $src1}. The braces are written
// doubled in the asm string.
// NOTE(review): presumably $src0 is the low word and $src1 the high word, per
// the "lohi" name - confirm against the intrinsic definition.
666 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
667 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
669 def INT_NVVM_D2I_LO : F_MATH_1<
671 ".reg .b32 %temp; \n\t",
672 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
674 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
675 def INT_NVVM_D2I_HI : F_MATH_1<
677 ".reg .b32 %temp; \n\t",
678 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
680 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// f32 -> 64-bit integer conversions, using the integer-rounding mode operands
// (CvtRNI / CvtRZI / CvtRMI / CvtRPI) and their _FTZ twins.
// Signed results (f2ll) go through CVT_s64_f32.
682 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
683 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
684 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
685 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
686 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
687 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
688 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
689 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
690 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
691 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
692 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
693 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
694 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
695 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
696 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
697 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
// Unsigned results (f2ull) go through CVT_u64_f32.
699 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
700 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
701 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
702 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
703 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
704 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
705 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
706 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
707 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
708 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
709 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
710 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
711 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
712 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
713 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
714 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// f64 -> 64-bit integer conversions, using the integer-rounding mode operands
// (CvtRNI / CvtRZI / CvtRMI / CvtRPI).
// Signed results (d2ll) go through CVT_s64_f64.
716 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
717 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
718 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
719 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
720 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
721 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
722 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
723 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
// Unsigned results (d2ull) go through CVT_u64_f64.
725 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
726 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
727 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
728 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
729 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
730 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
731 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
732 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// 64-bit integer -> f32 conversions, using the plain rounding mode operands
// (CvtRN / CvtRZ / CvtRM / CvtRP).
// Signed sources (ll2f) go through CVT_f32_s64.
734 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
735 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
736 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
737 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
738 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
739 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
740 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
741 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
// Unsigned sources (ull2f) go through CVT_f32_u64.
743 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
744 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
745 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
746 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
747 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
748 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
749 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
750 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
// 64-bit integer -> f64 conversions, using the plain rounding mode operands
// (CvtRN / CvtRZ / CvtRM / CvtRP).
// Signed sources (ll2d) go through CVT_f64_s64.
752 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
753 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
754 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
755 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
756 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
757 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
758 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
759 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
// Unsigned sources (ull2d) go through CVT_f64_u64.
761 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
762 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
763 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
764 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
765 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
766 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
767 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
768 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_f2h_rn_ftz / int_nvvm_f2h_rn: the Float32Regs operand goes through
// CVT_f16_f32 (mode CvtRN_FTZ or CvtRN respectively) and the result is then
// wrapped in BITCONVERT_16_F2I.
771 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
772 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
773 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
774 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
// Bitcast intrinsics. Each def instantiates F_MATH_1 with a "mov.b32" or
// "mov.b64" asm string moving $src0 to $dst, plus two register classes and
// the int_nvvm_bitcast_* intrinsic it implements.
// NOTE(review): F_MATH_1 is defined outside this chunk; its arguments are
// presumably (asm string, result class, source class, intrinsic) -- confirm.

// 32-bit: float -> int and int -> float.
780 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
781 Float32Regs, int_nvvm_bitcast_f2i>;
782 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
783 Int32Regs, int_nvvm_bitcast_i2f>;
// 64-bit: long long -> double and double -> long long.
785 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
786 Int64Regs, int_nvvm_bitcast_ll2d>;
787 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
788 Float64Regs, int_nvvm_bitcast_d2ll>;
790 //-----------------------------------
792 //-----------------------------------
// PatFrag wrapper for atomics on global memory: forwards `ops` and `frag` to
// PatFrag and attaches a predicate that returns the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL).
// NOTE(review): the line closing the predicate code block is not present in
// this listing.
794 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
795 : PatFrag<ops, frag, [{
796 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
// PatFrag wrapper for atomics on shared memory: forwards `ops` and `frag` to
// PatFrag and attaches a predicate that returns the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED).
// NOTE(review): the line closing the predicate code block is not present in
// this listing.
798 class ATOMIC_SHARED_CHK <dag ops, dag frag>
799 : PatFrag<ops, frag, [{
800 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
// PatFrag wrapper for atomics on generic pointers: forwards `ops` and `frag`
// to PatFrag and attaches a predicate that returns the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC).
// NOTE(review): the line closing the predicate code block is not present in
// this listing.
802 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
803 : PatFrag<ops, frag, [{
804 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
// Two-operand atomic (address + one value) for a single pointer width.
// Produces a register form (`reg`) and an immediate form (`imm`).
//   ptrclass - register class of the $addr operand.
//   regclass - register class of $dst and of $b in the register form.
//   SpaceStr, OpcStr, TypeStr - appended to "atom" in that order to build the
//              mnemonic; the operand text " \t$dst, [$addr], $b;" follows.
//   IntOp    - PatFrag matched by the selection pattern.
//   IMMType  - operand type of $b in the immediate form.
//   IMM      - SDNode used to match $b in the immediate form.
//   Pred     - NOTE(review): not referenced in the lines visible here; each
//              def ends with a trailing comma, so the clause that presumably
//              consumes it has been elided from this listing -- confirm.
807 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
808 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
809 Operand IMMType, SDNode IMM, Predicate Pred> {
// Register form: $b is a regclass register.
810 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
811 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
812 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Immediate form: $b is an IMMType operand matched through IMM. The final ""
// argument to !strconcat contributes nothing to the resulting string.
814 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
815 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
816 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs (`p32`) and once
// with Int64Regs (`p64`) as the pointer register class. Every other argument
// is forwarded unchanged.
819 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
820 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
821 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
822 IntOp, IMMType, IMM, Pred>;
823 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
824 IntOp, IMMType, IMM, Pred>;
827 // has 2 operands, neg the second one
// Register-only atomic whose asm text is a short sequence rather than a
// single instruction. The visible string pieces:
//   1. declare a scratch register `temp` of type .s<TypeStr>,
//   2. `neg.s<TypeStr>` of $b into temp,
//   3. `atom<SpaceStr><OpcStr>.u<TypeStr>` on [$addr] with temp as operand.
// TypeStr is therefore a bare width such as "32" or "64" (no leading dot or
// type letter), unlike the TypeStr passed to F_ATOMIC_2_imp.
// NOTE(review): the opening `!strconcat(` and the lines that bracket the
// sequence are elided from this listing, and IMMType / Pred are not
// referenced in the lines that remain -- confirm against the full file.
828 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
829 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
830 Operand IMMType, Predicate Pred> {
831 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
834 ".reg \t.s", TypeStr, " temp; \n\t",
835 "neg.s", TypeStr, " \ttemp, $b; \n\t",
836 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
838 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Instantiates F_ATOMIC_2_NEG_imp twice, once with Int32Regs (`p32`) and once
// with Int64Regs (`p64`) as the pointer register class.
// NOTE(review): the last line of the parameter list (which must declare the
// `Pred` forwarded below) is elided from this listing.
841 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
842 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
844 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
845 IntOp, IMMType, Pred> ;
846 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
847 IntOp, IMMType, Pred> ;
// Three-operand atomic (address + two values $b, $c) for a single pointer
// width. Four defs cover every register/immediate combination of $b and $c;
// all share the asm text "atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b, $c;".
//   ptrclass - register class of $addr.
//   regclass - register class of $dst and of register-form $b / $c.
//   IntOp    - PatFrag matched by the selection pattern.
//   IMMType  - operand type of immediate-form $b / $c.
// Unlike F_ATOMIC_2_imp, immediates are matched with the `imm` node directly
// rather than through a parameter.
// NOTE(review): Pred is not referenced in the lines visible here; each def
// ends with a trailing comma, so the continuation has been elided -- confirm.
851 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
852 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
853 Operand IMMType, Predicate Pred> {
// reg: $b and $c are both registers.
854 def reg : NVPTXInst<(outs regclass:$dst),
855 (ins ptrclass:$addr, regclass:$b, regclass:$c),
856 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
857 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
// imm1: $b is an immediate, $c a register.
860 def imm1 : NVPTXInst<(outs regclass:$dst),
861 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
862 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
863 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
// imm2: $b is a register, $c an immediate. The final "" argument to
// !strconcat contributes nothing to the resulting string.
866 def imm2 : NVPTXInst<(outs regclass:$dst),
867 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
868 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
869 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
// imm3: $b and $c are both immediates.
872 def imm3 : NVPTXInst<(outs regclass:$dst),
873 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
874 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
875 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp twice, once with Int32Regs (`p32`) and once
// with Int64Regs (`p64`) as the pointer register class. Every other argument
// is forwarded unchanged.
878 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
879 string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
880 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
881 IntOp, IMMType, Pred>;
882 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
883 IntOp, IMMType, Pred>;
// Address-space-qualified fragments for atomic add. For each of
// atomic_load_add_32, atomic_load_add_64 and int_nvvm_atomic_load_add_f32,
// the suffixes _g / _s / _gen wrap the same (addr, value) node in
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
888 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
889 (atomic_load_add_32 node:$a, node:$b)>;
890 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
891 (atomic_load_add_32 node:$a, node:$b)>;
892 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
893 (atomic_load_add_32 node:$a, node:$b)>;
894 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
895 (atomic_load_add_64 node:$a, node:$b)>;
896 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
897 (atomic_load_add_64 node:$a, node:$b)>;
898 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
899 (atomic_load_add_64 node:$a, node:$b)>;
// The f32 variants wrap the NVVM intrinsic rather than a generic atomic node.
900 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
901 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
902 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
903 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
904 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
905 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
// Atomic add instructions, built from F_ATOMIC_2 with OpcStr ".add".
//   _G   -> SpaceStr ".global", _S -> ".shared", _GEN -> "" (no qualifier).
//   _GEN_*_USE_G match the _gen fragment but emit ".global", and pass
//   useAtomRedG32forGen32 / useAtomRedG64forGen64 as the Pred argument.
// 32-bit integer (".u32", i32imm).
907 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
908 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
909 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
910 atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
911 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
912 atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
913 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
914 ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// 64-bit integer (".u64", i64imm).
916 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
917 atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
918 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
919 atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
920 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
921 atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
922 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
923 ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// 32-bit float (".f32", f32imm matched via fpimm). All three use
// hasAtomAddF32 as Pred, and there is no _USE_G variant.
925 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
926 atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
927 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
928 atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
929 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
930 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
// Address-space-qualified fragments for atomic subtract: atomic_load_sub_32
// and atomic_load_sub_64, each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
934 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
935 (atomic_load_sub_32 node:$a, node:$b)>;
936 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
937 (atomic_load_sub_32 node:$a, node:$b)>;
938 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
939 (atomic_load_sub_32 node:$a, node:$b)>;
940 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
941 (atomic_load_sub_64 node:$a, node:$b)>;
942 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
943 (atomic_load_sub_64 node:$a, node:$b)>;
944 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
945 (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract instructions. These use F_ATOMIC_2_NEG with OpcStr ".add",
// i.e. the subtract fragments are emitted as an add of the negated operand
// (see the asm sequence in F_ATOMIC_2_NEG_imp). TypeStr is the bare width
// "32" / "64" because that multiclass supplies the ".s" / ".u" prefixes.
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
947 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
948 atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
949 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
950 atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
951 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
952 atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
953 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
954 ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
955 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
956 atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
957 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
958 atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
959 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
960 atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
961 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
962 ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
// Address-space-qualified fragments for atomic exchange: atomic_swap_32 and
// atomic_swap_64, each wrapped in ATOMIC_GLOBAL_CHK (_g), ATOMIC_SHARED_CHK
// (_s) and ATOMIC_GENERIC_CHK (_gen).
966 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
967 (atomic_swap_32 node:$a, node:$b)>;
968 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
969 (atomic_swap_32 node:$a, node:$b)>;
970 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
971 (atomic_swap_32 node:$a, node:$b)>;
972 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
973 (atomic_swap_64 node:$a, node:$b)>;
974 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
975 (atomic_swap_64 node:$a, node:$b)>;
976 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
977 (atomic_swap_64 node:$a, node:$b)>;
// Atomic exchange instructions, built from F_ATOMIC_2 with OpcStr ".exch"
// and TypeStr ".b32" / ".b64".
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
979 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
980 atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
981 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
982 atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
983 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
984 atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
985 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
986 ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
987 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
988 atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
989 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
990 atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
991 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
992 atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
993 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
994 ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Address-space-qualified fragments for atomic max. atomic_load_max_{32,64}
// and atomic_load_umax_{32,64} are each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
998 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
999 , (atomic_load_max_32 node:$a, node:$b)>;
1000 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1001 (atomic_load_max_32 node:$a, node:$b)>;
1002 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1003 (atomic_load_max_32 node:$a, node:$b)>;
1004 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1005 , (atomic_load_max_64 node:$a, node:$b)>;
1006 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1007 (atomic_load_max_64 node:$a, node:$b)>;
1008 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1009 (atomic_load_max_64 node:$a, node:$b)>;
// umax fragments.
1010 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1011 (atomic_load_umax_32 node:$a, node:$b)>;
1012 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1013 (atomic_load_umax_32 node:$a, node:$b)>;
1014 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1015 (atomic_load_umax_32 node:$a, node:$b)>;
1016 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1017 (atomic_load_umax_64 node:$a, node:$b)>;
1018 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1019 (atomic_load_umax_64 node:$a, node:$b)>;
1020 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1021 (atomic_load_umax_64 node:$a, node:$b)>;
// Atomic max instructions, built from F_ATOMIC_2 with OpcStr ".max".
// The MAX defs use TypeStr ".s32" / ".s64" with the atomic_load_max_*
// fragments; the UMAX defs use ".u32" / ".u64" with atomic_load_umax_*.
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
1023 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1024 ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
1025 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1026 ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
1027 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1028 atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
1029 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1030 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1031 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1032 ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
1033 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1034 ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
1035 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1036 atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
1037 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1038 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// UMAX variants (".u32" / ".u64").
1039 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1040 ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
1041 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1042 ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
1043 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1044 atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
1045 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1046 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1047 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1048 ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
1049 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1050 ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
1051 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1052 atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
1053 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1054 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Address-space-qualified fragments for atomic min. atomic_load_min_{32,64}
// and atomic_load_umin_{32,64} are each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
1058 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1059 (atomic_load_min_32 node:$a, node:$b)>;
1060 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1061 (atomic_load_min_32 node:$a, node:$b)>;
1062 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1063 (atomic_load_min_32 node:$a, node:$b)>;
1064 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1065 (atomic_load_min_64 node:$a, node:$b)>;
1066 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1067 (atomic_load_min_64 node:$a, node:$b)>;
1068 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1069 (atomic_load_min_64 node:$a, node:$b)>;
// umin fragments.
1070 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1071 (atomic_load_umin_32 node:$a, node:$b)>;
1072 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1073 (atomic_load_umin_32 node:$a, node:$b)>;
1074 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1075 (atomic_load_umin_32 node:$a, node:$b)>;
1076 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1077 (atomic_load_umin_64 node:$a, node:$b)>;
1078 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1079 (atomic_load_umin_64 node:$a, node:$b)>;
1080 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1081 (atomic_load_umin_64 node:$a, node:$b)>;
// Atomic min instructions, built from F_ATOMIC_2 with OpcStr ".min".
// The MIN defs use TypeStr ".s32" / ".s64" with the atomic_load_min_*
// fragments; the UMIN defs use ".u32" / ".u64" with atomic_load_umin_*.
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
1083 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1084 ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
1085 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1086 ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
1087 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1088 atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
1089 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1090 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1091 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1092 ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
1093 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1094 ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
1095 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1096 atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
1097 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1098 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// UMIN variants (".u32" / ".u64").
1099 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1100 ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
1101 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1102 ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
1103 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1104 atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
1105 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1106 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1107 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1108 ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
1109 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1110 ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
1111 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1112 atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
1113 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1114 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1116 // atom_inc atom_dec
// Address-space-qualified fragments for the 32-bit increment / decrement
// intrinsics int_nvvm_atomic_load_inc_32 and int_nvvm_atomic_load_dec_32,
// each wrapped in ATOMIC_GLOBAL_CHK (_g), ATOMIC_SHARED_CHK (_s) and
// ATOMIC_GENERIC_CHK (_gen). Only 32-bit fragments are defined here.
1118 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1119 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1120 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1121 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1122 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1123 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1124 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1125 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1126 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1127 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1128 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1129 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// Atomic increment / decrement instructions, built from F_ATOMIC_2 with
// OpcStr ".inc" / ".dec" and TypeStr ".u32" (32-bit only).
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_32_USE_G matches the _gen fragment but emits ".global".
1131 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1132 atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
1133 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1134 atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
1135 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1136 atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
1137 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1138 ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1139 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1140 atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
1141 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1142 atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
1143 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1144 atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
1145 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1146 ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// Address-space-qualified fragments for atomic AND: atomic_load_and_32 and
// atomic_load_and_64, each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
1150 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1151 (atomic_load_and_32 node:$a, node:$b)>;
1152 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1153 (atomic_load_and_32 node:$a, node:$b)>;
1154 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1155 (atomic_load_and_32 node:$a, node:$b)>;
1156 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1157 (atomic_load_and_64 node:$a, node:$b)>;
1158 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1159 (atomic_load_and_64 node:$a, node:$b)>;
1160 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1161 (atomic_load_and_64 node:$a, node:$b)>;
// Atomic AND instructions, built from F_ATOMIC_2 with OpcStr ".and" and
// TypeStr ".b32" / ".b64".
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
1163 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1164 atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
1165 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1166 atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
1167 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1168 atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
1169 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1170 ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1171 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1172 atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
1173 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1174 atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
1175 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1176 atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
1177 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1178 ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Address-space-qualified fragments for atomic OR: atomic_load_or_32 and
// atomic_load_or_64, each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
1182 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1183 (atomic_load_or_32 node:$a, node:$b)>;
1184 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1185 (atomic_load_or_32 node:$a, node:$b)>;
1186 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1187 (atomic_load_or_32 node:$a, node:$b)>;
1188 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1189 (atomic_load_or_64 node:$a, node:$b)>;
1190 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1191 (atomic_load_or_64 node:$a, node:$b)>;
1192 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1193 (atomic_load_or_64 node:$a, node:$b)>;
// Atomic OR instructions, built from F_ATOMIC_2 with OpcStr ".or" and
// TypeStr ".b32" / ".b64".
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
// The defs are listed in G, GEN, GEN_USE_G, S order within each width.
1195 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1196 atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
1197 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1198 atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
1199 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1200 ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1201 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1202 atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
1203 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1204 atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
1205 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1206 atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
1207 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1208 ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1209 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1210 atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
// Address-space-qualified fragments for atomic XOR: atomic_load_xor_32 and
// atomic_load_xor_64, each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
1214 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1215 (atomic_load_xor_32 node:$a, node:$b)>;
1216 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1217 (atomic_load_xor_32 node:$a, node:$b)>;
1218 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1219 (atomic_load_xor_32 node:$a, node:$b)>;
1220 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1221 (atomic_load_xor_64 node:$a, node:$b)>;
1222 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1223 (atomic_load_xor_64 node:$a, node:$b)>;
1224 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1225 (atomic_load_xor_64 node:$a, node:$b)>;
// Atomic XOR instructions, built from F_ATOMIC_2 with OpcStr ".xor" and
// TypeStr ".b32" / ".b64".
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
1227 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1228 atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
1229 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1230 atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
1231 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1232 atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
1233 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1234 ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1235 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1236 atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
1237 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1238 atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
1239 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1240 atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
1241 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1242 ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// Address-space-qualified fragments for compare-and-swap. These take three
// operands ($a, $b, $c) instead of two. atomic_cmp_swap_32 and
// atomic_cmp_swap_64 are each wrapped in ATOMIC_GLOBAL_CHK (_g),
// ATOMIC_SHARED_CHK (_s) and ATOMIC_GENERIC_CHK (_gen).
1246 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1247 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1248 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1249 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1250 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1251 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1252 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1253 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1254 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1255 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1256 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1257 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// Compare-and-swap instructions, built from F_ATOMIC_3 with OpcStr ".cas"
// and TypeStr ".b32" / ".b64". F_ATOMIC_3 takes no IMM node argument, so only
// the operand type (i32imm / i64imm) is passed.
//   _G -> ".global", _S -> ".shared", _GEN -> "" (no qualifier);
//   _GEN_*_USE_G match the _gen fragment but emit ".global".
1259 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1260 atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
1261 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1262 atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
1263 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1264 atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
1265 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1266 ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
1267 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1268 atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
1269 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1270 atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
1271 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1272 atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
1273 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1274 ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1276 // Support for scoped atomic operations. Matches
1277 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1278 // and converts it into the appropriate instruction.
1279 // NOTE: not all possible combinations are implemented
1280 // 'space' is limited to generic as it's the only one needed to support CUDA.
1281 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common instruction class for the 2- and 3-operand
// scoped atomics. It defines an NVPTXInst with a single regclass:$result
// output, the caller-supplied `ins` dag as inputs, and the pattern
// (set regclass:$result, Operands).
// NOTE(review): AsmStr and Preds are not referenced in the lines visible
// here. The asm-string argument line and the clause after the trailing comma
// are elided from this listing -- confirm against the full file.
1282 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1283 dag ins, dag Operands>
1284 : NVPTXInst<(outs regclass:$result), ins,
1286 [(set regclass:$result, Operands)]>,
1289 // Define instruction variants for all addressing modes.
// Two-operand form: four anonymous ATOM23_impl defs covering
// {Int32Regs, Int64Regs} for $src crossed with {register, immediate} for $b.
//   Intr    - intrinsic matched by each pattern.
//   ImmType - operand type of $b in the immediate defs.
//   Imm     - SDNode matched for the immediate; it is wrapped as
//             (ImmTy Imm:$b) to pin its value type.
// NOTE(review): the first two defs follow `let AddedComplexity = 1 in {`.
// The closing braces are elided from this listing, so the extent of the
// `let` cannot be determined from here.
1290 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1291 NVPTXRegClass regclass, Operand ImmType,
1292 SDNode Imm, ValueType ImmTy,
1293 list<Predicate> Preds> {
1294 let AddedComplexity = 1 in {
1295 def : ATOM23_impl<AsmStr, regclass, Preds,
1296 (ins Int32Regs:$src, regclass:$b),
1297 (Intr Int32Regs:$src, regclass:$b)>;
1298 def : ATOM23_impl<AsmStr, regclass, Preds,
1299 (ins Int64Regs:$src, regclass:$b),
1300 (Intr Int64Regs:$src, regclass:$b)>;
1302 // tablegen can't infer argument types from Intrinsic (though it can
1303 // from Instruction) so we have to enforce specific type on
1304 // immediates via explicit cast to ImmTy.
1305 def : ATOM23_impl<AsmStr, regclass, Preds,
1306 (ins Int32Regs:$src, ImmType:$b),
1307 (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1308 def : ATOM23_impl<AsmStr, regclass, Preds,
1309 (ins Int64Regs:$src, ImmType:$b),
1310 (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
// Three-operand form: eight anonymous ATOM23_impl defs covering
// {Int32Regs, Int64Regs} for $src crossed with the four register/immediate
// combinations of $b and $c. Immediates are wrapped as (ImmTy Imm:$x) to pin
// their value type.
// The reg/reg defs follow `let AddedComplexity = 2 in {`, and the
// one-immediate defs follow `let AddedComplexity = 1 in {`.
// NOTE(review): the closing braces of both `let` blocks are elided from this
// listing, so whether the final imm/imm pair lies inside the second `let`
// cannot be determined from here.
1313 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1314 NVPTXRegClass regclass, Operand ImmType,
1315 SDNode Imm, ValueType ImmTy,
1316 list<Predicate> Preds> {
1317 // Variants for register/immediate permutations of $b and $c
1318 let AddedComplexity = 2 in {
1319 def : ATOM23_impl<AsmStr, regclass, Preds,
1320 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1321 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1322 def : ATOM23_impl<AsmStr, regclass, Preds,
1323 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1324 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1326 let AddedComplexity = 1 in {
// $b immediate, $c register.
1327 def : ATOM23_impl<AsmStr, regclass, Preds,
1328 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1329 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1330 def : ATOM23_impl<AsmStr, regclass, Preds,
1331 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1332 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
// $b register, $c immediate.
1333 def : ATOM23_impl<AsmStr, regclass, Preds,
1334 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1335 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1336 def : ATOM23_impl<AsmStr, regclass, Preds,
1337 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1338 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
// $b and $c both immediate.
1340 def : ATOM23_impl<AsmStr, regclass, Preds,
1341 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1342 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1343 def : ATOM23_impl<AsmStr, regclass, Preds,
1344 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1345 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1348 // Constructs intrinsic name and instruction asm strings.
// Two-operand variant. Builds two strings and forwards them, with the
// remaining arguments, to ATOM2P_impl:
//   asm:  "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//         "." OpStr "." TypeStr " \t$result, [$src], $b;"
//   name: "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//         ["_" ScopeStr unless ScopeStr is empty]
// Note that the asm string omits the scope when it is "gpu", whereas the
// name omits it only when ScopeStr is the empty string.
// NOTE(review): the line that opens the expression wrapping the name string
// (closed by the extra ")" below) is elided from this listing; presumably it
// converts the string to an Intrinsic record -- confirm.
1349 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1350 string ScopeStr, string SpaceStr,
1351 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1352 ValueType ImmTy, list<Predicate> Preds> {
1353 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1354 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1355 # "." # OpStr # "." # TypeStr
1356 # " \t$result, [$src], $b;",
1358 "int_nvvm_atomic_" # OpStr
1359 # "_" # SpaceStr # "_" # IntTypeStr
1360 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1361 regclass, ImmType, Imm, ImmTy, Preds>;
// Three-operand variant. Builds two strings and forwards them, with the
// remaining arguments, to ATOM3P_impl:
//   asm:  "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//         "." OpStr "." TypeStr " \t$result, [$src], $b, $c;"
//   name: "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//         ["_" ScopeStr unless ScopeStr is empty]
// NOTE(review): the line that opens the expression wrapping the name string
// (closed by the extra ")" below) is elided from this listing; presumably it
// converts the string to an Intrinsic record -- confirm.
1363 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1364 string ScopeStr, string SpaceStr,
1365 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1366 ValueType ImmTy, list<Predicate> Preds> {
1367 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1368 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1369 # "." # OpStr # "." # TypeStr
1370 # " \t$result, [$src], $b, $c;",
1372 "int_nvvm_atomic_" # OpStr
1373 # "_" # SpaceStr # "_" # IntTypeStr
1374 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1375 regclass, ImmType, Imm, ImmTy, Preds>;
1378 // Constructs variants for different address spaces.
1379 // For now we only need variants for generic space pointers.
1380 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1381 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1382 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1383 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1384 regclass, ImmType, Imm, ImmTy, Preds>;
1386 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1387 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1388 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1389 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1390 regclass, ImmType, Imm, ImmTy, Preds>;
1393 // Constructs variants for different scopes of atomic op.
1394 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1395 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1396 ValueType ImmTy, list<Predicate> Preds> {
1397 // .gpu scope is default and is currently covered by existing
1398 // atomics w/o explicitly specified scope.
1399 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1400 regclass, ImmType, Imm, ImmTy,
1401 !listconcat(Preds,[hasAtomScope])>;
1402 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1403 regclass, ImmType, Imm, ImmTy,
1404 !listconcat(Preds,[hasAtomScope])>;
1406 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1407 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1408 list<Predicate> Preds> {
1409 // No need to define ".gpu"-scoped atomics. They do the same thing
1410 // as the regular, non-scoped atomics defined elsewhere.
1411 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1412 regclass, ImmType, Imm, ImmTy,
1413 !listconcat(Preds,[hasAtomScope])>;
1414 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1415 regclass, ImmType, Imm, ImmTy,
1416 !listconcat(Preds,[hasAtomScope])>;
1420 multiclass ATOM2_add_impl<string OpStr> {
1421 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1422 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1423 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1424 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1426 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1430 // atom.{and,or,xor}
1431 multiclass ATOM2_bitwise_impl<string OpStr> {
1432 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1433 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1434 [hasAtomBitwise64]>;
1438 multiclass ATOM2_exch_impl<string OpStr> {
1439 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1440 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1444 multiclass ATOM2_minmax_impl<string OpStr> {
1445 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1446 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1447 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1449 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1454 multiclass ATOM2_incdec_impl<string OpStr> {
1455 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1459 multiclass ATOM3_cas_impl<string OpStr> {
1460 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1461 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Instantiate the scoped atomic instruction families, one defm per atomic
// operation. The string argument is forwarded as OpStr to the matching
// ATOM2_*/ATOM3_* multiclass, which expands it into the per-type and
// per-scope variants.
1464 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1465 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1466 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1467 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1468 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1469 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1470 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1471 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1472 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1473 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1475 //-----------------------------------
1476 // Support for ldu on sm_20 or later
1477 //-----------------------------------
1479 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1480 // read-only in a kernel.
1484 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1485 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1486 !strconcat("ldu.global.", TyStr),
1487 []>, Requires<[hasLDU]>;
1488 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1489 !strconcat("ldu.global.", TyStr),
1490 []>, Requires<[hasLDU]>;
1491 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1492 !strconcat("ldu.global.", TyStr),
1493 []>, Requires<[hasLDU]>;
1494 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1495 !strconcat("ldu.global.", TyStr),
1496 []>, Requires<[hasLDU]>;
1497 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1498 !strconcat("ldu.global.", TyStr),
1499 []>, Requires<[hasLDU]>;
// Scalar ldu.global instruction families. The first argument is the type
// suffix plus operand list that LDU_G appends to "ldu.global."; the second
// is the register class of the loaded value.
// Note that i8 results are produced in Int16Regs, and the f16/f16x2 variants
// use the untyped b16/b32 suffixes.
1502 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1503 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1504 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1505 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1506 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1507 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1508 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1509 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
// Pointer-typed loads reuse the unsigned integer suffix of matching width.
1510 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1511 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1515 // Elementized vector ldu
1516 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1517 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1518 (ins Int32Regs:$src),
1519 !strconcat("ldu.global.", TyStr), []>;
1520 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1521 (ins Int64Regs:$src),
1522 !strconcat("ldu.global.", TyStr), []>;
1523 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1525 !strconcat("ldu.global.", TyStr), []>;
1526 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1528 !strconcat("ldu.global.", TyStr), []>;
1529 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1531 !strconcat("ldu.global.", TyStr), []>;
1534 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1535 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1536 regclass:$dst4), (ins Int32Regs:$src),
1537 !strconcat("ldu.global.", TyStr), []>;
1538 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1539 regclass:$dst4), (ins Int64Regs:$src),
1540 !strconcat("ldu.global.", TyStr), []>;
1541 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1542 regclass:$dst4), (ins MEMri:$src),
1543 !strconcat("ldu.global.", TyStr), []>;
1544 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1545 regclass:$dst4), (ins MEMri64:$src),
1546 !strconcat("ldu.global.", TyStr), []>;
1547 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1548 regclass:$dst4), (ins imemAny:$src),
1549 !strconcat("ldu.global.", TyStr), []>;
// Two-element vector ldu.global families. Each string is appended to
// "ldu.global." by VLDU_G_ELE_V2 and names both destination registers; the
// second argument is the register class used for every element.
// i8 elements are held in Int16Regs; f16/f16x2 use the b16/b32 suffixes.
1552 defm INT_PTX_LDU_G_v2i8_ELE
1553 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1554 defm INT_PTX_LDU_G_v2i16_ELE
1555 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1556 defm INT_PTX_LDU_G_v2i32_ELE
1557 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1558 defm INT_PTX_LDU_G_v2f16_ELE
1559 : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1560 defm INT_PTX_LDU_G_v2f16x2_ELE
1561 : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1562 defm INT_PTX_LDU_G_v2f32_ELE
1563 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1564 defm INT_PTX_LDU_G_v2i64_ELE
1565 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1566 defm INT_PTX_LDU_G_v2f64_ELE
1567 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1568 defm INT_PTX_LDU_G_v4i8_ELE
1569 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1570 defm INT_PTX_LDU_G_v4i16_ELE
1571 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1573 defm INT_PTX_LDU_G_v4i32_ELE
1574 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1576 defm INT_PTX_LDU_G_v4f16_ELE
1577 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1579 defm INT_PTX_LDU_G_v4f16x2_ELE
1580 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1582 defm INT_PTX_LDU_G_v4f32_ELE
1583 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1587 //-----------------------------------
1588 // Support for ldg on sm_35 or later
1589 //-----------------------------------
1591 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1592 // non-coherent texture cache, and therefore the values read must be read-only
1593 // during the lifetime of the kernel.
1595 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1596 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1597 !strconcat("ld.global.nc.", TyStr),
1598 []>, Requires<[hasLDG]>;
1599 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1600 !strconcat("ld.global.nc.", TyStr),
1601 []>, Requires<[hasLDG]>;
1602 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1603 !strconcat("ld.global.nc.", TyStr),
1604 []>, Requires<[hasLDG]>;
1605 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1606 !strconcat("ld.global.nc.", TyStr),
1607 []>, Requires<[hasLDG]>;
1608 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1609 !strconcat("ld.global.nc.", TyStr),
1610 []>, Requires<[hasLDG]>;
// Scalar ld.global.nc instruction families. The first argument is the type
// suffix plus operand list that LDG_G appends to "ld.global.nc."; the second
// is the register class of the loaded value.
// i8 results are produced in Int16Regs; f16/f16x2 use the b16/b32 suffixes;
// the p32/p64 (pointer) variants reuse the u32/u64 suffixes.
1613 defm INT_PTX_LDG_GLOBAL_i8
1614 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1615 defm INT_PTX_LDG_GLOBAL_i16
1616 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1617 defm INT_PTX_LDG_GLOBAL_i32
1618 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1619 defm INT_PTX_LDG_GLOBAL_i64
1620 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1621 defm INT_PTX_LDG_GLOBAL_f16
1622 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1623 defm INT_PTX_LDG_GLOBAL_f16x2
1624 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1625 defm INT_PTX_LDG_GLOBAL_f32
1626 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1627 defm INT_PTX_LDG_GLOBAL_f64
1628 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1629 defm INT_PTX_LDG_GLOBAL_p32
1630 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1631 defm INT_PTX_LDG_GLOBAL_p64
1632 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1636 // Elementized vector ldg
1637 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1638 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1639 (ins Int32Regs:$src),
1640 !strconcat("ld.global.nc.", TyStr), []>;
1641 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1642 (ins Int64Regs:$src),
1643 !strconcat("ld.global.nc.", TyStr), []>;
1644 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1646 !strconcat("ld.global.nc.", TyStr), []>;
1647 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1649 !strconcat("ld.global.nc.", TyStr), []>;
1650 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1652 !strconcat("ld.global.nc.", TyStr), []>;
1655 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1656 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1657 regclass:$dst4), (ins Int32Regs:$src),
1658 !strconcat("ld.global.nc.", TyStr), []>;
1659 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1660 regclass:$dst4), (ins Int64Regs:$src),
1661 !strconcat("ld.global.nc.", TyStr), []>;
1662 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1663 regclass:$dst4), (ins MEMri:$src),
1664 !strconcat("ld.global.nc.", TyStr), []>;
1665 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1666 regclass:$dst4), (ins MEMri64:$src),
1667 !strconcat("ld.global.nc.", TyStr), []>;
1668 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1669 regclass:$dst4), (ins imemAny:$src),
1670 !strconcat("ld.global.nc.", TyStr), []>;
1673 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two- and four-element vector ld.global.nc families. Each string is
// appended to "ld.global.nc." by VLDG_G_ELE_V2 / VLDG_G_ELE_V4 and names
// every destination register; the second argument is the register class
// used for each element. 8-bit elements are held in Int16Regs.
1674 defm INT_PTX_LDG_G_v2i8_ELE
1675 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1676 defm INT_PTX_LDG_G_v2i16_ELE
1677 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1678 defm INT_PTX_LDG_G_v2i32_ELE
1679 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1680 defm INT_PTX_LDG_G_v2f16_ELE
1681 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1682 defm INT_PTX_LDG_G_v2f16x2_ELE
1683 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1684 defm INT_PTX_LDG_G_v2f32_ELE
1685 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1686 defm INT_PTX_LDG_G_v2i64_ELE
1687 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1688 defm INT_PTX_LDG_G_v2f64_ELE
1689 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// Four-element variants; no 64-bit element types are instantiated here.
1690 defm INT_PTX_LDG_G_v4i8_ELE
1691 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1692 defm INT_PTX_LDG_G_v4i16_ELE
1693 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1694 defm INT_PTX_LDG_G_v4i32_ELE
1695 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1696 defm INT_PTX_LDG_G_v4f16_ELE
1697 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1698 defm INT_PTX_LDG_G_v4f16x2_ELE
1699 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1700 defm INT_PTX_LDG_G_v4f32_ELE
1701 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1704 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1705 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1706 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1707 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1708 Requires<[hasGenericLdSt]>;
1709 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1710 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1711 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1712 Requires<[hasGenericLdSt]>;
1714 // @TODO: Are these actually needed? I believe global addresses will be copied
1715 // to register values anyway.
1716 /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1717 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1718 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1719 Requires<[hasGenericLdSt]>;
1720 def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1721 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1722 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1723 Requires<[hasGenericLdSt]>;*/
1725 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1726 "mov.u32 \t$result, $src;",
1727 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1728 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1729 "mov.u64 \t$result, $src;",
1730 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1732 // @TODO: Are these actually needed? I believe global addresses will be copied
1733 // to register values anyway.
1734 /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1735 "mov.u32 \t$result, $src;",
1736 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1737 def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1738 "mov.u64 \t$result, $src;",
1739 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
1742 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1743 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1744 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1745 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1746 Requires<[hasGenericLdSt]>;
1747 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1748 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1749 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1750 Requires<[hasGenericLdSt]>;
1751 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1752 "mov.u32 \t$result, $src;",
1753 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1754 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1755 "mov.u64 \t$result, $src;",
1756 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
// Address-space conversion instructions.
// NG_TO_G instantiations select the int_nvvm_ptr_*_to_gen intrinsics and
// splice the given state-space name into a "cvta.<space>" mnemonic.
// The constant space is spelled "const" in the mnemonic but "constant" in the
// intrinsic name.
1759 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1760 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1761 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1762 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
// G_TO_NG instantiations select the int_nvvm_ptr_gen_to_* intrinsics and
// build a "cvta.to.<space>" mnemonic.
1764 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1765 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1766 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1767 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1770 // nvvm.ptr.gen.to.param
// Selects int_nvvm_ptr_gen_to_param on a 32-bit register operand. The
// conversion is emitted as a plain register move (mov.u32), not a cvta.
1771 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1772 (ins Int32Regs:$src),
1773 "mov.u32 \t$result, $src;",
1774 [(set Int32Regs:$result,
1775 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
// 64-bit counterpart: same intrinsic on a 64-bit register, emitted as mov.u64.
1776 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1777 (ins Int64Regs:$src),
1778 "mov.u64 \t$result, $src;",
1779 [(set Int64Regs:$result,
1780 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1783 // nvvm.move intrinsics
1784 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1785 "mov.b16 \t$r, $s;",
1787 (int_nvvm_move_i16 Int16Regs:$s))]>;
1788 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1789 "mov.b32 \t$r, $s;",
1791 (int_nvvm_move_i32 Int32Regs:$s))]>;
1792 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1793 "mov.b64 \t$r, $s;",
1795 (int_nvvm_move_i64 Int64Regs:$s))]>;
// Selects int_nvvm_move_float: a register-to-register copy of a 32-bit
// float, printed as mov.f32.
1796 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1797 "mov.f32 \t$r, $s;",
1798 [(set Float32Regs:$r,
1799 (int_nvvm_move_float Float32Regs:$s))]>;
// Selects int_nvvm_move_double: the 64-bit float counterpart, printed as
// mov.f64.
1800 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1801 "mov.f64 \t$r, $s;",
1802 [(set Float64Regs:$r,
1803 (int_nvvm_move_double Float64Regs:$s))]>;
1804 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1805 "mov.u32 \t$r, $s;",
1807 (int_nvvm_move_ptr Int32Regs:$s))]>;
1808 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1809 "mov.u64 \t$r, $s;",
1811 (int_nvvm_move_ptr Int64Regs:$s))]>;
1813 // @TODO: Are these actually needed, or will we always just see symbols
1814 // copied to registers first?
1815 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1816 "mov.u32 \t$r, $s;",
1818 (int_nvvm_move_ptr texternalsym:$s))]>;
1819 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1820 "mov.u64 \t$r, $s;",
1822 (int_nvvm_move_ptr texternalsym:$s))]>;*/
1825 // MoveParam %r1, param
1826 // ptr_local_to_gen %r2, %r1
1827 // ptr_gen_to_local %r3, %r2
1831 // @TODO: Revisit this. There is a type
1832 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1833 // instructions are not currently defined. However, we can use the ptr
1834 // variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) on an external symbol into
// a single pointer move. The i64 form selects nvvm_move_ptr64.
1835 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1836 (MoveParam texternalsym:$src)))),
1837 (nvvm_move_ptr64 texternalsym:$src)>;
// Same fold for i32 results, selecting nvvm_move_ptr32.
1838 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1839 (MoveParam texternalsym:$src)))),
1840 (nvvm_move_ptr32 texternalsym:$src)>;
1843 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1844 "mov.u64 \t$result, $src;", []>;
1846 //-----------------------------------
1847 // Compiler Error Warn
1848 // - Just ignore them in codegen
1849 //-----------------------------------
// The compiler.warn / compiler.error intrinsics are matched for both 32- and
// 64-bit register operands. Each instruction has no outputs and its asm
// string is only a "//" comment naming the intrinsic; the $a operand is not
// printed.
1851 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1852 "// llvm.nvvm.compiler.warn()",
1853 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
1854 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1855 "// llvm.nvvm.compiler.warn()",
1856 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
1857 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1858 "// llvm.nvvm.compiler.error()",
1859 [(int_nvvm_compiler_error Int32Regs:$a)]>;
1860 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1861 "// llvm.nvvm.compiler.error()",
1862 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep.<space> instructions: each selects the matching
// int_nvvm_isspacep_<space> intrinsic on a 32- or 64-bit address register and
// produces its result in a predicate-class register (Int1Regs).
// Only the const variants carry a Requires<[hasPTX31]> guard.
1867 def ISSPACEP_CONST_32
1868 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1869 "isspacep.const \t$d, $a;",
1870 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
1871 Requires<[hasPTX31]>;
1872 def ISSPACEP_CONST_64
1873 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1874 "isspacep.const \t$d, $a;",
1875 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
1876 Requires<[hasPTX31]>;
1877 def ISSPACEP_GLOBAL_32
1878 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1879 "isspacep.global \t$d, $a;",
1880 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
1881 def ISSPACEP_GLOBAL_64
1882 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1883 "isspacep.global \t$d, $a;",
1884 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
1885 def ISSPACEP_LOCAL_32
1886 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1887 "isspacep.local \t$d, $a;",
1888 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
1889 def ISSPACEP_LOCAL_64
1890 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1891 "isspacep.local \t$d, $a;",
1892 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
1893 def ISSPACEP_SHARED_32
1894 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1895 "isspacep.shared \t$d, $a;",
1896 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
1897 def ISSPACEP_SHARED_64
1898 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1899 "isspacep.shared \t$d, $a;",
1900 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1903 // Special register reads
// Copies a special register ($r, class SpecialRegs) into a 32-bit general
// register with mov.b32. It carries no selection pattern of its own ([]);
// the envreg Pat<> records in this file reference it explicitly.
1904 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
1905 (ins SpecialRegs:$r),
1906 "mov.b32 \t$d, $r;", []>;
// Map each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) to a
// MOV_SPECIAL of the same-numbered ENVREG<N> register.
1908 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
1909 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
1910 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
1911 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
1912 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
1913 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
1914 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
1915 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
1916 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
1917 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
1918 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
1919 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
1920 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
1921 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
1922 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
1923 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
1924 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
1925 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
1926 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
1927 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
1928 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
1929 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
1930 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
1931 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
1932 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
1933 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
1934 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
1935 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
1936 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
1937 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
1938 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
1939 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
1942 // rotate builtin support
// Hardware 32-bit rotate, selected for int_nvvm_rotate_b32 when hasHWROT32
// holds. The rotate is expressed as shf.l.wrap.b32 with $src supplied as both
// source operands. This variant takes the amount as an i32 immediate.
1944 def ROTATE_B32_HW_IMM
1945 : NVPTXInst<(outs Int32Regs:$dst),
1946 (ins Int32Regs:$src, i32imm:$amt),
1947 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1948 [(set Int32Regs:$dst,
1949 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
1950 Requires<[hasHWROT32]> ;
// Same instruction with the rotate amount held in a 32-bit register.
1952 def ROTATE_B32_HW_REG
1953 : NVPTXInst<(outs Int32Regs:$dst),
1954 (ins Int32Regs:$src, Int32Regs:$amt),
1955 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1956 [(set Int32Regs:$dst,
1957 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
1958 Requires<[hasHWROT32]> ;
// Software fallback for int_nvvm_rotate_b32 when noHWROT32 holds.
// Immediate amount: ROT32imm_sw receives the amount and SUB_FRM_32 applied to
// the same immediate.
1960 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
1961 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
1962 Requires<[noHWROT32]> ;
// Register amount: delegated to ROTL32reg_sw.
1964 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
1965 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
1966 Requires<[noHWROT32]> ;
1968 let hasSideEffects = 0 in {
1969 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1970 !strconcat("{{\n\t",
1971 ".reg .b32 %dummy;\n\t",
1972 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
1976 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1977 !strconcat("{{\n\t",
1978 ".reg .b32 %dummy;\n\t",
1979 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
1984 let hasSideEffects = 0 in {
1986 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
1987 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: split the 64-bit source with GET_HI_INT64 /
// GET_LO_INT64 and repack with the halves exchanged (the high half is passed
// first, the low half second, to PACK_TWO_INT32).
1990 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
1991 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
1992 (GET_LO_INT64 Int64Regs:$src))> ;
1994 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
1996 let hasSideEffects = 0 in {
// 32-bit funnel-shift instructions in wrap mode, all gated on hasHWROT32.
// None carries a selection pattern ([]); they are referenced explicitly from
// the rotate_b64 Pat<> records in this file.
// Left funnel shift, immediate amount.
1997 def SHF_L_WRAP_B32_IMM
1998 : NVPTXInst<(outs Int32Regs:$dst),
1999 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2000 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2001 Requires<[hasHWROT32]>;
// Left funnel shift, register amount.
2003 def SHF_L_WRAP_B32_REG
2004 : NVPTXInst<(outs Int32Regs:$dst),
2005 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2006 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2007 Requires<[hasHWROT32]>;
// Right funnel shift, immediate amount.
2009 def SHF_R_WRAP_B32_IMM
2010 : NVPTXInst<(outs Int32Regs:$dst),
2011 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2012 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2013 Requires<[hasHWROT32]>;
// Right funnel shift, register amount.
2015 def SHF_R_WRAP_B32_REG
2016 : NVPTXInst<(outs Int32Regs:$dst),
2017 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2018 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2019 Requires<[hasHWROT32]>;
2022 // HW version of rotate 64
2023 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2025 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2026 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2027 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2028 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2029 Requires<[hasHWROT32]>;
2031 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2033 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2034 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2035 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2036 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2037 Requires<[hasHWROT32]>;
2040 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2042 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2043 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2044 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2045 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2046 Requires<[hasHWROT32]>;
2048 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2050 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2051 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2052 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2053 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2054 Requires<[hasHWROT32]>;
2056 // SW version of rotate 64
// Software fallback for a 64-bit left rotate by an immediate, used when
// noHWROT32 holds. ROT64imm_sw receives the rotate amount and a second,
// complementary amount derived from the same immediate. For a 64-bit value
// the complement must be derived with SUB_FRM_64, exactly as the
// rotate_right_b64 immediate pattern in this file does; SUB_FRM_32 belongs
// only with the 32-bit ROT32imm_sw.
// NOTE(review): ROT64imm_sw and SUB_FRM_64 are defined outside this chunk;
// confirm the operand order against their definitions.
2057 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2058 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2059 Requires<[noHWROT32]>;
// Left rotate by a register amount (no hardware rotate): ROTL64reg_sw.
2060 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2061 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2062 Requires<[noHWROT32]>;
// Right rotate by an immediate: reuses ROT64imm_sw with the two amounts
// exchanged relative to the left-rotate form (SUB_FRM_64 of the immediate
// first, the immediate itself second).
2063 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2064 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2065 Requires<[noHWROT32]>;
// Right rotate by a register amount: ROTR64reg_sw.
2066 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2067 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2068 Requires<[noHWROT32]>;
2071 //-----------------------------------
2072 // Texture Intrinsics
2073 //-----------------------------------
2075 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2076 // also defined in NVPTXReplaceImageHandles.cpp
2078 // texmode_independent
2079 let IsTex = 1, IsTexModeUnified = 0 in {
2080 // Texture fetch instructions using handles
2082 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2083 Float32Regs:$b, Float32Regs:$a),
2084 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2085 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2088 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2089 Float32Regs:$b, Float32Regs:$a),
2090 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2091 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2093 def TEX_1D_F32_F32_LEVEL
2094 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2095 Float32Regs:$b, Float32Regs:$a),
2096 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2097 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2098 "[$t, $s, \\{$x\\}], $lod;",
2100 def TEX_1D_F32_F32_GRAD
2101 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2102 Float32Regs:$b, Float32Regs:$a),
2103 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2104 Float32Regs:$gradx, Float32Regs:$grady),
2105 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2106 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D texture fetches (asm "tex[.level|.grad].1d.v4.s32.*") producing four
// Int32Regs results ($r, $g, $b, $a). Operands mirror the other 1D records:
// Int64Regs $t and $s, then a coordinate $x (Int32Regs for ".s32",
// Float32Regs for ".f32"); GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): the first two records have no visible `def` line; the LEVEL
// record's asm string uses $lod but its (ins ...) list ends at $x with a
// trailing comma; and no record shows an argument after its asm string.
// Lines appear to be missing from this listing -- confirm against upstream.
2109 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2110 Int32Regs:$b, Int32Regs:$a),
2111 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2112 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2115 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2116 Int32Regs:$b, Int32Regs:$a),
2117 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2118 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2120 def TEX_1D_S32_F32_LEVEL
2121 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2122 Int32Regs:$b, Int32Regs:$a),
2123 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2125 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2126 "[$t, $s, \\{$x\\}], $lod;",
2128 def TEX_1D_S32_F32_GRAD
2129 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2130 Int32Regs:$b, Int32Regs:$a),
2131 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2132 Float32Regs:$gradx, Float32Regs:$grady),
2133 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2134 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].1d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a) -- the same register class the ".s32" records use.
// Operands: Int64Regs $t and $s, then coordinate $x (Int32Regs for ".s32",
// Float32Regs for ".f32"); GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): the first two records have no visible `def` line; the LEVEL
// record's asm string uses $lod but its (ins ...) list ends at $x with a
// trailing comma; and no record shows an argument after its asm string.
// Lines appear to be missing from this listing -- confirm against upstream.
2137 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2138 Int32Regs:$b, Int32Regs:$a),
2139 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2140 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2143 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2144 Int32Regs:$b, Int32Regs:$a),
2145 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2146 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2148 def TEX_1D_U32_F32_LEVEL
2149 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2150 Int32Regs:$b, Int32Regs:$a),
2151 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2153 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2154 "[$t, $s, \\{$x\\}], $lod;",
2156 def TEX_1D_U32_F32_GRAD
2157 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2158 Int32Regs:$b, Int32Regs:$a),
2159 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2160 Float32Regs:$gradx, Float32Regs:$grady),
2161 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2162 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D array texture fetches (asm geometry ".a1d") producing four Float32Regs
// results ($r, $g, $b, $a). Besides Int64Regs $t and $s, each record takes an
// Int32Regs $l that is emitted first in the coordinate vector "{$l, $x}"
// (presumably the array layer index), followed by coordinate $x
// (Int32Regs for ".s32", Float32Regs for ".f32").
// GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2165 def TEX_1D_ARRAY_F32_S32
2166 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2167 Float32Regs:$b, Float32Regs:$a),
2168 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2169 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2170 "[$t, $s, \\{$l, $x\\}];",
2172 def TEX_1D_ARRAY_F32_F32
2173 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2174 Float32Regs:$b, Float32Regs:$a),
2175 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2176 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2177 "[$t, $s, \\{$l, $x\\}];",
2179 def TEX_1D_ARRAY_F32_F32_LEVEL
2180 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2181 Float32Regs:$b, Float32Regs:$a),
2182 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2184 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2185 "[$t, $s, \\{$l, $x\\}], $lod;",
2187 def TEX_1D_ARRAY_F32_F32_GRAD
2188 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2189 Float32Regs:$b, Float32Regs:$a),
2190 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2191 Float32Regs:$gradx, Float32Regs:$grady),
2192 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2193 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D array texture fetches (asm "tex[.level|.grad].a1d.v4.s32.*") producing
// four Int32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s,
// Int32Regs $l (emitted first in the "{$l, $x}" vector), then coordinate $x
// (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds Float32Regs
// $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2195 def TEX_1D_ARRAY_S32_S32
2196 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2197 Int32Regs:$b, Int32Regs:$a),
2198 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2199 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2200 "[$t, $s, \\{$l, $x\\}];",
2202 def TEX_1D_ARRAY_S32_F32
2203 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2204 Int32Regs:$b, Int32Regs:$a),
2205 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2206 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2207 "[$t, $s, \\{$l, $x\\}];",
2209 def TEX_1D_ARRAY_S32_F32_LEVEL
2210 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2211 Int32Regs:$b, Int32Regs:$a),
2212 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2214 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2215 "[$t, $s, \\{$l, $x\\}], $lod;",
2217 def TEX_1D_ARRAY_S32_F32_GRAD
2218 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2219 Int32Regs:$b, Int32Regs:$a),
2220 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2221 Float32Regs:$gradx, Float32Regs:$grady),
2222 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2223 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D array texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].a1d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a). Operands: Int64Regs $t and $s, Int32Regs $l (emitted
// first in the "{$l, $x}" vector), then coordinate $x (Int32Regs for ".s32",
// Float32Regs for ".f32"). GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2225 def TEX_1D_ARRAY_U32_S32
2226 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2227 Int32Regs:$b, Int32Regs:$a),
2228 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2229 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2230 "[$t, $s, \\{$l, $x\\}];",
2232 def TEX_1D_ARRAY_U32_F32
2233 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2234 Int32Regs:$b, Int32Regs:$a),
2235 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2236 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2237 "[$t, $s, \\{$l, $x\\}];",
2239 def TEX_1D_ARRAY_U32_F32_LEVEL
2240 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2241 Int32Regs:$b, Int32Regs:$a),
2242 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2244 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2245 "[$t, $s, \\{$l, $x\\}], $lod;",
2247 def TEX_1D_ARRAY_U32_F32_GRAD
2248 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2249 Int32Regs:$b, Int32Regs:$a),
2250 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2251 Float32Regs:$gradx, Float32Regs:$grady),
2252 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2253 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// 2D texture fetches (asm "tex[.level|.grad].2d.v4.f32.*") producing four
// Float32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then
// coordinates $x and $y (Int32Regs for ".s32", Float32Regs for ".f32").
// GRAD adds two-element gradient vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}, all Float32Regs.
// NOTE(review): the first two records have no visible `def` line; the LEVEL
// record's asm string uses $lod but its (ins ...) list ends at $y with a
// trailing comma; and no record shows an argument after its asm string.
// Lines appear to be missing from this listing -- confirm against upstream.
2257 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2258 Float32Regs:$b, Float32Regs:$a),
2259 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2260 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2261 "[$t, $s, \\{$x, $y\\}];",
2264 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2265 Float32Regs:$b, Float32Regs:$a),
2266 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2267 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2268 "[$t, $s, \\{$x, $y\\}];",
2270 def TEX_2D_F32_F32_LEVEL
2271 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2272 Float32Regs:$b, Float32Regs:$a),
2273 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2275 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2276 "[$t, $s, \\{$x, $y\\}], $lod;",
2278 def TEX_2D_F32_F32_GRAD
2279 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2280 Float32Regs:$b, Float32Regs:$a),
2281 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2282 Float32Regs:$gradx0, Float32Regs:$gradx1,
2283 Float32Regs:$grady0, Float32Regs:$grady1),
2284 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2285 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2286 "\\{$grady0, $grady1\\};",
// 2D texture fetches (asm "tex[.level|.grad].2d.v4.s32.*") producing four
// Int32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then
// coordinates $x and $y (Int32Regs for ".s32", Float32Regs for ".f32").
// GRAD adds {$gradx0, $gradx1} and {$grady0, $grady1}, all Float32Regs.
// NOTE(review): the first two records have no visible `def` line; the LEVEL
// record's asm string uses $lod but its (ins ...) list ends at $y with a
// trailing comma; and no record shows an argument after its asm string.
// Lines appear to be missing from this listing -- confirm against upstream.
2289 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2290 Int32Regs:$b, Int32Regs:$a),
2291 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2292 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2293 "[$t, $s, \\{$x, $y\\}];",
2296 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2297 Int32Regs:$b, Int32Regs:$a),
2298 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2299 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2300 "[$t, $s, \\{$x, $y\\}];",
2302 def TEX_2D_S32_F32_LEVEL
2303 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2304 Int32Regs:$b, Int32Regs:$a),
2305 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2307 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2308 "[$t, $s, \\{$x, $y\\}], $lod;",
2310 def TEX_2D_S32_F32_GRAD
2311 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2312 Int32Regs:$b, Int32Regs:$a),
2313 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2314 Float32Regs:$gradx0, Float32Regs:$gradx1,
2315 Float32Regs:$grady0, Float32Regs:$grady1),
2316 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2317 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2318 "\\{$grady0, $grady1\\};",
// 2D texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].2d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then coordinates $x and $y
// (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds
// {$gradx0, $gradx1} and {$grady0, $grady1}, all Float32Regs.
// NOTE(review): the first two records have no visible `def` line; the LEVEL
// record's asm string uses $lod but its (ins ...) list ends at $y with a
// trailing comma; and no record shows an argument after its asm string.
// Lines appear to be missing from this listing -- confirm against upstream.
2321 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2322 Int32Regs:$b, Int32Regs:$a),
2323 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2324 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2325 "[$t, $s, \\{$x, $y\\}];",
2328 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2329 Int32Regs:$b, Int32Regs:$a),
2330 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2331 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2332 "[$t, $s, \\{$x, $y\\}];",
2334 def TEX_2D_U32_F32_LEVEL
2335 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2336 Int32Regs:$b, Int32Regs:$a),
2337 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2339 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2340 "[$t, $s, \\{$x, $y\\}], $lod;",
2342 def TEX_2D_U32_F32_GRAD
2343 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2344 Int32Regs:$b, Int32Regs:$a),
2345 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2346 Float32Regs:$gradx0, Float32Regs:$gradx1,
2347 Float32Regs:$grady0, Float32Regs:$grady1),
2348 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2349 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2350 "\\{$grady0, $grady1\\};",
// 2D array texture fetches (asm geometry ".a2d") producing four Float32Regs
// results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, Int32Regs $l, then
// coordinates $x and $y. The coordinate vector is emitted as
// "{$l, $x, $y, $y}", i.e. $y is repeated as the fourth element (presumably
// padding to the four-element vector the PTX form expects -- confirm against
// the PTX ISA). LEVEL adds Float32Regs $lod; GRAD adds {$gradx0, $gradx1} and
// {$grady0, $grady1}.
// NOTE(review): the first two records' asm strings use $y but their
// (ins ...) lists end at $x with a trailing comma, and no record shows an
// argument after its asm string -- lines appear to be missing; confirm
// against upstream.
2353 def TEX_2D_ARRAY_F32_S32
2354 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2355 Float32Regs:$b, Float32Regs:$a),
2356 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2358 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2359 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2361 def TEX_2D_ARRAY_F32_F32
2362 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2363 Float32Regs:$b, Float32Regs:$a),
2364 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2366 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2367 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2369 def TEX_2D_ARRAY_F32_F32_LEVEL
2370 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2371 Float32Regs:$b, Float32Regs:$a),
2372 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2373 Float32Regs:$y, Float32Regs:$lod),
2374 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2375 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2377 def TEX_2D_ARRAY_F32_F32_GRAD
2378 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2379 Float32Regs:$b, Float32Regs:$a),
2380 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2381 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2382 Float32Regs:$grady0, Float32Regs:$grady1),
2383 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2384 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2385 "\\{$grady0, $grady1\\};",
// 2D array texture fetches (asm "tex[.level|.grad].a2d.v4.s32.*") producing
// four Int32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s,
// Int32Regs $l, then coordinates $x and $y; the coordinate vector is emitted
// as "{$l, $x, $y, $y}" with $y repeated in the fourth slot. LEVEL adds
// Float32Regs $lod; GRAD adds {$gradx0, $gradx1} and {$grady0, $grady1}.
// NOTE(review): in the first two records and in the GRAD record the asm
// string uses $y but no $y operand is visible in the (ins ...) list, and no
// record shows an argument after its asm string -- lines appear to be
// missing; confirm against upstream.
2387 def TEX_2D_ARRAY_S32_S32
2388 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2389 Int32Regs:$b, Int32Regs:$a),
2390 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2392 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2393 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2395 def TEX_2D_ARRAY_S32_F32
2396 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2397 Int32Regs:$b, Int32Regs:$a),
2398 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2400 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2401 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2403 def TEX_2D_ARRAY_S32_F32_LEVEL
2404 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2405 Int32Regs:$b, Int32Regs:$a),
2406 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2407 Float32Regs:$y, Float32Regs:$lod),
2408 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2409 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2411 def TEX_2D_ARRAY_S32_F32_GRAD
2412 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2413 Int32Regs:$b, Int32Regs:$a),
2414 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2416 Float32Regs:$gradx0, Float32Regs:$gradx1,
2417 Float32Regs:$grady0, Float32Regs:$grady1),
2418 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2419 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2420 "\\{$grady0, $grady1\\};",
// 2D array texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].a2d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a). Operands: Int64Regs $t and $s, Int32Regs $l, then
// coordinates $x and $y; the coordinate vector is emitted as
// "{$l, $x, $y, $y}" with $y repeated in the fourth slot. LEVEL adds
// Float32Regs $lod; GRAD adds {$gradx0, $gradx1} and {$grady0, $grady1}.
// NOTE(review): in the first two records and in the GRAD record the asm
// string uses $y but no $y operand is visible in the (ins ...) list, and no
// record shows an argument after its asm string -- lines appear to be
// missing; confirm against upstream.
2422 def TEX_2D_ARRAY_U32_S32
2423 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2424 Int32Regs:$b, Int32Regs:$a),
2425 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2427 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2428 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2430 def TEX_2D_ARRAY_U32_F32
2431 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2432 Int32Regs:$b, Int32Regs:$a),
2433 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2435 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2436 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2438 def TEX_2D_ARRAY_U32_F32_LEVEL
2439 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2440 Int32Regs:$b, Int32Regs:$a),
2441 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2442 Float32Regs:$y, Float32Regs:$lod),
2443 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2444 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2446 def TEX_2D_ARRAY_U32_F32_GRAD
2447 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2448 Int32Regs:$b, Int32Regs:$a),
2449 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2451 Float32Regs:$gradx0, Float32Regs:$gradx1,
2452 Float32Regs:$grady0, Float32Regs:$grady1),
2453 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2454 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2455 "\\{$grady0, $grady1\\};",
// 3D texture fetches (asm "tex[.level|.grad].3d.v4.f32.*") producing four
// Float32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then
// coordinates $x, $y, $z. The coordinate vector is emitted as
// "{$x, $y, $z, $z}", i.e. $z is repeated as the fourth element (presumably
// padding to a four-element vector -- confirm against the PTX ISA). LEVEL
// adds Float32Regs $lod; GRAD adds $gradx0..2 and $grady0..2, each emitted as
// a four-element vector with the last component repeated.
// NOTE(review): the first two records have no visible `def` line; in those
// two and in the GRAD record the asm string uses $z but no $z operand is
// visible in the (ins ...) list; and no record shows an argument after its
// asm string. Lines appear to be missing -- confirm against upstream.
2459 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2460 Float32Regs:$b, Float32Regs:$a),
2461 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2463 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2464 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2467 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2468 Float32Regs:$b, Float32Regs:$a),
2469 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2471 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2472 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2474 def TEX_3D_F32_F32_LEVEL
2475 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2476 Float32Regs:$b, Float32Regs:$a),
2477 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2478 Float32Regs:$z, Float32Regs:$lod),
2479 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2480 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2482 def TEX_3D_F32_F32_GRAD
2483 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2484 Float32Regs:$b, Float32Regs:$a),
2485 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2487 Float32Regs:$gradx0, Float32Regs:$gradx1,
2488 Float32Regs:$gradx2, Float32Regs:$grady0,
2489 Float32Regs:$grady1, Float32Regs:$grady2),
2490 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2491 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2492 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2493 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// 3D texture fetches (asm "tex[.level|.grad].3d.v4.s32.*") producing four
// Int32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then
// coordinates $x, $y, $z, emitted as "{$x, $y, $z, $z}" with $z repeated in
// the fourth slot. LEVEL adds Float32Regs $lod; GRAD adds $gradx0..2 and
// $grady0..2, each emitted as a four-element vector with the last component
// repeated.
// NOTE(review): the first two records have no visible `def` line; in those
// two and in the GRAD record the asm string uses $z but no $z operand is
// visible in the (ins ...) list; and no record shows an argument after its
// asm string. Lines appear to be missing -- confirm against upstream.
2496 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2497 Int32Regs:$b, Int32Regs:$a),
2498 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2500 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2501 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2504 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2505 Int32Regs:$b, Int32Regs:$a),
2506 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2508 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2509 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2511 def TEX_3D_S32_F32_LEVEL
2512 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2513 Int32Regs:$b, Int32Regs:$a),
2514 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2515 Float32Regs:$z, Float32Regs:$lod),
2516 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2517 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2519 def TEX_3D_S32_F32_GRAD
2520 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2521 Int32Regs:$b, Int32Regs:$a),
2522 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2524 Float32Regs:$gradx0, Float32Regs:$gradx1,
2525 Float32Regs:$gradx2, Float32Regs:$grady0,
2526 Float32Regs:$grady1, Float32Regs:$grady2),
2527 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2528 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2529 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2530 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// 3D texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].3d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then coordinates
// $x, $y, $z, emitted as "{$x, $y, $z, $z}" with $z repeated in the fourth
// slot. LEVEL adds Float32Regs $lod; GRAD adds $gradx0..2 and $grady0..2,
// each emitted as a four-element vector with the last component repeated.
// NOTE(review): the first two records have no visible `def` line; in those
// two and in the GRAD record the asm string uses $z but no $z operand is
// visible in the (ins ...) list; and no record shows an argument after its
// asm string. Lines appear to be missing -- confirm against upstream.
2533 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2534 Int32Regs:$b, Int32Regs:$a),
2535 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2537 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2538 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2541 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2542 Int32Regs:$b, Int32Regs:$a),
2543 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2545 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2546 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2548 def TEX_3D_U32_F32_LEVEL
2549 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2550 Int32Regs:$b, Int32Regs:$a),
2551 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2552 Float32Regs:$z, Float32Regs:$lod),
2553 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2554 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2556 def TEX_3D_U32_F32_GRAD
2557 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2558 Int32Regs:$b, Int32Regs:$a),
2559 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2561 Float32Regs:$gradx0, Float32Regs:$gradx1,
2562 Float32Regs:$gradx2, Float32Regs:$grady0,
2563 Float32Regs:$grady1, Float32Regs:$grady2),
2564 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2565 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2566 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2567 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Cube texture fetches (asm geometry ".cube") producing four Float32Regs
// results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then Float32Regs
// coordinates $x, $y, $z, emitted as "{$x, $y, $z, $z}" with $z repeated in
// the fourth slot. Only a plain and a ".level" form are defined here.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $z with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2570 def TEX_CUBE_F32_F32
2571 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2572 Float32Regs:$b, Float32Regs:$a),
2573 (ins Int64Regs:$t, Int64Regs:$s,
2574 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2575 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2576 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2578 def TEX_CUBE_F32_F32_LEVEL
2579 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2580 Float32Regs:$b, Float32Regs:$a),
2581 (ins Int64Regs:$t, Int64Regs:$s,
2582 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2584 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2585 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Cube texture fetches (asm "tex[.level].cube.v4.s32.f32") producing four
// Int32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then
// Float32Regs coordinates $x, $y, $z, emitted as "{$x, $y, $z, $z}" with $z
// repeated in the fourth slot.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $z with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2587 def TEX_CUBE_S32_F32
2588 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2589 Int32Regs:$b, Int32Regs:$a),
2590 (ins Int64Regs:$t, Int64Regs:$s,
2591 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2592 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2593 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2595 def TEX_CUBE_S32_F32_LEVEL
2596 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2597 Int32Regs:$b, Int32Regs:$a),
2598 (ins Int64Regs:$t, Int64Regs:$s,
2599 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2601 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2602 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Cube texture fetches whose asm uses the ".u32" result type
// ("tex[.level].cube.v4.u32.f32"). Results are four Int32Regs
// ($r, $g, $b, $a). Operands: Int64Regs $t and $s, then Float32Regs
// coordinates $x, $y, $z, emitted as "{$x, $y, $z, $z}" with $z repeated in
// the fourth slot.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $z with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2604 def TEX_CUBE_U32_F32
2605 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2606 Int32Regs:$b, Int32Regs:$a),
2607 (ins Int64Regs:$t, Int64Regs:$s,
2608 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2609 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2610 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2612 def TEX_CUBE_U32_F32_LEVEL
2613 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2614 Int32Regs:$b, Int32Regs:$a),
2615 (ins Int64Regs:$t, Int64Regs:$s,
2616 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2618 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2619 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Cube array texture fetches (asm geometry ".acube") producing four
// Float32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s,
// Int32Regs $l, then Float32Regs coordinates $x, $y, $z. The coordinate
// vector is "{$l, $x, $y, $z}" -- four distinct operands, no repeated slot.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $z with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2622 def TEX_CUBE_ARRAY_F32_F32
2623 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2624 Float32Regs:$b, Float32Regs:$a),
2625 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2626 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2627 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2628 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2630 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2631 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2632 Float32Regs:$b, Float32Regs:$a),
2633 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2634 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2636 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2637 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Cube array texture fetches (asm "tex[.level].acube.v4.s32.f32") producing
// four Int32Regs results ($r, $g, $b, $a). Operands: Int64Regs $t and $s,
// Int32Regs $l, then Float32Regs coordinates $x, $y, $z, emitted as
// "{$l, $x, $y, $z}".
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $z with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2639 def TEX_CUBE_ARRAY_S32_F32
2640 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2641 Int32Regs:$b, Int32Regs:$a),
2642 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2643 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2644 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2645 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2647 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2648 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2649 Int32Regs:$b, Int32Regs:$a),
2650 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2651 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2653 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2654 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// Cube array texture fetches whose asm uses the ".u32" result type
// ("tex[.level].acube.v4.u32.f32"). Results are four Int32Regs
// ($r, $g, $b, $a). Operands: Int64Regs $t and $s, Int32Regs $l, then
// Float32Regs coordinates $x, $y, $z, emitted as "{$l, $x, $y, $z}".
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $z with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2656 def TEX_CUBE_ARRAY_U32_F32
2657 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2658 Int32Regs:$b, Int32Regs:$a),
2659 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2660 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2661 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2662 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2664 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2665 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2666 Int32Regs:$b, Int32Regs:$a),
2667 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2668 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2670 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2671 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
// tld4 2D instructions with a Float32Regs result: one record per component
// selector in the mnemonic ("tld4.r", "tld4.g", "tld4.b", "tld4.a").
// Each writes four Float32Regs ($v0..$v3) and reads Int64Regs $t and $s plus
// Float32Regs coordinates $x and $y; the four records differ only in the
// selector letter of the asm string.
// NOTE(review): no record shows an argument after its asm string (each ends
// with a trailing comma) -- the closing lines appear to be missing from this
// listing; confirm against upstream.
2674 def TLD4_R_2D_F32_F32
2675 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2676 Float32Regs:$v2, Float32Regs:$v3),
2677 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2678 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2679 "[$t, $s, \\{$x, $y\\}];",
2681 def TLD4_G_2D_F32_F32
2682 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2683 Float32Regs:$v2, Float32Regs:$v3),
2684 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2685 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2686 "[$t, $s, \\{$x, $y\\}];",
2688 def TLD4_B_2D_F32_F32
2689 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2690 Float32Regs:$v2, Float32Regs:$v3),
2691 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2692 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2693 "[$t, $s, \\{$x, $y\\}];",
2695 def TLD4_A_2D_F32_F32
2696 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2697 Float32Regs:$v2, Float32Regs:$v3),
2698 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2699 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2700 "[$t, $s, \\{$x, $y\\}];",
// tld4 2D instructions with the ".s32" result type: one record per component
// selector ("tld4.r", "tld4.g", "tld4.b", "tld4.a"). Each writes four
// Int32Regs ($v0..$v3) and reads Int64Regs $t and $s plus Float32Regs
// coordinates $x and $y; the records differ only in the selector letter.
// NOTE(review): no record shows an argument after its asm string (each ends
// with a trailing comma) -- the closing lines appear to be missing from this
// listing; confirm against upstream.
2702 def TLD4_R_2D_S32_F32
2703 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2704 Int32Regs:$v2, Int32Regs:$v3),
2705 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2706 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2707 "[$t, $s, \\{$x, $y\\}];",
2709 def TLD4_G_2D_S32_F32
2710 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2711 Int32Regs:$v2, Int32Regs:$v3),
2712 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2713 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2714 "[$t, $s, \\{$x, $y\\}];",
2716 def TLD4_B_2D_S32_F32
2717 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2718 Int32Regs:$v2, Int32Regs:$v3),
2719 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2720 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2721 "[$t, $s, \\{$x, $y\\}];",
2723 def TLD4_A_2D_S32_F32
2724 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2725 Int32Regs:$v2, Int32Regs:$v3),
2726 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2727 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2728 "[$t, $s, \\{$x, $y\\}];",
// tld4 2D instructions with the ".u32" result type: one record per component
// selector ("tld4.r", "tld4.g", "tld4.b", "tld4.a"). Each writes four
// Int32Regs ($v0..$v3) and reads Int64Regs $t and $s plus Float32Regs
// coordinates $x and $y; the records differ only in the selector letter.
// NOTE(review): no record shows an argument after its asm string (each ends
// with a trailing comma) -- the closing lines appear to be missing from this
// listing; confirm against upstream.
2730 def TLD4_R_2D_U32_F32
2731 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2732 Int32Regs:$v2, Int32Regs:$v3),
2733 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2734 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2735 "[$t, $s, \\{$x, $y\\}];",
2737 def TLD4_G_2D_U32_F32
2738 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2739 Int32Regs:$v2, Int32Regs:$v3),
2740 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2741 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2742 "[$t, $s, \\{$x, $y\\}];",
2744 def TLD4_B_2D_U32_F32
2745 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2746 Int32Regs:$v2, Int32Regs:$v3),
2747 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2748 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2749 "[$t, $s, \\{$x, $y\\}];",
2751 def TLD4_A_2D_U32_F32
2752 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2753 Int32Regs:$v2, Int32Regs:$v3),
2754 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2755 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2756 "[$t, $s, \\{$x, $y\\}];",
2762 let IsTex = 1, IsTexModeUnified = 1 in {
2763 // Texture fetch instructions using handles
// Unified-mode 1D texture fetches (these sit under
// `let IsTex = 1, IsTexModeUnified = 1`) producing four Float32Regs results
// ($r, $g, $b, $a). Unlike the records that take both $t and $s, these take a
// single Int64Regs $t and the asm address is "[$t, {$x}]" with no $s operand.
// Coordinate $x is Int32Regs for ".s32" and Float32Regs for ".f32"; LEVEL
// adds Float32Regs $lod and GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): no record shows an argument after its asm string (each ends
// with a trailing comma) -- the closing lines appear to be missing from this
// listing; confirm against upstream.
2764 def TEX_UNIFIED_1D_F32_S32
2765 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2766 Float32Regs:$b, Float32Regs:$a),
2767 (ins Int64Regs:$t, Int32Regs:$x),
2768 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2770 def TEX_UNIFIED_1D_F32_F32
2771 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2772 Float32Regs:$b, Float32Regs:$a),
2773 (ins Int64Regs:$t, Float32Regs:$x),
2774 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2776 def TEX_UNIFIED_1D_F32_F32_LEVEL
2777 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2778 Float32Regs:$b, Float32Regs:$a),
2779 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2780 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2781 "[$t, \\{$x\\}], $lod;",
2783 def TEX_UNIFIED_1D_F32_F32_GRAD
2784 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2785 Float32Regs:$b, Float32Regs:$a),
2786 (ins Int64Regs:$t, Float32Regs:$x,
2787 Float32Regs:$gradx, Float32Regs:$grady),
2788 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2789 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D texture fetches (asm "tex[.level|.grad].1d.v4.s32.*")
// producing four Int32Regs results ($r, $g, $b, $a). Each takes a single
// Int64Regs $t (asm address "[$t, {$x}]", no $s operand) and a coordinate $x
// (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds Float32Regs
// $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2791 def TEX_UNIFIED_1D_S32_S32
2792 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2793 Int32Regs:$b, Int32Regs:$a),
2794 (ins Int64Regs:$t, Int32Regs:$x),
2795 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2797 def TEX_UNIFIED_1D_S32_F32
2798 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2799 Int32Regs:$b, Int32Regs:$a),
2800 (ins Int64Regs:$t, Float32Regs:$x),
2801 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2803 def TEX_UNIFIED_1D_S32_F32_LEVEL
2804 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2805 Int32Regs:$b, Int32Regs:$a),
2806 (ins Int64Regs:$t, Float32Regs:$x,
2808 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2809 "[$t, \\{$x\\}], $lod;",
2811 def TEX_UNIFIED_1D_S32_F32_GRAD
2812 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2813 Int32Regs:$b, Int32Regs:$a),
2814 (ins Int64Regs:$t, Float32Regs:$x,
2815 Float32Regs:$gradx, Float32Regs:$grady),
2816 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2817 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].1d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a). Each takes a single Int64Regs $t (asm address
// "[$t, {$x}]", no $s operand) and a coordinate $x (Int32Regs for ".s32",
// Float32Regs for ".f32"). GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2819 def TEX_UNIFIED_1D_U32_S32
2820 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2821 Int32Regs:$b, Int32Regs:$a),
2822 (ins Int64Regs:$t, Int32Regs:$x),
2823 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2825 def TEX_UNIFIED_1D_U32_F32
2826 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2827 Int32Regs:$b, Int32Regs:$a),
2828 (ins Int64Regs:$t, Float32Regs:$x),
2829 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2831 def TEX_UNIFIED_1D_U32_F32_LEVEL
2832 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2833 Int32Regs:$b, Int32Regs:$a),
2834 (ins Int64Regs:$t, Float32Regs:$x,
2836 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2837 "[$t, \\{$x\\}], $lod;",
2839 def TEX_UNIFIED_1D_U32_F32_GRAD
2840 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2841 Int32Regs:$b, Int32Regs:$a),
2842 (ins Int64Regs:$t, Float32Regs:$x,
2843 Float32Regs:$gradx, Float32Regs:$grady),
2844 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2845 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D array texture fetches (asm geometry ".a1d") producing four
// Float32Regs results ($r, $g, $b, $a). Each takes a single Int64Regs $t (no
// $s operand), an Int32Regs $l emitted first in the "{$l, $x}" vector, and a
// coordinate $x (Int32Regs for ".s32", Float32Regs for ".f32").
// GRAD adds Float32Regs $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2848 def TEX_UNIFIED_1D_ARRAY_F32_S32
2849 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2850 Float32Regs:$b, Float32Regs:$a),
2851 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2852 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2853 "[$t, \\{$l, $x\\}];",
2855 def TEX_UNIFIED_1D_ARRAY_F32_F32
2856 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2857 Float32Regs:$b, Float32Regs:$a),
2858 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2859 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2860 "[$t, \\{$l, $x\\}];",
2862 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
2863 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2864 Float32Regs:$b, Float32Regs:$a),
2865 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2867 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2868 "[$t, \\{$l, $x\\}], $lod;",
2870 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
2871 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2872 Float32Regs:$b, Float32Regs:$a),
2873 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2874 Float32Regs:$gradx, Float32Regs:$grady),
2875 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2876 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D array texture fetches (asm
// "tex[.level|.grad].a1d.v4.s32.*") producing four Int32Regs results
// ($r, $g, $b, $a). Each takes a single Int64Regs $t (no $s operand), an
// Int32Regs $l emitted first in the "{$l, $x}" vector, and a coordinate $x
// (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds Float32Regs
// $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2878 def TEX_UNIFIED_1D_ARRAY_S32_S32
2879 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2880 Int32Regs:$b, Int32Regs:$a),
2881 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2882 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2883 "[$t, \\{$l, $x\\}];",
2885 def TEX_UNIFIED_1D_ARRAY_S32_F32
2886 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2887 Int32Regs:$b, Int32Regs:$a),
2888 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2889 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2890 "[$t, \\{$l, $x\\}];",
2892 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
2893 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2894 Int32Regs:$b, Int32Regs:$a),
2895 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2897 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2898 "[$t, \\{$l, $x\\}], $lod;",
2900 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
2901 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2902 Int32Regs:$b, Int32Regs:$a),
2903 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2904 Float32Regs:$gradx, Float32Regs:$grady),
2905 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2906 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 1D array texture fetches whose asm uses the ".u32" result type
// ("tex[.level|.grad].a1d.v4.u32.*"). Results are four Int32Regs
// ($r, $g, $b, $a). Each takes a single Int64Regs $t (no $s operand), an
// Int32Regs $l emitted first in the "{$l, $x}" vector, and a coordinate $x
// (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds Float32Regs
// $gradx and $grady.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $x with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2908 def TEX_UNIFIED_1D_ARRAY_U32_S32
2909 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2910 Int32Regs:$b, Int32Regs:$a),
2911 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2912 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2913 "[$t, \\{$l, $x\\}];",
2915 def TEX_UNIFIED_1D_ARRAY_U32_F32
2916 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2917 Int32Regs:$b, Int32Regs:$a),
2918 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2919 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2920 "[$t, \\{$l, $x\\}];",
2922 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
2923 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2924 Int32Regs:$b, Int32Regs:$a),
2925 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2927 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2928 "[$t, \\{$l, $x\\}], $lod;",
2930 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
2931 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2932 Int32Regs:$b, Int32Regs:$a),
2933 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2934 Float32Regs:$gradx, Float32Regs:$grady),
2935 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2936 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// Unified-mode 2D texture fetches (asm "tex[.level|.grad].2d.v4.f32.*")
// producing four Float32Regs results ($r, $g, $b, $a). Each takes a single
// Int64Regs $t (asm address "[$t, {$x, $y}]", no $s operand) and coordinates
// $x and $y (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds
// {$gradx0, $gradx1} and {$grady0, $grady1}, all Float32Regs.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $y with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2939 def TEX_UNIFIED_2D_F32_S32
2940 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2941 Float32Regs:$b, Float32Regs:$a),
2942 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2943 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2944 "[$t, \\{$x, $y\\}];",
2946 def TEX_UNIFIED_2D_F32_F32
2947 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2948 Float32Regs:$b, Float32Regs:$a),
2949 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2950 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2951 "[$t, \\{$x, $y\\}];",
2953 def TEX_UNIFIED_2D_F32_F32_LEVEL
2954 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2955 Float32Regs:$b, Float32Regs:$a),
2956 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2958 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2959 "[$t, \\{$x, $y\\}], $lod;",
2961 def TEX_UNIFIED_2D_F32_F32_GRAD
2962 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2963 Float32Regs:$b, Float32Regs:$a),
2964 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2965 Float32Regs:$gradx0, Float32Regs:$gradx1,
2966 Float32Regs:$grady0, Float32Regs:$grady1),
2967 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2968 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2969 "\\{$grady0, $grady1\\};",
// Unified-mode 2D texture fetches (asm "tex[.level|.grad].2d.v4.s32.*")
// producing four Int32Regs results ($r, $g, $b, $a). Each takes a single
// Int64Regs $t (asm address "[$t, {$x, $y}]", no $s operand) and coordinates
// $x and $y (Int32Regs for ".s32", Float32Regs for ".f32"). GRAD adds
// {$gradx0, $gradx1} and {$grady0, $grady1}, all Float32Regs.
// NOTE(review): the LEVEL record's asm string uses $lod but its (ins ...) list
// ends at $y with a trailing comma, and no record shows an argument after its
// asm string -- lines appear to be missing; confirm against upstream.
2971 def TEX_UNIFIED_2D_S32_S32
2972 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2973 Int32Regs:$b, Int32Regs:$a),
2974 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2975 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2976 "[$t, \\{$x, $y\\}];",
2978 def TEX_UNIFIED_2D_S32_F32
2979 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2980 Int32Regs:$b, Int32Regs:$a),
2981 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2982 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2983 "[$t, \\{$x, $y\\}];",
2985 def TEX_UNIFIED_2D_S32_F32_LEVEL
2986 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2987 Int32Regs:$b, Int32Regs:$a),
2988 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2990 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2991 "[$t, \\{$x, $y\\}], $lod;",
2993 def TEX_UNIFIED_2D_S32_F32_GRAD
2994 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2995 Int32Regs:$b, Int32Regs:$a),
2996 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2997 Float32Regs:$gradx0, Float32Regs:$gradx1,
2998 Float32Regs:$grady0, Float32Regs:$grady1),
2999 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3000 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3001 "\\{$grady0, $grady1\\};",
3003 def TEX_UNIFIED_2D_U32_S32
3004 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3005 Int32Regs:$b, Int32Regs:$a),
3006 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3007 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3008 "[$t, \\{$x, $y\\}];",
3010 def TEX_UNIFIED_2D_U32_F32
3011 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3012 Int32Regs:$b, Int32Regs:$a),
3013 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3014 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3015 "[$t, \\{$x, $y\\}];",
3017 def TEX_UNIFIED_2D_U32_F32_LEVEL
3018 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3019 Int32Regs:$b, Int32Regs:$a),
3020 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3022 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3023 "[$t, \\{$x, $y\\}], $lod;",
3025 def TEX_UNIFIED_2D_U32_F32_GRAD
3026 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3027 Int32Regs:$b, Int32Regs:$a),
3028 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3029 Float32Regs:$gradx0, Float32Regs:$gradx1,
3030 Float32Regs:$grady0, Float32Regs:$grady1),
3031 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3032 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3033 "\\{$grady0, $grady1\\};",
// 2D-array texture fetches (tex.a2d.v4.*) addressed through the Int64Regs
// handle $t. Every record writes four 32-bit components to $r, $g, $b, $a.
// The coordinate operand is emitted as the four-element vector
// {$l, $x, $y, $y}: $l is an Int32Regs operand (presumably the array layer
// index) and $y is deliberately printed twice to fill the fourth slot.
// NOTE(review): the PTX ISA describes the a2d coordinate as a four-element
// vector whose last element is ignored -- confirm against the tex section.
//
// f32 results, s32 coordinates.
3036 def TEX_UNIFIED_2D_ARRAY_F32_S32
3037 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3038 Float32Regs:$b, Float32Regs:$a),
3039 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3041 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3042 "[$t, \\{$l, $x, $y, $y\\}];",
// f32 results, f32 coordinates.
3044 def TEX_UNIFIED_2D_ARRAY_F32_F32
3045 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3046 Float32Regs:$b, Float32Regs:$a),
3047 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3049 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3050 "[$t, \\{$l, $x, $y, $y\\}];",
// _LEVEL: tex.level form; $lod is appended after the coordinate vector.
3052 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3053 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3054 Float32Regs:$b, Float32Regs:$a),
3055 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3056 Float32Regs:$y, Float32Regs:$lod),
3057 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3058 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// _GRAD: tex.grad form; the two-element gradient vectors
// {$gradx0, $gradx1} and {$grady0, $grady1} follow the coordinates.
3060 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3061 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3062 Float32Regs:$b, Float32Regs:$a),
3063 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3064 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3065 Float32Regs:$grady0, Float32Regs:$grady1),
3066 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3067 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3068 "\\{$grady0, $grady1\\};",
// s32 results (Int32Regs), s32 coordinates.
3070 def TEX_UNIFIED_2D_ARRAY_S32_S32
3071 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3072 Int32Regs:$b, Int32Regs:$a),
3073 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3075 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3076 "[$t, \\{$l, $x, $y, $y\\}];",
// s32 results, f32 coordinates.
3078 def TEX_UNIFIED_2D_ARRAY_S32_F32
3079 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3080 Int32Regs:$b, Int32Regs:$a),
3081 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3083 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3084 "[$t, \\{$l, $x, $y, $y\\}];",
// s32 results, f32 coordinates, explicit $lod.
3086 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3087 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3088 Int32Regs:$b, Int32Regs:$a),
3089 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3090 Float32Regs:$y, Float32Regs:$lod),
3091 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3092 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// s32 results, f32 coordinates, explicit gradients.
3094 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3095 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3096 Int32Regs:$b, Int32Regs:$a),
3097 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3099 Float32Regs:$gradx0, Float32Regs:$gradx1,
3100 Float32Regs:$grady0, Float32Regs:$grady1),
3101 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3102 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3103 "\\{$grady0, $grady1\\};",
// u32 results: same Int32Regs result class as the s32 records; only the
// mnemonic's result type suffix (.u32) differs.
3105 def TEX_UNIFIED_2D_ARRAY_U32_S32
3106 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3107 Int32Regs:$b, Int32Regs:$a),
3108 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3110 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3111 "[$t, \\{$l, $x, $y, $y\\}];",
// u32 results, f32 coordinates.
3113 def TEX_UNIFIED_2D_ARRAY_U32_F32
3114 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3115 Int32Regs:$b, Int32Regs:$a),
3116 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3118 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3119 "[$t, \\{$l, $x, $y, $y\\}];",
// u32 results, f32 coordinates, explicit $lod.
3121 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3122 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3123 Int32Regs:$b, Int32Regs:$a),
3124 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3125 Float32Regs:$y, Float32Regs:$lod),
3126 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3127 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
// u32 results, f32 coordinates, explicit gradients.
3129 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3130 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3131 Int32Regs:$b, Int32Regs:$a),
3132 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3134 Float32Regs:$gradx0, Float32Regs:$gradx1,
3135 Float32Regs:$grady0, Float32Regs:$grady1),
3136 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3137 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3138 "\\{$grady0, $grady1\\};",
// 3D texture fetches (tex.3d.v4.*) addressed through the Int64Regs handle $t.
// Every record writes four 32-bit components to $r, $g, $b, $a. The
// coordinate operand is emitted as the four-element vector {$x, $y, $z, $z},
// i.e. $z is deliberately printed twice to fill the fourth slot; the _GRAD
// records pad their gradient vectors the same way with $gradx2 / $grady2.
// NOTE(review): the PTX ISA describes the 3d coordinate as a four-element
// vector whose last element is ignored -- confirm against the tex section.
//
// f32 results, s32 coordinates.
3141 def TEX_UNIFIED_3D_F32_S32
3142 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3143 Float32Regs:$b, Float32Regs:$a),
3144 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3146 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3147 "[$t, \\{$x, $y, $z, $z\\}];",
// f32 results, f32 coordinates.
3149 def TEX_UNIFIED_3D_F32_F32
3150 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3151 Float32Regs:$b, Float32Regs:$a),
3152 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3154 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3155 "[$t, \\{$x, $y, $z, $z\\}];",
// _LEVEL: tex.level form; $lod is appended after the coordinate vector.
3157 def TEX_UNIFIED_3D_F32_F32_LEVEL
3158 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3159 Float32Regs:$b, Float32Regs:$a),
3160 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3161 Float32Regs:$z, Float32Regs:$lod),
3162 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3163 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// _GRAD: tex.grad form; three gradient components per axis, each vector
// padded to four elements by repeating the last component.
3165 def TEX_UNIFIED_3D_F32_F32_GRAD
3166 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3167 Float32Regs:$b, Float32Regs:$a),
3168 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3170 Float32Regs:$gradx0, Float32Regs:$gradx1,
3171 Float32Regs:$gradx2, Float32Regs:$grady0,
3172 Float32Regs:$grady1, Float32Regs:$grady2),
3173 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3174 "[$t, \\{$x, $y, $z, $z\\}], "
3175 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3176 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// s32 results (Int32Regs), s32 coordinates.
3178 def TEX_UNIFIED_3D_S32_S32
3179 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3180 Int32Regs:$b, Int32Regs:$a),
3181 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3183 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3184 "[$t, \\{$x, $y, $z, $z\\}];",
// s32 results, f32 coordinates.
3186 def TEX_UNIFIED_3D_S32_F32
3187 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3188 Int32Regs:$b, Int32Regs:$a),
3189 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3191 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3192 "[$t, \\{$x, $y, $z, $z\\}];",
// s32 results, f32 coordinates, explicit $lod.
3194 def TEX_UNIFIED_3D_S32_F32_LEVEL
3195 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3196 Int32Regs:$b, Int32Regs:$a),
3197 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3198 Float32Regs:$z, Float32Regs:$lod),
3199 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3200 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// s32 results, f32 coordinates, explicit gradients.
3202 def TEX_UNIFIED_3D_S32_F32_GRAD
3203 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3204 Int32Regs:$b, Int32Regs:$a),
3205 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3207 Float32Regs:$gradx0, Float32Regs:$gradx1,
3208 Float32Regs:$gradx2, Float32Regs:$grady0,
3209 Float32Regs:$grady1, Float32Regs:$grady2),
3210 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3211 "[$t, \\{$x, $y, $z, $z\\}], "
3212 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3213 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// u32 results: same Int32Regs result class as the s32 records; only the
// mnemonic's result type suffix (.u32) differs.
3215 def TEX_UNIFIED_3D_U32_S32
3216 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3217 Int32Regs:$b, Int32Regs:$a),
3218 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3220 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3221 "[$t, \\{$x, $y, $z, $z\\}];",
// u32 results, f32 coordinates.
3223 def TEX_UNIFIED_3D_U32_F32
3224 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3225 Int32Regs:$b, Int32Regs:$a),
3226 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3228 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3229 "[$t, \\{$x, $y, $z, $z\\}];",
// u32 results, f32 coordinates, explicit $lod.
3231 def TEX_UNIFIED_3D_U32_F32_LEVEL
3232 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3233 Int32Regs:$b, Int32Regs:$a),
3234 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3235 Float32Regs:$z, Float32Regs:$lod),
3236 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3237 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// u32 results, f32 coordinates, explicit gradients.
3239 def TEX_UNIFIED_3D_U32_F32_GRAD
3240 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3241 Int32Regs:$b, Int32Regs:$a),
3242 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3244 Float32Regs:$gradx0, Float32Regs:$gradx1,
3245 Float32Regs:$gradx2, Float32Regs:$grady0,
3246 Float32Regs:$grady1, Float32Regs:$grady2),
3247 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3248 "[$t, \\{$x, $y, $z, $z\\}], "
3249 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3250 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Cubemap texture fetches (tex.cube.v4.*) addressed through the handle $t.
// All records take f32 coordinates and write four 32-bit components to
// $r, $g, $b, $a. The coordinate operand is emitted as the four-element
// vector {$x, $y, $z, $z}, i.e. $z is deliberately printed twice to fill the
// fourth slot. Only plain and _LEVEL (explicit $lod) forms appear in this
// group.
//
// f32 results.
3253 def TEX_UNIFIED_CUBE_F32_F32
3254 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3255 Float32Regs:$b, Float32Regs:$a),
3257 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3258 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3259 "[$t, \\{$x, $y, $z, $z\\}];",
// f32 results, explicit $lod appended after the coordinate vector.
3261 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3262 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3263 Float32Regs:$b, Float32Regs:$a),
3265 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3267 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3268 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// s32 results (Int32Regs).
3270 def TEX_UNIFIED_CUBE_S32_F32
3271 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3272 Int32Regs:$b, Int32Regs:$a),
3274 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3275 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3276 "[$t, \\{$x, $y, $z, $z\\}];",
// s32 results, explicit $lod.
3278 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3279 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3280 Int32Regs:$b, Int32Regs:$a),
3282 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3284 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3285 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// u32 results: same Int32Regs result class as the s32 records; only the
// mnemonic's result type suffix (.u32) differs.
3287 def TEX_UNIFIED_CUBE_U32_F32
3288 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3289 Int32Regs:$b, Int32Regs:$a),
3291 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3292 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3293 "[$t, \\{$x, $y, $z, $z\\}];",
// u32 results, explicit $lod.
3295 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3296 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3297 Int32Regs:$b, Int32Regs:$a),
3299 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3301 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3302 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
// Cubemap-array texture fetches (tex.acube.v4.*) addressed through the
// Int64Regs handle $t. All records take an Int32Regs operand $l (presumably
// the array layer index) plus f32 coordinates, and write four 32-bit
// components to $r, $g, $b, $a. The coordinate vector {$l, $x, $y, $z}
// already has four elements, so no operand is repeated here. Only plain and
// _LEVEL (explicit $lod) forms appear in this group.
//
// f32 results.
3305 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3306 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3307 Float32Regs:$b, Float32Regs:$a),
3308 (ins Int64Regs:$t, Int32Regs:$l,
3309 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3310 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3311 "[$t, \\{$l, $x, $y, $z\\}];",
// f32 results, explicit $lod appended after the coordinate vector.
3313 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3314 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3315 Float32Regs:$b, Float32Regs:$a),
3316 (ins Int64Regs:$t, Int32Regs:$l,
3317 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3319 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3320 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// s32 results (Int32Regs).
3322 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3323 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3324 Int32Regs:$b, Int32Regs:$a),
3325 (ins Int64Regs:$t, Int32Regs:$l,
3326 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3327 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3328 "[$t, \\{$l, $x, $y, $z\\}];",
// s32 results, explicit $lod.
3330 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3331 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3332 Int32Regs:$b, Int32Regs:$a),
3333 (ins Int64Regs:$t, Int32Regs:$l,
3334 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3336 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3337 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// u32 results: same Int32Regs result class as the s32 records; only the
// mnemonic's result type suffix (.u32) differs.
3339 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3340 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3341 Int32Regs:$b, Int32Regs:$a),
3342 (ins Int64Regs:$t, Int32Regs:$l,
3343 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3344 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3345 "[$t, \\{$l, $x, $y, $z\\}];",
// u32 results, explicit $lod.
3347 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3348 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3349 Int32Regs:$b, Int32Regs:$a),
3350 (ins Int64Regs:$t, Int32Regs:$l,
3351 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3353 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3354 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
// tld4 2D records addressed through the Int64Regs handle $t with f32
// coordinates {$x, $y}. Each record writes four 32-bit values to
// $v0, $v1, $v2, $v3. The letter in the record name (R, G, B, A) matches the
// component selector in the mnemonic (tld4.r / .g / .b / .a), and the next
// name field (F32, S32, U32) matches the mnemonic's result type suffix.
// NOTE(review): per the PTX ISA, tld4 returns the selected component of the
// four texels in the bilinear footprint -- confirm against the tld4 section.
//
// f32 results (Float32Regs), one record per component selector.
3357 def TLD4_UNIFIED_R_2D_F32_F32
3358 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3359 Float32Regs:$v2, Float32Regs:$v3),
3360 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3361 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3362 "[$t, \\{$x, $y\\}];",
3364 def TLD4_UNIFIED_G_2D_F32_F32
3365 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3366 Float32Regs:$v2, Float32Regs:$v3),
3367 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3368 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3369 "[$t, \\{$x, $y\\}];",
3371 def TLD4_UNIFIED_B_2D_F32_F32
3372 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3373 Float32Regs:$v2, Float32Regs:$v3),
3374 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3375 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3376 "[$t, \\{$x, $y\\}];",
3378 def TLD4_UNIFIED_A_2D_F32_F32
3379 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3380 Float32Regs:$v2, Float32Regs:$v3),
3381 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3382 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3383 "[$t, \\{$x, $y\\}];",
// s32 results (Int32Regs), one record per component selector.
3385 def TLD4_UNIFIED_R_2D_S32_F32
3386 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3387 Int32Regs:$v2, Int32Regs:$v3),
3388 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3389 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3390 "[$t, \\{$x, $y\\}];",
3392 def TLD4_UNIFIED_G_2D_S32_F32
3393 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3394 Int32Regs:$v2, Int32Regs:$v3),
3395 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3396 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3397 "[$t, \\{$x, $y\\}];",
3399 def TLD4_UNIFIED_B_2D_S32_F32
3400 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3401 Int32Regs:$v2, Int32Regs:$v3),
3402 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3403 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3404 "[$t, \\{$x, $y\\}];",
3406 def TLD4_UNIFIED_A_2D_S32_F32
3407 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3408 Int32Regs:$v2, Int32Regs:$v3),
3409 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3410 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3411 "[$t, \\{$x, $y\\}];",
// u32 results: same Int32Regs result class as the s32 records; only the
// mnemonic's result type suffix (.u32) differs.
3413 def TLD4_UNIFIED_R_2D_U32_F32
3414 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3415 Int32Regs:$v2, Int32Regs:$v3),
3416 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3417 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3418 "[$t, \\{$x, $y\\}];",
3420 def TLD4_UNIFIED_G_2D_U32_F32
3421 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3422 Int32Regs:$v2, Int32Regs:$v3),
3423 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3424 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3425 "[$t, \\{$x, $y\\}];",
3427 def TLD4_UNIFIED_B_2D_U32_F32
3428 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3429 Int32Regs:$v2, Int32Regs:$v3),
3430 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3431 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3432 "[$t, \\{$x, $y\\}];",
3434 def TLD4_UNIFIED_A_2D_U32_F32
3435 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3436 Int32Regs:$v2, Int32Regs:$v3),
3437 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3438 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3439 "[$t, \\{$x, $y\\}];",
3445 //=== Surface load instructions
// Single-element surface loads (suld.b.<geom>.b<N>.clamp) from the Int64Regs
// surface handle $s with Int32Regs coordinates; the loaded value is written
// to $r. Record names follow SULD_<geometry>_I<bits>_CLAMP.
// - The b8 records place their result in Int16Regs, the same class the b16
//   records use (presumably because there is no narrower register class --
//   TODO confirm).
// - NOTE(review): per the PTX ISA, .clamp selects clamping of out-of-range
//   coordinates -- confirm against the suld section.
//
// 1D: coordinate vector {$x}.
3448 def SULD_1D_I8_CLAMP
3449 : NVPTXInst<(outs Int16Regs:$r),
3450 (ins Int64Regs:$s, Int32Regs:$x),
3451 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3453 def SULD_1D_I16_CLAMP
3454 : NVPTXInst<(outs Int16Regs:$r),
3455 (ins Int64Regs:$s, Int32Regs:$x),
3456 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3458 def SULD_1D_I32_CLAMP
3459 : NVPTXInst<(outs Int32Regs:$r),
3460 (ins Int64Regs:$s, Int32Regs:$x),
3461 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3463 def SULD_1D_I64_CLAMP
3464 : NVPTXInst<(outs Int64Regs:$r),
3465 (ins Int64Regs:$s, Int32Regs:$x),
3466 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
// 1D array: coordinate vector {$l, $x} ($l presumably the layer index).
3469 def SULD_1D_ARRAY_I8_CLAMP
3470 : NVPTXInst<(outs Int16Regs:$r),
3471 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3472 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3474 def SULD_1D_ARRAY_I16_CLAMP
3475 : NVPTXInst<(outs Int16Regs:$r),
3476 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3477 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3479 def SULD_1D_ARRAY_I32_CLAMP
3480 : NVPTXInst<(outs Int32Regs:$r),
3481 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3482 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3484 def SULD_1D_ARRAY_I64_CLAMP
3485 : NVPTXInst<(outs Int64Regs:$r),
3486 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3487 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
// 2D: coordinate vector {$x, $y}.
3490 def SULD_2D_I8_CLAMP
3491 : NVPTXInst<(outs Int16Regs:$r),
3492 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3493 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3495 def SULD_2D_I16_CLAMP
3496 : NVPTXInst<(outs Int16Regs:$r),
3497 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3498 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3500 def SULD_2D_I32_CLAMP
3501 : NVPTXInst<(outs Int32Regs:$r),
3502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3503 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3505 def SULD_2D_I64_CLAMP
3506 : NVPTXInst<(outs Int64Regs:$r),
3507 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3508 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array: coordinate vector {$l, $x, $y, $y}; $y is printed twice to fill
// the fourth slot of the vector.
3511 def SULD_2D_ARRAY_I8_CLAMP
3512 : NVPTXInst<(outs Int16Regs:$r),
3513 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3514 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3516 def SULD_2D_ARRAY_I16_CLAMP
3517 : NVPTXInst<(outs Int16Regs:$r),
3518 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3519 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3521 def SULD_2D_ARRAY_I32_CLAMP
3522 : NVPTXInst<(outs Int32Regs:$r),
3523 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3524 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3526 def SULD_2D_ARRAY_I64_CLAMP
3527 : NVPTXInst<(outs Int64Regs:$r),
3528 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3529 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
// 3D: coordinate vector {$x, $y, $z, $z}; $z is printed twice to fill the
// fourth slot of the vector.
3532 def SULD_3D_I8_CLAMP
3533 : NVPTXInst<(outs Int16Regs:$r),
3534 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3535 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3537 def SULD_3D_I16_CLAMP
3538 : NVPTXInst<(outs Int16Regs:$r),
3539 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3540 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3542 def SULD_3D_I32_CLAMP
3543 : NVPTXInst<(outs Int32Regs:$r),
3544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3545 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3547 def SULD_3D_I64_CLAMP
3548 : NVPTXInst<(outs Int64Regs:$r),
3549 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3550 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element surface loads (suld.b.<geom>.v2.b<N>.clamp) from the Int64Regs
// surface handle $s with Int32Regs coordinates; the two loaded values are
// written to $r and $g. Record names follow SULD_<geometry>_V2I<bits>_CLAMP.
// As in the scalar records, b8 results use Int16Regs, and the 2D-array / 3D
// coordinate vectors repeat their last operand to reach four elements.
//
// 1D: coordinate vector {$x}.
3555 def SULD_1D_V2I8_CLAMP
3556 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3557 (ins Int64Regs:$s, Int32Regs:$x),
3558 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3560 def SULD_1D_V2I16_CLAMP
3561 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3562 (ins Int64Regs:$s, Int32Regs:$x),
3563 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3565 def SULD_1D_V2I32_CLAMP
3566 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3567 (ins Int64Regs:$s, Int32Regs:$x),
3568 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3570 def SULD_1D_V2I64_CLAMP
3571 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3572 (ins Int64Regs:$s, Int32Regs:$x),
3573 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array: coordinate vector {$l, $x}.
3576 def SULD_1D_ARRAY_V2I8_CLAMP
3577 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3578 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3579 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3581 def SULD_1D_ARRAY_V2I16_CLAMP
3582 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3583 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3584 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3586 def SULD_1D_ARRAY_V2I32_CLAMP
3587 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3588 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3589 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3591 def SULD_1D_ARRAY_V2I64_CLAMP
3592 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3593 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3594 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D: coordinate vector {$x, $y}.
3597 def SULD_2D_V2I8_CLAMP
3598 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3599 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3600 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3602 def SULD_2D_V2I16_CLAMP
3603 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3604 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3605 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3607 def SULD_2D_V2I32_CLAMP
3608 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3609 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3610 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3612 def SULD_2D_V2I64_CLAMP
3613 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3614 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3615 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// 2D array: coordinate vector {$l, $x, $y, $y} ($y repeated as padding).
3618 def SULD_2D_ARRAY_V2I8_CLAMP
3619 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3620 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3621 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3622 "[$s, \\{$l, $x, $y, $y\\}];",
3624 def SULD_2D_ARRAY_V2I16_CLAMP
3625 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3626 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3627 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3628 "[$s, \\{$l, $x, $y, $y\\}];",
3630 def SULD_2D_ARRAY_V2I32_CLAMP
3631 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3632 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3633 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3634 "[$s, \\{$l, $x, $y, $y\\}];",
3636 def SULD_2D_ARRAY_V2I64_CLAMP
3637 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3638 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3639 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3640 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: coordinate vector {$x, $y, $z, $z} ($z repeated as padding).
3643 def SULD_3D_V2I8_CLAMP
3644 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3645 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3646 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3648 def SULD_3D_V2I16_CLAMP
3649 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3650 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3651 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3653 def SULD_3D_V2I32_CLAMP
3654 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3655 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3656 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3658 def SULD_3D_V2I64_CLAMP
3659 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3660 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3661 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-element surface loads (suld.b.<geom>.v4.b<N>.clamp) from the Int64Regs
// surface handle $s with Int32Regs coordinates; the four loaded values are
// written to $r, $g, $b, $a. Record names follow
// SULD_<geometry>_V4I<bits>_CLAMP. Only b8, b16 and b32 element widths are
// defined in this group (there is no V4I64 record here). As in the scalar
// records, b8 results use Int16Regs, and the 2D-array / 3D coordinate vectors
// repeat their last operand to reach four elements.
//
// 1D: coordinate vector {$x}.
3666 def SULD_1D_V4I8_CLAMP
3667 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3668 (ins Int64Regs:$s, Int32Regs:$x),
3669 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3671 def SULD_1D_V4I16_CLAMP
3672 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3673 (ins Int64Regs:$s, Int32Regs:$x),
3674 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3676 def SULD_1D_V4I32_CLAMP
3677 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3678 (ins Int64Regs:$s, Int32Regs:$x),
3679 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// 1D array: coordinate vector {$l, $x}.
3682 def SULD_1D_ARRAY_V4I8_CLAMP
3683 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3684 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3685 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3686 "[$s, \\{$l, $x\\}];",
3688 def SULD_1D_ARRAY_V4I16_CLAMP
3689 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3690 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3691 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3692 "[$s, \\{$l, $x\\}];",
3694 def SULD_1D_ARRAY_V4I32_CLAMP
3695 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3696 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3697 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3698 "[$s, \\{$l, $x\\}];",
// 2D: coordinate vector {$x, $y}.
3701 def SULD_2D_V4I8_CLAMP
3702 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3703 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3704 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3706 def SULD_2D_V4I16_CLAMP
3707 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3708 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3709 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3711 def SULD_2D_V4I32_CLAMP
3712 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3713 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3714 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// 2D array: coordinate vector {$l, $x, $y, $y} ($y repeated as padding).
3717 def SULD_2D_ARRAY_V4I8_CLAMP
3718 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3719 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3720 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3721 "[$s, \\{$l, $x, $y, $y\\}];",
3723 def SULD_2D_ARRAY_V4I16_CLAMP
3724 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3725 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3726 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3727 "[$s, \\{$l, $x, $y, $y\\}];",
3729 def SULD_2D_ARRAY_V4I32_CLAMP
3730 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3731 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3732 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3733 "[$s, \\{$l, $x, $y, $y\\}];",
// 3D: coordinate vector {$x, $y, $z, $z} ($z repeated as padding).
3737 def SULD_3D_V4I8_CLAMP
3738 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3739 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3740 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3741 "[$s, \\{$x, $y, $z, $z\\}];",
3743 def SULD_3D_V4I16_CLAMP
3744 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3745 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3746 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3747 "[$s, \\{$x, $y, $z, $z\\}];",
3749 def SULD_3D_V4I32_CLAMP
3750 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3751 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3752 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3753 "[$s, \\{$x, $y, $z, $z\\}];",
3761 : NVPTXInst<(outs Int16Regs:$r),
3762 (ins Int64Regs:$s, Int32Regs:$x),
3763 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
// Single-element 1D and 1D-array surface loads using the .trap modifier
// (suld.b.<geom>.b<N>.trap). The surface handle $s is an Int64Regs operand,
// coordinates are Int32Regs, and the loaded value is written to $r.
// NOTE(review): per the PTX ISA, .trap selects trapping on out-of-range
// coordinates -- confirm against the suld section.
//
// 1D: coordinate vector {$x}.
3765 def SULD_1D_I16_TRAP
3766 : NVPTXInst<(outs Int16Regs:$r),
3767 (ins Int64Regs:$s, Int32Regs:$x),
3768 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3770 def SULD_1D_I32_TRAP
3771 : NVPTXInst<(outs Int32Regs:$r),
3772 (ins Int64Regs:$s, Int32Regs:$x),
3773 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3775 def SULD_1D_I64_TRAP
3776 : NVPTXInst<(outs Int64Regs:$r),
3777 (ins Int64Regs:$s, Int32Regs:$x),
3778 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
// 1D array: coordinate vector {$l, $x} ($l presumably the layer index).
// The b8 record places its result in Int16Regs, like the b16 record.
3781 def SULD_1D_ARRAY_I8_TRAP
3782 : NVPTXInst<(outs Int16Regs:$r),
3783 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3784 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3786 def SULD_1D_ARRAY_I16_TRAP
3787 : NVPTXInst<(outs Int16Regs:$r),
3788 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3789 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3791 def SULD_1D_ARRAY_I32_TRAP
3792 : NVPTXInst<(outs Int32Regs:$r),
3793 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3794 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3796 def SULD_1D_ARRAY_I64_TRAP
3797 : NVPTXInst<(outs Int64Regs:$r),
3798 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3799 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3803 : NVPTXInst<(outs Int16Regs:$r),
3804 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3805 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// Single-element 2D and 2D-array surface loads using the .trap modifier
// (suld.b.<geom>.b<N>.trap). The surface handle $s is an Int64Regs operand,
// coordinates are Int32Regs, and the loaded value is written to $r.
//
// 2D: coordinate vector {$x, $y}.
3807 def SULD_2D_I16_TRAP
3808 : NVPTXInst<(outs Int16Regs:$r),
3809 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3810 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3812 def SULD_2D_I32_TRAP
3813 : NVPTXInst<(outs Int32Regs:$r),
3814 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3815 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3817 def SULD_2D_I64_TRAP
3818 : NVPTXInst<(outs Int64Regs:$r),
3819 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3820 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// 2D array: coordinate vector {$l, $x, $y, $y}; $y is printed twice to fill
// the fourth slot of the vector. The b8 record places its result in
// Int16Regs, like the b16 record.
3823 def SULD_2D_ARRAY_I8_TRAP
3824 : NVPTXInst<(outs Int16Regs:$r),
3825 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3826 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3828 def SULD_2D_ARRAY_I16_TRAP
3829 : NVPTXInst<(outs Int16Regs:$r),
3830 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3831 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3833 def SULD_2D_ARRAY_I32_TRAP
3834 : NVPTXInst<(outs Int32Regs:$r),
3835 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3836 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3838 def SULD_2D_ARRAY_I64_TRAP
3839 : NVPTXInst<(outs Int64Regs:$r),
3840 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3841 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3845 : NVPTXInst<(outs Int16Regs:$r),
3846 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3847 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Single-element 3D surface loads using the .trap modifier
// (suld.b.3d.b<N>.trap). The surface handle $s is an Int64Regs operand and
// the loaded value is written to $r. The coordinate vector is emitted as
// {$x, $y, $z, $z}: $z is printed twice to fill the fourth slot.
3849 def SULD_3D_I16_TRAP
3850 : NVPTXInst<(outs Int16Regs:$r),
3851 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3852 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3854 def SULD_3D_I32_TRAP
3855 : NVPTXInst<(outs Int32Regs:$r),
3856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3857 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3859 def SULD_3D_I64_TRAP
3860 : NVPTXInst<(outs Int64Regs:$r),
3861 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3862 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-element surface loads using the .trap modifier
// (suld.b.<geom>.v2.b<N>.trap) from the Int64Regs surface handle $s with
// Int32Regs coordinates; the two loaded values are written to $r and $g.
// Record names follow SULD_<geometry>_V2I<bits>_TRAP. The b8 records place
// their results in Int16Regs, the same class the b16 records use.
//
// 1D: coordinate vector {$x}.
3867 def SULD_1D_V2I8_TRAP
3868 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3869 (ins Int64Regs:$s, Int32Regs:$x),
3870 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3872 def SULD_1D_V2I16_TRAP
3873 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3874 (ins Int64Regs:$s, Int32Regs:$x),
3875 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3877 def SULD_1D_V2I32_TRAP
3878 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3879 (ins Int64Regs:$s, Int32Regs:$x),
3880 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3882 def SULD_1D_V2I64_TRAP
3883 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3884 (ins Int64Regs:$s, Int32Regs:$x),
3885 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
// 1D array: coordinate vector {$l, $x} ($l presumably the layer index).
3888 def SULD_1D_ARRAY_V2I8_TRAP
3889 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3890 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3891 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3893 def SULD_1D_ARRAY_V2I16_TRAP
3894 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3895 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3896 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3898 def SULD_1D_ARRAY_V2I32_TRAP
3899 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3900 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3901 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3903 def SULD_1D_ARRAY_V2I64_TRAP
3904 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3905 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3906 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2D: coordinate vector {$x, $y}.
3909 def SULD_2D_V2I8_TRAP
3910 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3911 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3912 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3914 def SULD_2D_V2I16_TRAP
3915 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3916 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3917 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3919 def SULD_2D_V2I32_TRAP
3920 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3921 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3922 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3924 def SULD_2D_V2I64_TRAP
3925 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3926 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3927 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3930 def SULD_2D_ARRAY_V2I8_TRAP
3931 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3932 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3933 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
3934 "[$s, \\{$l, $x, $y, $y\\}];",
3936 def SULD_2D_ARRAY_V2I16_TRAP
3937 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3938 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3939 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
3940 "[$s, \\{$l, $x, $y, $y\\}];",
3942 def SULD_2D_ARRAY_V2I32_TRAP
3943 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3944 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3945 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
3946 "[$s, \\{$l, $x, $y, $y\\}];",
3948 def SULD_2D_ARRAY_V2I64_TRAP
3949 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3950 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3951 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
3952 "[$s, \\{$l, $x, $y, $y\\}];",
3955 def SULD_3D_V2I8_TRAP
3956 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3957 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3958 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3960 def SULD_3D_V2I16_TRAP
3961 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3962 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3963 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3965 def SULD_3D_V2I32_TRAP
3966 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3967 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3968 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3970 def SULD_3D_V2I64_TRAP
3971 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3972 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3973 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3978 def SULD_1D_V4I8_TRAP
3979 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3980 (ins Int64Regs:$s, Int32Regs:$x),
3981 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3983 def SULD_1D_V4I16_TRAP
3984 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3985 (ins Int64Regs:$s, Int32Regs:$x),
3986 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3988 def SULD_1D_V4I32_TRAP
3989 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3990 (ins Int64Regs:$s, Int32Regs:$x),
3991 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3994 def SULD_1D_ARRAY_V4I8_TRAP
3995 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3996 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3997 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3998 "[$s, \\{$l, $x\\}];",
4000 def SULD_1D_ARRAY_V4I16_TRAP
4001 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4002 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4003 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4004 "[$s, \\{$l, $x\\}];",
4006 def SULD_1D_ARRAY_V4I32_TRAP
4007 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4008 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4009 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4010 "[$s, \\{$l, $x\\}];",
4013 def SULD_2D_V4I8_TRAP
4014 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4015 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4016 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4018 def SULD_2D_V4I16_TRAP
4019 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4020 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4021 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4023 def SULD_2D_V4I32_TRAP
4024 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4025 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4026 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4029 def SULD_2D_ARRAY_V4I8_TRAP
4030 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4031 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4032 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4033 "[$s, \\{$l, $x, $y, $y\\}];",
4035 def SULD_2D_ARRAY_V4I16_TRAP
4036 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4037 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4038 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4039 "[$s, \\{$l, $x, $y, $y\\}];",
4041 def SULD_2D_ARRAY_V4I32_TRAP
4042 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4043 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4044 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4045 "[$s, \\{$l, $x, $y, $y\\}];",
4049 def SULD_3D_V4I8_TRAP
4050 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4051 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4052 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4053 "[$s, \\{$x, $y, $z, $z\\}];",
4055 def SULD_3D_V4I16_TRAP
4056 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4057 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4058 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4059 "[$s, \\{$x, $y, $z, $z\\}];",
4061 def SULD_3D_V4I32_TRAP
4062 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4063 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4064 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4065 "[$s, \\{$x, $y, $z, $z\\}];",
4072 : NVPTXInst<(outs Int16Regs:$r),
4073 (ins Int64Regs:$s, Int32Regs:$x),
4074 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4076 def SULD_1D_I16_ZERO
4077 : NVPTXInst<(outs Int16Regs:$r),
4078 (ins Int64Regs:$s, Int32Regs:$x),
4079 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4081 def SULD_1D_I32_ZERO
4082 : NVPTXInst<(outs Int32Regs:$r),
4083 (ins Int64Regs:$s, Int32Regs:$x),
4084 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4086 def SULD_1D_I64_ZERO
4087 : NVPTXInst<(outs Int64Regs:$r),
4088 (ins Int64Regs:$s, Int32Regs:$x),
4089 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4092 def SULD_1D_ARRAY_I8_ZERO
4093 : NVPTXInst<(outs Int16Regs:$r),
4094 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4095 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4097 def SULD_1D_ARRAY_I16_ZERO
4098 : NVPTXInst<(outs Int16Regs:$r),
4099 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4100 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4102 def SULD_1D_ARRAY_I32_ZERO
4103 : NVPTXInst<(outs Int32Regs:$r),
4104 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4105 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4107 def SULD_1D_ARRAY_I64_ZERO
4108 : NVPTXInst<(outs Int64Regs:$r),
4109 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4110 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4114 : NVPTXInst<(outs Int16Regs:$r),
4115 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4116 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4118 def SULD_2D_I16_ZERO
4119 : NVPTXInst<(outs Int16Regs:$r),
4120 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4121 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4123 def SULD_2D_I32_ZERO
4124 : NVPTXInst<(outs Int32Regs:$r),
4125 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4126 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4128 def SULD_2D_I64_ZERO
4129 : NVPTXInst<(outs Int64Regs:$r),
4130 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4131 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4134 def SULD_2D_ARRAY_I8_ZERO
4135 : NVPTXInst<(outs Int16Regs:$r),
4136 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4137 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4139 def SULD_2D_ARRAY_I16_ZERO
4140 : NVPTXInst<(outs Int16Regs:$r),
4141 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4142 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4144 def SULD_2D_ARRAY_I32_ZERO
4145 : NVPTXInst<(outs Int32Regs:$r),
4146 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4147 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4149 def SULD_2D_ARRAY_I64_ZERO
4150 : NVPTXInst<(outs Int64Regs:$r),
4151 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4152 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4156 : NVPTXInst<(outs Int16Regs:$r),
4157 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4158 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4160 def SULD_3D_I16_ZERO
4161 : NVPTXInst<(outs Int16Regs:$r),
4162 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4163 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4165 def SULD_3D_I32_ZERO
4166 : NVPTXInst<(outs Int32Regs:$r),
4167 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4168 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4170 def SULD_3D_I64_ZERO
4171 : NVPTXInst<(outs Int64Regs:$r),
4172 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4173 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4178 def SULD_1D_V2I8_ZERO
4179 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4180 (ins Int64Regs:$s, Int32Regs:$x),
4181 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4183 def SULD_1D_V2I16_ZERO
4184 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4185 (ins Int64Regs:$s, Int32Regs:$x),
4186 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4188 def SULD_1D_V2I32_ZERO
4189 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4190 (ins Int64Regs:$s, Int32Regs:$x),
4191 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4193 def SULD_1D_V2I64_ZERO
4194 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4195 (ins Int64Regs:$s, Int32Regs:$x),
4196 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4199 def SULD_1D_ARRAY_V2I8_ZERO
4200 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4201 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4202 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4204 def SULD_1D_ARRAY_V2I16_ZERO
4205 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4206 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4207 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4209 def SULD_1D_ARRAY_V2I32_ZERO
4210 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4211 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4212 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4214 def SULD_1D_ARRAY_V2I64_ZERO
4215 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4216 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4217 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4220 def SULD_2D_V2I8_ZERO
4221 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4222 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4223 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4225 def SULD_2D_V2I16_ZERO
4226 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4227 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4228 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4230 def SULD_2D_V2I32_ZERO
4231 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4232 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4233 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4235 def SULD_2D_V2I64_ZERO
4236 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4237 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4238 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4241 def SULD_2D_ARRAY_V2I8_ZERO
4242 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4243 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4244 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4245 "[$s, \\{$l, $x, $y, $y\\}];",
4247 def SULD_2D_ARRAY_V2I16_ZERO
4248 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4249 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4250 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4251 "[$s, \\{$l, $x, $y, $y\\}];",
4253 def SULD_2D_ARRAY_V2I32_ZERO
4254 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4255 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4256 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4257 "[$s, \\{$l, $x, $y, $y\\}];",
4259 def SULD_2D_ARRAY_V2I64_ZERO
4260 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4261 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4262 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4263 "[$s, \\{$l, $x, $y, $y\\}];",
4266 def SULD_3D_V2I8_ZERO
4267 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4268 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4269 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4271 def SULD_3D_V2I16_ZERO
4272 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4273 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4274 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4276 def SULD_3D_V2I32_ZERO
4277 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4278 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4279 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4281 def SULD_3D_V2I64_ZERO
4282 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4283 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4284 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4289 def SULD_1D_V4I8_ZERO
4290 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4291 (ins Int64Regs:$s, Int32Regs:$x),
4292 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4294 def SULD_1D_V4I16_ZERO
4295 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4296 (ins Int64Regs:$s, Int32Regs:$x),
4297 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4299 def SULD_1D_V4I32_ZERO
4300 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4301 (ins Int64Regs:$s, Int32Regs:$x),
4302 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4305 def SULD_1D_ARRAY_V4I8_ZERO
4306 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4307 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4308 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4309 "[$s, \\{$l, $x\\}];",
4311 def SULD_1D_ARRAY_V4I16_ZERO
4312 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4313 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4314 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4315 "[$s, \\{$l, $x\\}];",
4317 def SULD_1D_ARRAY_V4I32_ZERO
4318 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4319 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4320 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4321 "[$s, \\{$l, $x\\}];",
4324 def SULD_2D_V4I8_ZERO
4325 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4326 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4327 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4329 def SULD_2D_V4I16_ZERO
4330 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4331 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4332 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4334 def SULD_2D_V4I32_ZERO
4335 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4336 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4337 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4340 def SULD_2D_ARRAY_V4I8_ZERO
4341 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4342 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4343 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4344 "[$s, \\{$l, $x, $y, $y\\}];",
4346 def SULD_2D_ARRAY_V4I16_ZERO
4347 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4348 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4349 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4350 "[$s, \\{$l, $x, $y, $y\\}];",
4352 def SULD_2D_ARRAY_V4I32_ZERO
4353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4354 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4355 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4356 "[$s, \\{$l, $x, $y, $y\\}];",
4360 def SULD_3D_V4I8_ZERO
4361 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4362 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4363 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4364 "[$s, \\{$x, $y, $z, $z\\}];",
4366 def SULD_3D_V4I16_ZERO
4367 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4368 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4369 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4370 "[$s, \\{$x, $y, $z, $z\\}];",
4372 def SULD_3D_V4I32_ZERO
4373 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4374 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4375 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4376 "[$s, \\{$x, $y, $z, $z\\}];",
4380 //-----------------------------------
4381 // Texture Query Intrinsics
4382 //-----------------------------------
4384 let IsSurfTexQuery = 1 in {
4385 def TXQ_CHANNEL_ORDER
4386 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4387 "txq.channel_order.b32 \t$d, [$a];",
4389 def TXQ_CHANNEL_DATA_TYPE
4390 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4391 "txq.channel_data_type.b32 \t$d, [$a];",
4394 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4395 "txq.width.b32 \t$d, [$a];",
4398 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4399 "txq.height.b32 \t$d, [$a];",
4402 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4403 "txq.depth.b32 \t$d, [$a];",
4406 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4407 "txq.array_size.b32 \t$d, [$a];",
4410 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4411 "txq.num_samples.b32 \t$d, [$a];",
4413 def TXQ_NUM_MIPMAP_LEVELS
4414 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4415 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns for the texture query intrinsics.
// Each anonymous Pat record below matches one int_nvvm_txq_* intrinsic node
// whose only operand is an Int64Regs value ($a) and rewrites it to the
// correspondingly named TXQ_* instruction, passing $a through unchanged.
// The TXQ_CHANNEL_ORDER, TXQ_CHANNEL_DATA_TYPE and TXQ_NUM_MIPMAP_LEVELS
// definitions earlier in this file declare a single Int32Regs result ($d).
// NOTE(review): the remaining TXQ_* targets are presumably declared the same
// way; confirm against their definitions.
4419 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4420 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4421 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4422 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4423 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4424 (TXQ_WIDTH Int64Regs:$a)>;
4425 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4426 (TXQ_HEIGHT Int64Regs:$a)>;
4427 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4428 (TXQ_DEPTH Int64Regs:$a)>;
4429 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4430 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4431 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4432 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4433 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4434 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4437 //-----------------------------------
4438 // Surface Query Intrinsics
4439 //-----------------------------------
4441 let IsSurfTexQuery = 1 in {
4442 def SUQ_CHANNEL_ORDER
4443 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4444 "suq.channel_order.b32 \t$d, [$a];",
4446 def SUQ_CHANNEL_DATA_TYPE
4447 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4448 "suq.channel_data_type.b32 \t$d, [$a];",
4451 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4452 "suq.width.b32 \t$d, [$a];",
4455 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4456 "suq.height.b32 \t$d, [$a];",
4459 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4460 "suq.depth.b32 \t$d, [$a];",
4463 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4464 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface query intrinsics.
// Each anonymous Pat record below matches one int_nvvm_suq_* intrinsic node
// whose only operand is an Int64Regs value ($a) and rewrites it to the
// correspondingly named SUQ_* instruction, passing $a through unchanged.
// The SUQ_CHANNEL_ORDER and SUQ_CHANNEL_DATA_TYPE definitions earlier in this
// file declare a single Int32Regs result ($d).
// NOTE(review): the remaining SUQ_* targets are presumably declared the same
// way; confirm against their definitions.
4468 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4469 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4470 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4471 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4472 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4473 (SUQ_WIDTH Int64Regs:$a)>;
4474 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4475 (SUQ_HEIGHT Int64Regs:$a)>;
4476 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4477 (SUQ_DEPTH Int64Regs:$a)>;
4478 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4479 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4482 //===- Handle Query -------------------------------------------------------===//
4484 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
4486 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4487 "istypep.samplerref \t$d, $a;",
4488 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4490 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4491 "istypep.surfref \t$d, $a;",
4492 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4494 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4495 "istypep.texref \t$d, $a;",
4496 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4498 //===- Surface Stores -----------------------------------------------------===//
4503 def SUST_B_1D_B8_CLAMP
4505 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4506 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4508 def SUST_B_1D_B16_CLAMP
4510 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4511 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4513 def SUST_B_1D_B32_CLAMP
4515 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4516 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4518 def SUST_B_1D_B64_CLAMP
4520 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4521 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4523 def SUST_B_1D_V2B8_CLAMP
4525 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4526 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4528 def SUST_B_1D_V2B16_CLAMP
4530 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4531 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4533 def SUST_B_1D_V2B32_CLAMP
4535 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4536 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4538 def SUST_B_1D_V2B64_CLAMP
4540 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4541 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4543 def SUST_B_1D_V4B8_CLAMP
4545 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4546 Int16Regs:$b, Int16Regs:$a),
4547 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4549 def SUST_B_1D_V4B16_CLAMP
4551 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4552 Int16Regs:$b, Int16Regs:$a),
4553 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4555 def SUST_B_1D_V4B32_CLAMP
4557 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4558 Int32Regs:$b, Int32Regs:$a),
4559 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4563 def SUST_B_1D_ARRAY_B8_CLAMP
4565 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4566 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4568 def SUST_B_1D_ARRAY_B16_CLAMP
4570 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4571 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4573 def SUST_B_1D_ARRAY_B32_CLAMP
4575 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4576 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4578 def SUST_B_1D_ARRAY_B64_CLAMP
4580 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4581 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4583 def SUST_B_1D_ARRAY_V2B8_CLAMP
4585 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4587 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4589 def SUST_B_1D_ARRAY_V2B16_CLAMP
4591 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4593 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4595 def SUST_B_1D_ARRAY_V2B32_CLAMP
4597 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4599 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4601 def SUST_B_1D_ARRAY_V2B64_CLAMP
4603 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4605 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4607 def SUST_B_1D_ARRAY_V4B8_CLAMP
4609 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4610 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4611 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4612 "\\{$r, $g, $b, $a\\};",
4614 def SUST_B_1D_ARRAY_V4B16_CLAMP
4616 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4617 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4618 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4619 "\\{$r, $g, $b, $a\\};",
4621 def SUST_B_1D_ARRAY_V4B32_CLAMP
4623 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4624 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4625 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4626 "\\{$r, $g, $b, $a\\};",
4630 def SUST_B_2D_B8_CLAMP
4632 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4633 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4635 def SUST_B_2D_B16_CLAMP
4637 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4638 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4640 def SUST_B_2D_B32_CLAMP
4642 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4643 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4645 def SUST_B_2D_B64_CLAMP
4647 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4648 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4650 def SUST_B_2D_V2B8_CLAMP
4652 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4654 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4656 def SUST_B_2D_V2B16_CLAMP
4658 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4660 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4662 def SUST_B_2D_V2B32_CLAMP
4664 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4666 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4668 def SUST_B_2D_V2B64_CLAMP
4670 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4672 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4674 def SUST_B_2D_V4B8_CLAMP
4676 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4677 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4678 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4679 "\\{$r, $g, $b, $a\\};",
4681 def SUST_B_2D_V4B16_CLAMP
4683 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4684 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4685 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4686 "\\{$r, $g, $b, $a\\};",
4688 def SUST_B_2D_V4B32_CLAMP
4690 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4691 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4692 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4693 "\\{$r, $g, $b, $a\\};",
4697 def SUST_B_2D_ARRAY_B8_CLAMP
4699 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4701 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4703 def SUST_B_2D_ARRAY_B16_CLAMP
4705 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4707 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4709 def SUST_B_2D_ARRAY_B32_CLAMP
4711 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4713 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4715 def SUST_B_2D_ARRAY_B64_CLAMP
4717 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4719 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4721 def SUST_B_2D_ARRAY_V2B8_CLAMP
4723 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4724 Int16Regs:$r, Int16Regs:$g),
4725 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4728 def SUST_B_2D_ARRAY_V2B16_CLAMP
4730 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4731 Int16Regs:$r, Int16Regs:$g),
4732 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4735 def SUST_B_2D_ARRAY_V2B32_CLAMP
4737 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4738 Int32Regs:$r, Int32Regs:$g),
4739 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4742 def SUST_B_2D_ARRAY_V2B64_CLAMP
4744 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4745 Int64Regs:$r, Int64Regs:$g),
4746 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4749 def SUST_B_2D_ARRAY_V4B8_CLAMP
4751 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4752 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4753 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4754 "\\{$r, $g, $b, $a\\};",
4756 def SUST_B_2D_ARRAY_V4B16_CLAMP
4758 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4759 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4760 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4761 "\\{$r, $g, $b, $a\\};",
4763 def SUST_B_2D_ARRAY_V4B32_CLAMP
4765 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4766 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4767 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4768 "\\{$r, $g, $b, $a\\};",
// sust.b.3d.*.clamp: scalar, v2 and v4 stores addressed by $x, $y, $z
// (all Int32Regs) on the surface operand $s (Int64Regs).
// The coordinate vector is emitted as {$x, $y, $z, $z}.
// NOTE(review): the repeated $z looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
4772 def SUST_B_3D_B8_CLAMP
4774 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4776 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4778 def SUST_B_3D_B16_CLAMP
4780 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4782 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4784 def SUST_B_3D_B32_CLAMP
4786 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4788 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4790 def SUST_B_3D_B64_CLAMP
4792 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4794 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
4796 def SUST_B_3D_V2B8_CLAMP
4798 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4799 Int16Regs:$r, Int16Regs:$g),
4800 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4803 def SUST_B_3D_V2B16_CLAMP
4805 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4806 Int16Regs:$r, Int16Regs:$g),
4807 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4810 def SUST_B_3D_V2B32_CLAMP
4812 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4813 Int32Regs:$r, Int32Regs:$g),
4814 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4817 def SUST_B_3D_V2B64_CLAMP
4819 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4820 Int64Regs:$r, Int64Regs:$g),
4821 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-component ($r, $g, $b, $a) variants.
4824 def SUST_B_3D_V4B8_CLAMP
4826 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4827 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4828 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4829 "\\{$r, $g, $b, $a\\};",
4831 def SUST_B_3D_V4B16_CLAMP
4833 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4834 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4835 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4836 "\\{$r, $g, $b, $a\\};",
4838 def SUST_B_3D_V4B32_CLAMP
4840 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4841 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4842 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4843 "\\{$r, $g, $b, $a\\};",
// sust.b.1d.*.trap: scalar, v2 and v4 stores addressed by a single Int32Regs
// coordinate $x on the surface operand $s (Int64Regs).
// Data register classes: b8 and b16 use Int16Regs, b32 uses Int32Regs,
// b64 uses Int64Regs.
// NOTE(review): `.trap` is presumably the PTX out-of-bounds handling mode --
// confirm against the PTX ISA description of `sust`.
4848 def SUST_B_1D_B8_TRAP
4850 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4851 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
4853 def SUST_B_1D_B16_TRAP
4855 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4856 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
4858 def SUST_B_1D_B32_TRAP
4860 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4861 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
4863 def SUST_B_1D_B64_TRAP
4865 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4866 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
4868 def SUST_B_1D_V2B8_TRAP
4870 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4871 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4873 def SUST_B_1D_V2B16_TRAP
4875 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4876 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4878 def SUST_B_1D_V2B32_TRAP
4880 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4881 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4883 def SUST_B_1D_V2B64_TRAP
4885 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4886 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
4888 def SUST_B_1D_V4B8_TRAP
4890 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4891 Int16Regs:$b, Int16Regs:$a),
4892 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4894 def SUST_B_1D_V4B16_TRAP
4896 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4897 Int16Regs:$b, Int16Regs:$a),
4898 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4900 def SUST_B_1D_V4B32_TRAP
4902 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4903 Int32Regs:$b, Int32Regs:$a),
4904 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b.a1d.*.trap: scalar, v2 and v4 stores. Operands are $s (Int64Regs),
// then $idx and $x (both Int32Regs), then the data registers.
// The coordinate vector is emitted as {$idx, $x}.
// NOTE(review): $idx is presumably the array layer index -- confirm against
// the PTX ISA description of `sust` for `.a1d` geometry.
4908 def SUST_B_1D_ARRAY_B8_TRAP
4910 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4911 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4913 def SUST_B_1D_ARRAY_B16_TRAP
4915 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4916 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4918 def SUST_B_1D_ARRAY_B32_TRAP
4920 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4921 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4923 def SUST_B_1D_ARRAY_B64_TRAP
4925 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4926 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
4928 def SUST_B_1D_ARRAY_V2B8_TRAP
4930 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4932 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4934 def SUST_B_1D_ARRAY_V2B16_TRAP
4936 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4938 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4940 def SUST_B_1D_ARRAY_V2B32_TRAP
4942 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4944 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4946 def SUST_B_1D_ARRAY_V2B64_TRAP
4948 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4950 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
4952 def SUST_B_1D_ARRAY_V4B8_TRAP
4954 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4955 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4956 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
4957 "\\{$r, $g, $b, $a\\};",
4959 def SUST_B_1D_ARRAY_V4B16_TRAP
4961 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4962 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4963 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
4964 "\\{$r, $g, $b, $a\\};",
4966 def SUST_B_1D_ARRAY_V4B32_TRAP
4968 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4969 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4970 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
4971 "\\{$r, $g, $b, $a\\};",
// sust.b.2d.*.trap: scalar, v2 and v4 stores addressed by $x, $y (Int32Regs)
// on the surface operand $s (Int64Regs). The coordinate vector is {$x, $y}.
// Scalar data register classes: b8 and b16 use Int16Regs, b32 uses Int32Regs,
// b64 uses Int64Regs.
4975 def SUST_B_2D_B8_TRAP
4977 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4978 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4980 def SUST_B_2D_B16_TRAP
4982 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4983 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4985 def SUST_B_2D_B32_TRAP
4987 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4988 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4990 def SUST_B_2D_B64_TRAP
4992 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4993 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
4995 def SUST_B_2D_V2B8_TRAP
4997 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4999 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5001 def SUST_B_2D_V2B16_TRAP
5003 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5005 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5007 def SUST_B_2D_V2B32_TRAP
5009 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5011 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5013 def SUST_B_2D_V2B64_TRAP
5015 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5017 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5019 def SUST_B_2D_V4B8_TRAP
5021 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5022 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5023 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5024 "\\{$r, $g, $b, $a\\};",
5026 def SUST_B_2D_V4B16_TRAP
5028 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5029 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5030 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5031 "\\{$r, $g, $b, $a\\};",
5033 def SUST_B_2D_V4B32_TRAP
5035 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5036 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5037 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5038 "\\{$r, $g, $b, $a\\};",
// sust.b.a2d.*.trap: scalar, v2 and v4 stores. Operands are $s (Int64Regs),
// then $idx, $x, $y (all Int32Regs), then the data registers.
// The coordinate vector is emitted as {$idx, $x, $y, $y}.
// NOTE(review): the repeated $y looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
5042 def SUST_B_2D_ARRAY_B8_TRAP
5044 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5046 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5048 def SUST_B_2D_ARRAY_B16_TRAP
5050 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5052 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5054 def SUST_B_2D_ARRAY_B32_TRAP
5056 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5058 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5060 def SUST_B_2D_ARRAY_B64_TRAP
5062 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5064 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5066 def SUST_B_2D_ARRAY_V2B8_TRAP
5068 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5069 Int16Regs:$r, Int16Regs:$g),
5070 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5073 def SUST_B_2D_ARRAY_V2B16_TRAP
5075 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5076 Int16Regs:$r, Int16Regs:$g),
5077 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5080 def SUST_B_2D_ARRAY_V2B32_TRAP
5082 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5083 Int32Regs:$r, Int32Regs:$g),
5084 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5087 def SUST_B_2D_ARRAY_V2B64_TRAP
5089 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5090 Int64Regs:$r, Int64Regs:$g),
5091 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-component ($r, $g, $b, $a) variants.
5094 def SUST_B_2D_ARRAY_V4B8_TRAP
5096 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5097 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5098 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5099 "\\{$r, $g, $b, $a\\};",
5101 def SUST_B_2D_ARRAY_V4B16_TRAP
5103 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5104 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5105 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5106 "\\{$r, $g, $b, $a\\};",
5108 def SUST_B_2D_ARRAY_V4B32_TRAP
5110 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5111 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5112 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5113 "\\{$r, $g, $b, $a\\};",
// sust.b.3d.*.trap: scalar, v2 and v4 stores addressed by $x, $y, $z
// (all Int32Regs) on the surface operand $s (Int64Regs).
// The coordinate vector is emitted as {$x, $y, $z, $z}.
// NOTE(review): the repeated $z looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
5117 def SUST_B_3D_B8_TRAP
5119 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5121 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5123 def SUST_B_3D_B16_TRAP
5125 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5127 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5129 def SUST_B_3D_B32_TRAP
5131 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5133 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5135 def SUST_B_3D_B64_TRAP
5137 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5139 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5141 def SUST_B_3D_V2B8_TRAP
5143 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5144 Int16Regs:$r, Int16Regs:$g),
5145 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5148 def SUST_B_3D_V2B16_TRAP
5150 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5151 Int16Regs:$r, Int16Regs:$g),
5152 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5155 def SUST_B_3D_V2B32_TRAP
5157 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5158 Int32Regs:$r, Int32Regs:$g),
5159 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5162 def SUST_B_3D_V2B64_TRAP
5164 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5165 Int64Regs:$r, Int64Regs:$g),
5166 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-component ($r, $g, $b, $a) variants.
5169 def SUST_B_3D_V4B8_TRAP
5171 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5172 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5173 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5174 "\\{$r, $g, $b, $a\\};",
5176 def SUST_B_3D_V4B16_TRAP
5178 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5179 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5180 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5181 "\\{$r, $g, $b, $a\\};",
5183 def SUST_B_3D_V4B32_TRAP
5185 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5186 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5187 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5188 "\\{$r, $g, $b, $a\\};",
// sust.b.1d.*.zero: scalar, v2 and v4 stores addressed by a single Int32Regs
// coordinate $x on the surface operand $s (Int64Regs).
// Data register classes: b8 and b16 use Int16Regs, b32 uses Int32Regs,
// b64 uses Int64Regs.
// NOTE(review): `.zero` is presumably the PTX out-of-bounds handling mode --
// confirm against the PTX ISA description of `sust`.
5193 def SUST_B_1D_B8_ZERO
5195 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5196 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5198 def SUST_B_1D_B16_ZERO
5200 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5201 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5203 def SUST_B_1D_B32_ZERO
5205 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5206 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5208 def SUST_B_1D_B64_ZERO
5210 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5211 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5213 def SUST_B_1D_V2B8_ZERO
5215 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5216 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5218 def SUST_B_1D_V2B16_ZERO
5220 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5221 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5223 def SUST_B_1D_V2B32_ZERO
5225 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5226 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5228 def SUST_B_1D_V2B64_ZERO
5230 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5231 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5233 def SUST_B_1D_V4B8_ZERO
5235 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5236 Int16Regs:$b, Int16Regs:$a),
5237 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5239 def SUST_B_1D_V4B16_ZERO
5241 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5242 Int16Regs:$b, Int16Regs:$a),
5243 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5245 def SUST_B_1D_V4B32_ZERO
5247 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5248 Int32Regs:$b, Int32Regs:$a),
5249 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b.a1d.*.zero: scalar, v2 and v4 stores. Operands are $s (Int64Regs),
// then $idx and $x (both Int32Regs), then the data registers.
// The coordinate vector is emitted as {$idx, $x}.
// NOTE(review): $idx is presumably the array layer index -- confirm against
// the PTX ISA description of `sust` for `.a1d` geometry.
5253 def SUST_B_1D_ARRAY_B8_ZERO
5255 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5256 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5258 def SUST_B_1D_ARRAY_B16_ZERO
5260 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5261 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5263 def SUST_B_1D_ARRAY_B32_ZERO
5265 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5266 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5268 def SUST_B_1D_ARRAY_B64_ZERO
5270 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5271 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5273 def SUST_B_1D_ARRAY_V2B8_ZERO
5275 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5277 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5279 def SUST_B_1D_ARRAY_V2B16_ZERO
5281 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5283 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5285 def SUST_B_1D_ARRAY_V2B32_ZERO
5287 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5289 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5291 def SUST_B_1D_ARRAY_V2B64_ZERO
5293 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5295 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5297 def SUST_B_1D_ARRAY_V4B8_ZERO
5299 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5300 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5301 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5302 "\\{$r, $g, $b, $a\\};",
5304 def SUST_B_1D_ARRAY_V4B16_ZERO
5306 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5307 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5308 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5309 "\\{$r, $g, $b, $a\\};",
5311 def SUST_B_1D_ARRAY_V4B32_ZERO
5313 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5314 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5315 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5316 "\\{$r, $g, $b, $a\\};",
// sust.b.2d.*.zero: scalar, v2 and v4 stores addressed by $x, $y (Int32Regs)
// on the surface operand $s (Int64Regs). The coordinate vector is {$x, $y}.
// Scalar data register classes: b8 and b16 use Int16Regs, b32 uses Int32Regs,
// b64 uses Int64Regs.
5320 def SUST_B_2D_B8_ZERO
5322 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5323 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5325 def SUST_B_2D_B16_ZERO
5327 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5328 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5330 def SUST_B_2D_B32_ZERO
5332 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5333 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5335 def SUST_B_2D_B64_ZERO
5337 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5338 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5340 def SUST_B_2D_V2B8_ZERO
5342 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5344 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5346 def SUST_B_2D_V2B16_ZERO
5348 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5350 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5352 def SUST_B_2D_V2B32_ZERO
5354 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5356 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5358 def SUST_B_2D_V2B64_ZERO
5360 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5362 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5364 def SUST_B_2D_V4B8_ZERO
5366 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5367 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5368 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5369 "\\{$r, $g, $b, $a\\};",
5371 def SUST_B_2D_V4B16_ZERO
5373 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5374 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5375 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5376 "\\{$r, $g, $b, $a\\};",
5378 def SUST_B_2D_V4B32_ZERO
5380 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5381 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5382 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5383 "\\{$r, $g, $b, $a\\};",
// sust.b.a2d.*.zero: scalar, v2 and v4 stores. Operands are $s (Int64Regs),
// then $idx, $x, $y (all Int32Regs), then the data registers.
// The coordinate vector is emitted as {$idx, $x, $y, $y}.
// NOTE(review): the repeated $y looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
5387 def SUST_B_2D_ARRAY_B8_ZERO
5389 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5391 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5393 def SUST_B_2D_ARRAY_B16_ZERO
5395 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5397 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5399 def SUST_B_2D_ARRAY_B32_ZERO
5401 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5403 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5405 def SUST_B_2D_ARRAY_B64_ZERO
5407 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5409 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5411 def SUST_B_2D_ARRAY_V2B8_ZERO
5413 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5414 Int16Regs:$r, Int16Regs:$g),
5415 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5418 def SUST_B_2D_ARRAY_V2B16_ZERO
5420 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5421 Int16Regs:$r, Int16Regs:$g),
5422 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5425 def SUST_B_2D_ARRAY_V2B32_ZERO
5427 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5428 Int32Regs:$r, Int32Regs:$g),
5429 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5432 def SUST_B_2D_ARRAY_V2B64_ZERO
5434 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5435 Int64Regs:$r, Int64Regs:$g),
5436 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-component ($r, $g, $b, $a) variants.
5439 def SUST_B_2D_ARRAY_V4B8_ZERO
5441 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5442 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5443 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5444 "\\{$r, $g, $b, $a\\};",
5446 def SUST_B_2D_ARRAY_V4B16_ZERO
5448 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5449 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5450 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5451 "\\{$r, $g, $b, $a\\};",
5453 def SUST_B_2D_ARRAY_V4B32_ZERO
5455 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5456 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5457 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5458 "\\{$r, $g, $b, $a\\};",
// sust.b.3d.*.zero: scalar, v2 and v4 stores addressed by $x, $y, $z
// (all Int32Regs) on the surface operand $s (Int64Regs).
// The coordinate vector is emitted as {$x, $y, $z, $z}.
// NOTE(review): the repeated $z looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
5462 def SUST_B_3D_B8_ZERO
5464 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5466 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5468 def SUST_B_3D_B16_ZERO
5470 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5472 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5474 def SUST_B_3D_B32_ZERO
5476 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5478 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5480 def SUST_B_3D_B64_ZERO
5482 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5484 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5486 def SUST_B_3D_V2B8_ZERO
5488 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5489 Int16Regs:$r, Int16Regs:$g),
5490 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5493 def SUST_B_3D_V2B16_ZERO
5495 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5496 Int16Regs:$r, Int16Regs:$g),
5497 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5500 def SUST_B_3D_V2B32_ZERO
5502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5503 Int32Regs:$r, Int32Regs:$g),
5504 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5507 def SUST_B_3D_V2B64_ZERO
5509 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5510 Int64Regs:$r, Int64Regs:$g),
5511 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-component ($r, $g, $b, $a) variants.
5514 def SUST_B_3D_V4B8_ZERO
5516 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5517 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5518 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5519 "\\{$r, $g, $b, $a\\};",
5521 def SUST_B_3D_V4B16_ZERO
5523 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5524 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5525 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5526 "\\{$r, $g, $b, $a\\};",
5528 def SUST_B_3D_V4B32_ZERO
5530 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5531 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5532 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5533 "\\{$r, $g, $b, $a\\};",
// sust.p.1d.*.trap: scalar, v2 and v4 stores addressed by a single Int32Regs
// coordinate $x on the surface operand $s (Int64Regs).
// Only b8, b16 and b32 element sizes are defined in this group; b8 and b16
// data use Int16Regs, b32 data uses Int32Regs.
// NOTE(review): `sust.p` is presumably the PTX formatted-store form (as
// opposed to `sust.b`) -- confirm against the PTX ISA description of `sust`.
5540 def SUST_P_1D_B8_TRAP
5542 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5543 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5545 def SUST_P_1D_B16_TRAP
5547 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5548 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5550 def SUST_P_1D_B32_TRAP
5552 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5553 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5555 def SUST_P_1D_V2B8_TRAP
5557 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5558 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5560 def SUST_P_1D_V2B16_TRAP
5562 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5563 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5565 def SUST_P_1D_V2B32_TRAP
5567 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5568 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5570 def SUST_P_1D_V4B8_TRAP
5572 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5573 Int16Regs:$b, Int16Regs:$a),
5574 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5576 def SUST_P_1D_V4B16_TRAP
5578 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5579 Int16Regs:$b, Int16Regs:$a),
5580 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5582 def SUST_P_1D_V4B32_TRAP
5584 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5585 Int32Regs:$b, Int32Regs:$a),
5586 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.p.a1d.*.trap: scalar, v2 and v4 stores. Operands are $s (Int64Regs),
// then $idx and $x (both Int32Regs), then the data registers.
// The coordinate vector is emitted as {$idx, $x}.
// Only b8, b16 and b32 element sizes are defined in this group.
5590 def SUST_P_1D_ARRAY_B8_TRAP
5592 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5593 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5595 def SUST_P_1D_ARRAY_B16_TRAP
5597 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5598 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5600 def SUST_P_1D_ARRAY_B32_TRAP
5602 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5603 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5605 def SUST_P_1D_ARRAY_V2B8_TRAP
5607 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5609 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5611 def SUST_P_1D_ARRAY_V2B16_TRAP
5613 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5615 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5617 def SUST_P_1D_ARRAY_V2B32_TRAP
5619 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5621 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5623 def SUST_P_1D_ARRAY_V4B8_TRAP
5625 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5626 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5627 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5628 "\\{$r, $g, $b, $a\\};",
5630 def SUST_P_1D_ARRAY_V4B16_TRAP
5632 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5633 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5634 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5635 "\\{$r, $g, $b, $a\\};",
5637 def SUST_P_1D_ARRAY_V4B32_TRAP
5639 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5640 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5641 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5642 "\\{$r, $g, $b, $a\\};",
// sust.p.2d.*.trap: scalar, v2 and v4 stores addressed by $x, $y (Int32Regs)
// on the surface operand $s (Int64Regs). The coordinate vector is {$x, $y}.
// Only b8, b16 and b32 element sizes are defined in this group.
5646 def SUST_P_2D_B8_TRAP
5648 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5649 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5651 def SUST_P_2D_B16_TRAP
5653 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5654 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5656 def SUST_P_2D_B32_TRAP
5658 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5659 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5661 def SUST_P_2D_V2B8_TRAP
5663 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5665 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5667 def SUST_P_2D_V2B16_TRAP
5669 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5671 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5673 def SUST_P_2D_V2B32_TRAP
5675 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5677 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-component ($r, $g, $b, $a) variants.
5679 def SUST_P_2D_V4B8_TRAP
5681 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5682 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5683 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5684 "\\{$r, $g, $b, $a\\};",
5686 def SUST_P_2D_V4B16_TRAP
5688 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5689 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5690 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5691 "\\{$r, $g, $b, $a\\};",
5693 def SUST_P_2D_V4B32_TRAP
5695 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5696 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5697 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5698 "\\{$r, $g, $b, $a\\};",
// sust.p.a2d.*.trap: scalar, v2 and v4 stores. Operands are $s (Int64Regs),
// then $idx, $x, $y (all Int32Regs), then the data registers.
// The coordinate vector is emitted as {$idx, $x, $y, $y}.
// Only b8, b16 and b32 element sizes are defined in this group.
// NOTE(review): the repeated $y looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
5702 def SUST_P_2D_ARRAY_B8_TRAP
5704 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5706 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5708 def SUST_P_2D_ARRAY_B16_TRAP
5710 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5712 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5714 def SUST_P_2D_ARRAY_B32_TRAP
5716 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5718 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5720 def SUST_P_2D_ARRAY_V2B8_TRAP
5722 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5723 Int16Regs:$r, Int16Regs:$g),
5724 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5727 def SUST_P_2D_ARRAY_V2B16_TRAP
5729 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5730 Int16Regs:$r, Int16Regs:$g),
5731 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5734 def SUST_P_2D_ARRAY_V2B32_TRAP
5736 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5737 Int32Regs:$r, Int32Regs:$g),
5738 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-component ($r, $g, $b, $a) variants.
5741 def SUST_P_2D_ARRAY_V4B8_TRAP
5743 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5744 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5745 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5746 "\\{$r, $g, $b, $a\\};",
5748 def SUST_P_2D_ARRAY_V4B16_TRAP
5750 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5751 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5752 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5753 "\\{$r, $g, $b, $a\\};",
5755 def SUST_P_2D_ARRAY_V4B32_TRAP
5757 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5758 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5759 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5760 "\\{$r, $g, $b, $a\\};",
// sust.p.3d.*.trap: scalar, v2 and v4 stores addressed by $x, $y, $z
// (all Int32Regs) on the surface operand $s (Int64Regs).
// The coordinate vector is emitted as {$x, $y, $z, $z}.
// Only b8, b16 and b32 element sizes are defined in this group.
// NOTE(review): the repeated $z looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA description of `sust`.
5764 def SUST_P_3D_B8_TRAP
5766 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5768 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5770 def SUST_P_3D_B16_TRAP
5772 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5774 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5776 def SUST_P_3D_B32_TRAP
5778 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5780 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-component ($r, $g) variants.
5782 def SUST_P_3D_V2B8_TRAP
5784 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5785 Int16Regs:$r, Int16Regs:$g),
5786 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5789 def SUST_P_3D_V2B16_TRAP
5791 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5792 Int16Regs:$r, Int16Regs:$g),
5793 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5796 def SUST_P_3D_V2B32_TRAP
5798 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5799 Int32Regs:$r, Int32Regs:$g),
5800 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-component ($r, $g, $b, $a) variants.
5803 def SUST_P_3D_V4B8_TRAP
5805 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5806 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5807 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5808 "\\{$r, $g, $b, $a\\};",
5810 def SUST_P_3D_V4B16_TRAP
5812 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5813 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5814 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5815 "\\{$r, $g, $b, $a\\};",
5817 def SUST_P_3D_V4B32_TRAP
5819 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5820 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5821 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5822 "\\{$r, $g, $b, $a\\};",
5826 // Surface store instruction selection patterns.
5827 // These are standalone Pat records rather than inline instruction patterns
5828 // because TableGen reports type errors when they are embedded (cause unknown).
// Selection patterns for the int_nvvm_sust_b_1d_*_clamp intrinsics.
// Each intrinsic selects the correspondingly named SUST_B_1D_*_CLAMP
// instruction and forwards its operands unchanged and in the same order:
// $s (Int64Regs), $x (Int32Regs), then the data registers.
// i8 and i16 data are matched in Int16Regs, i32 in Int32Regs, i64 in Int64Regs.
5831 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5832 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5833 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5835 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5836 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5837 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5839 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5840 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5841 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5843 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5844 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5845 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-component ($r, $g) intrinsics.
5847 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5848 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5849 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5850 Int16Regs:$r, Int16Regs:$g)>;
5852 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5853 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5854 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5855 Int16Regs:$r, Int16Regs:$g)>;
5857 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5858 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5859 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5860 Int32Regs:$r, Int32Regs:$g)>;
5862 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5863 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5864 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5865 Int64Regs:$r, Int64Regs:$g)>;
// Four-component ($r, $g, $b, $a) intrinsics.
5867 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5868 Int64Regs:$s, Int32Regs:$x,
5869 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5870 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5871 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5873 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5874 Int64Regs:$s, Int32Regs:$x,
5875 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5876 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5877 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5879 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5880 Int64Regs:$s, Int32Regs:$x,
5881 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5882 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5883 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_clamp intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_1D_ARRAY_*_CLAMP instruction of
// the same element shape. Operands are passed through in source order:
// $s (Int64Regs), $l and $x (Int32Regs), then the value operands.
// NOTE(review): $l is presumably the array layer index -- confirm.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$x,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
5887 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5888 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5889 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5892 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5893 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5894 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5897 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5898 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5899 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5902 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5903 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5904 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element forms: value operands $r, $g.
5907 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5908 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5909 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5910 Int16Regs:$r, Int16Regs:$g)>;
5912 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5913 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5914 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5915 Int16Regs:$r, Int16Regs:$g)>;
5917 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5918 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5919 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5920 Int32Regs:$r, Int32Regs:$g)>;
5922 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5923 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5924 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5925 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
5927 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5928 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5929 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5930 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5931 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5933 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5934 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5935 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5936 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5937 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5939 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5940 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5941 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5942 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5943 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_clamp intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_2D_*_CLAMP instruction of the
// same element shape, forwarding $s (Int64Regs), $x and $y (Int32Regs) and
// the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$y,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
5947 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5948 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5949 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5952 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5953 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5954 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5957 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5958 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5959 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5962 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5963 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5964 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: value operands $r, $g.
5967 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5968 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5969 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5970 Int16Regs:$r, Int16Regs:$g)>;
5972 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5973 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5974 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5975 Int16Regs:$r, Int16Regs:$g)>;
5977 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5978 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5979 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5980 Int32Regs:$r, Int32Regs:$g)>;
5982 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5983 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5984 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5985 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
5987 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5988 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5989 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5990 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5991 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5993 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5994 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5995 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5996 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5997 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5999 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6000 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6001 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6002 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6003 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_clamp intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_2D_ARRAY_*_CLAMP instruction of
// the same element shape, forwarding $s (Int64Regs), $l, $x and $y
// (Int32Regs) and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$y,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6007 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6008 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6009 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6010 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6013 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6014 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6015 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6016 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6019 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6020 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6021 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6022 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6025 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6026 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6027 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6028 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: value operands $r, $g.
6031 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6032 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6033 Int16Regs:$r, Int16Regs:$g),
6034 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6035 Int32Regs:$x, Int32Regs:$y,
6036 Int16Regs:$r, Int16Regs:$g)>;
6038 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6039 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6040 Int16Regs:$r, Int16Regs:$g),
6041 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6042 Int32Regs:$x, Int32Regs:$y,
6043 Int16Regs:$r, Int16Regs:$g)>;
// NOTE(review): the source-side operand lists of the v2i32 and v2i64 patterns
// below end at `$r,` in this copy, while the result side still references
// `$g` -- the `$g` source operand line appears to be missing. Confirm.
6045 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6046 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6048 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6049 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6051 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6052 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6054 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6055 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6057 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6058 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6059 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6060 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6061 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6062 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6064 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6065 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6066 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6067 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6068 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6069 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6071 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6072 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6073 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6074 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6075 Int32Regs:$x, Int32Regs:$y,
6076 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_clamp intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_3D_*_CLAMP instruction of the
// same element shape, forwarding $s (Int64Regs), $x, $y and $z (Int32Regs)
// and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$z,` on BOTH the source and the result side -- the value operand
// lines (and the closing `)>;`) are not present, so the register class of the
// value operand cannot be read off here. Confirm against the upstream file.
6080 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6081 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6083 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6084 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6087 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6088 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6090 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6091 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6094 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6095 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6097 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6098 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6101 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6102 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6104 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6105 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element forms: value operands $r, $g.
6108 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6109 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6110 Int16Regs:$r, Int16Regs:$g),
6111 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6112 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6113 Int16Regs:$r, Int16Regs:$g)>;
6115 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6116 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6117 Int16Regs:$r, Int16Regs:$g),
6118 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6119 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6120 Int16Regs:$r, Int16Regs:$g)>;
6122 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6123 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6124 Int32Regs:$r, Int32Regs:$g),
6125 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6126 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6127 Int32Regs:$r, Int32Regs:$g)>;
6129 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6130 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6131 Int64Regs:$r, Int64Regs:$g),
6132 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6133 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6134 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6136 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6137 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6138 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6139 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6140 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6141 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6143 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6144 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6145 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6146 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6147 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6148 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6150 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6151 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6152 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6153 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6154 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6155 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics.
// Same layout as the 1d `_clamp` patterns, but targeting the
// SUST_B_1D_*_TRAP instructions: $s (Int64Regs), $x (Int32Regs), then the
// value operands, all forwarded unchanged and in the same order.
// The i8 and i16 variants both take their values in Int16Regs.

// Scalar forms: one value operand ($r).
6159 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6160 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6161 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6163 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6164 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6165 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6167 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6168 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6169 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6171 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6172 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6173 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-element forms: value operands $r, $g.
6175 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6176 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6177 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6178 Int16Regs:$r, Int16Regs:$g)>;
6180 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6181 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6182 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6183 Int16Regs:$r, Int16Regs:$g)>;
6185 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6186 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6187 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6188 Int32Regs:$r, Int32Regs:$g)>;
6190 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6191 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6192 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6193 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6195 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6196 Int64Regs:$s, Int32Regs:$x,
6197 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6198 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6199 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6201 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6202 Int64Regs:$s, Int32Regs:$x,
6203 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6204 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6205 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6207 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6208 Int64Regs:$s, Int32Regs:$x,
6209 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6210 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6211 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_trap intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_1D_ARRAY_*_TRAP instruction of
// the same element shape, forwarding $s (Int64Regs), $l and $x (Int32Regs)
// and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$x,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6215 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6216 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6217 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6220 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6221 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6222 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6225 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6226 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6227 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6230 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6231 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6232 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element forms: value operands $r, $g.
6235 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6236 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6237 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6238 Int16Regs:$r, Int16Regs:$g)>;
6240 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6241 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6242 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6243 Int16Regs:$r, Int16Regs:$g)>;
6245 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6246 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6247 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6248 Int32Regs:$r, Int32Regs:$g)>;
6250 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6251 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6252 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6253 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6255 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6256 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6257 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6258 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6259 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6261 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6262 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6263 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6264 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6265 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6267 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6268 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6269 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6270 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6271 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_2D_*_TRAP instruction of the
// same element shape, forwarding $s (Int64Regs), $x and $y (Int32Regs) and
// the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$y,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6275 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6276 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6277 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6280 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6281 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6282 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6285 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6286 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6287 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6290 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6291 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6292 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: value operands $r, $g.
6295 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6296 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6297 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6298 Int16Regs:$r, Int16Regs:$g)>;
6300 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6301 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6302 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6303 Int16Regs:$r, Int16Regs:$g)>;
6305 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6306 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6307 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6308 Int32Regs:$r, Int32Regs:$g)>;
6310 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6311 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6312 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6313 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6315 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6316 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6317 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6318 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6319 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6321 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6322 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6323 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6324 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6325 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6327 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6328 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6329 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6330 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6331 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_trap intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_2D_ARRAY_*_TRAP instruction of
// the same element shape, forwarding $s (Int64Regs), $l, $x and $y
// (Int32Regs) and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$y,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6335 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6336 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6337 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6338 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6341 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6342 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6343 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6344 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6347 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6348 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6349 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6350 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6353 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6354 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6355 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6356 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: value operands $r, $g.
6359 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6360 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6361 Int16Regs:$r, Int16Regs:$g),
6362 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6363 Int32Regs:$x, Int32Regs:$y,
6364 Int16Regs:$r, Int16Regs:$g)>;
6366 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6367 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6368 Int16Regs:$r, Int16Regs:$g),
6369 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6370 Int32Regs:$x, Int32Regs:$y,
6371 Int16Regs:$r, Int16Regs:$g)>;
// NOTE(review): the source-side operand lists of the v2i32 and v2i64 patterns
// below end at `$r,` in this copy, while the result side still references
// `$g` -- the `$g` source operand line appears to be missing. Confirm.
6373 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6374 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6376 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6377 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6379 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6380 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6382 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6383 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6385 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6386 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6387 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6388 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6389 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6390 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6392 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6393 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6394 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6395 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6396 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6397 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6399 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6400 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6401 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6402 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6403 Int32Regs:$x, Int32Regs:$y,
6404 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_trap intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_3D_*_TRAP instruction of the
// same element shape, forwarding $s (Int64Regs), $x, $y and $z (Int32Regs)
// and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$z,` on BOTH the source and the result side -- the value operand
// lines (and the closing `)>;`) are not present, so the register class of the
// value operand cannot be read off here. Confirm against the upstream file.
6408 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6409 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6411 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6412 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6415 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6416 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6418 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6419 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6422 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6423 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6425 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6426 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6429 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6430 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6432 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6433 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element forms: value operands $r, $g.
6436 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6437 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6438 Int16Regs:$r, Int16Regs:$g),
6439 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6440 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6441 Int16Regs:$r, Int16Regs:$g)>;
6443 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6444 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6445 Int16Regs:$r, Int16Regs:$g),
6446 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6447 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6448 Int16Regs:$r, Int16Regs:$g)>;
6450 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6451 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6452 Int32Regs:$r, Int32Regs:$g),
6453 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6454 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6455 Int32Regs:$r, Int32Regs:$g)>;
6457 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6458 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6459 Int64Regs:$r, Int64Regs:$g),
6460 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6461 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6462 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6464 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6465 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6466 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6467 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6468 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6469 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6471 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6472 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6473 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6474 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6475 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6476 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6478 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6479 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6480 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6481 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6482 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6483 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Same layout as the 1d `_clamp` and `_trap` patterns, but targeting the
// SUST_B_1D_*_ZERO instructions: $s (Int64Regs), $x (Int32Regs), then the
// value operands, all forwarded unchanged and in the same order.
// The i8 and i16 variants both take their values in Int16Regs.

// Scalar forms: one value operand ($r).
6487 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6488 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6489 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6491 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6492 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6493 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6495 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6496 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6497 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6499 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6500 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6501 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-element forms: value operands $r, $g.
6503 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6504 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6505 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6506 Int16Regs:$r, Int16Regs:$g)>;
6508 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6509 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6510 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6511 Int16Regs:$r, Int16Regs:$g)>;
6513 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6514 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6515 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6516 Int32Regs:$r, Int32Regs:$g)>;
6518 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6519 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6520 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6521 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6523 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6524 Int64Regs:$s, Int32Regs:$x,
6525 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6526 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6527 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6529 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6530 Int64Regs:$s, Int32Regs:$x,
6531 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6532 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6533 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6535 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6536 Int64Regs:$s, Int32Regs:$x,
6537 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6538 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6539 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_1D_ARRAY_*_ZERO instruction of
// the same element shape, forwarding $s (Int64Regs), $l and $x (Int32Regs)
// and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$x,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6543 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6544 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6545 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6548 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6549 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6550 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6553 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6554 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6555 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6558 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6559 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6560 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element forms: value operands $r, $g.
6563 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6564 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6565 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6566 Int16Regs:$r, Int16Regs:$g)>;
6568 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6569 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6570 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6571 Int16Regs:$r, Int16Regs:$g)>;
6573 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6574 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6575 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6576 Int32Regs:$r, Int32Regs:$g)>;
6578 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6579 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6580 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6581 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6583 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6584 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6585 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6586 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6587 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6589 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6590 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6591 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6592 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6593 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6595 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6596 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6597 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6598 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6599 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_2D_*_ZERO instruction of the
// same element shape, forwarding $s (Int64Regs), $x and $y (Int32Regs) and
// the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$y,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6603 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6604 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6605 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6608 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6609 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6610 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6613 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6614 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6615 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6618 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6619 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6620 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: value operands $r, $g.
6623 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6624 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6625 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6626 Int16Regs:$r, Int16Regs:$g)>;
6628 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6629 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6630 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6631 Int16Regs:$r, Int16Regs:$g)>;
6633 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6634 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6635 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6636 Int32Regs:$r, Int32Regs:$g)>;
6638 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6639 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6640 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6641 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6643 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6644 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6645 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6646 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6647 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6649 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6650 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6651 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6652 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6653 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6655 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6656 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6657 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6658 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6659 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Each Pat maps an intrinsic onto the SUST_B_2D_ARRAY_*_ZERO instruction of
// the same element shape, forwarding $s (Int64Regs), $l, $x and $y
// (Int32Regs) and the value operands in source order.

// NOTE(review): in this copy the four scalar patterns below stop after
// `Int32Regs:$y,` on the result side -- the trailing value operand and the
// closing `)>;` are not present. Confirm against the upstream file.
6663 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6664 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6665 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6666 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6669 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6670 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6671 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6672 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6675 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6676 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6677 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6678 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6681 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6682 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6683 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6684 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: value operands $r, $g.
6687 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6688 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6689 Int16Regs:$r, Int16Regs:$g),
6690 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6691 Int32Regs:$x, Int32Regs:$y,
6692 Int16Regs:$r, Int16Regs:$g)>;
6694 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6695 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6696 Int16Regs:$r, Int16Regs:$g),
6697 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6698 Int32Regs:$x, Int32Regs:$y,
6699 Int16Regs:$r, Int16Regs:$g)>;
// NOTE(review): the source-side operand lists of the v2i32 and v2i64 patterns
// below end at `$r,` in this copy, while the result side still references
// `$g` -- the `$g` source operand line appears to be missing. Confirm.
6701 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6702 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6704 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6705 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6707 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6708 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6710 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6711 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: value operands $r, $g, $b, $a.
6713 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6714 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6715 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6716 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6717 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6718 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6720 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6721 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6722 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6723 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6724 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6725 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6727 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6728 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6729 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6730 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6731 Int32Regs:$x, Int32Regs:$y,
6732 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics, each
// targeting the matching SUST_B_3D_*_ZERO instruction.
// Visible operand order on both sides is $s (Int64Regs), then $x, $y, $z
// (all Int32Regs), then the component operands.
// NOTE(review): in this copy the scalar (i8/i16/i32/i64) patterns show only
// the handle and coordinate operands; the value operand lines are not
// visible. Confirm against upstream before editing.
// i8 -> SUST_B_3D_B8_ZERO
6736 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6737 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6739 (SUST_B_3D_B8_ZERO Int64Regs:$s,
6740 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// i16 -> SUST_B_3D_B16_ZERO
6743 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6744 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6746 (SUST_B_3D_B16_ZERO Int64Regs:$s,
6747 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// i32 -> SUST_B_3D_B32_ZERO
6750 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6751 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6753 (SUST_B_3D_B32_ZERO Int64Regs:$s,
6754 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// i64 -> SUST_B_3D_B64_ZERO
6757 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6758 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6760 (SUST_B_3D_B64_ZERO Int64Regs:$s,
6761 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// v2i8: two components ($r, $g) in Int16Regs.
6764 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6765 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6766 Int16Regs:$r, Int16Regs:$g),
6767 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6768 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6769 Int16Regs:$r, Int16Regs:$g)>;
// v2i16: two components in Int16Regs.
6771 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6772 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6773 Int16Regs:$r, Int16Regs:$g),
6774 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6775 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6776 Int16Regs:$r, Int16Regs:$g)>;
// v2i32: two components in Int32Regs.
6778 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6779 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6780 Int32Regs:$r, Int32Regs:$g),
6781 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6782 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6783 Int32Regs:$r, Int32Regs:$g)>;
// v2i64: two components in Int64Regs.
6785 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6786 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6787 Int64Regs:$r, Int64Regs:$g),
6788 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6789 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6790 Int64Regs:$r, Int64Regs:$g)>;
// v4i8: four components ($r, $g, $b, $a) in Int16Regs.
6792 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6793 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6794 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6795 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6796 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6797 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i16: four components in Int16Regs.
6799 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6800 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6801 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6802 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6803 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6804 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i32: four components in Int32Regs.
6806 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6807 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6808 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6809 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6810 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6811 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics, each
// targeting the matching SUST_P_1D_*_TRAP instruction with the operands
// forwarded unchanged: $s (Int64Regs), $x (Int32Regs), then the components.
// Only 8-, 16- and 32-bit element variants appear in this group; the 8-bit
// variants use Int16Regs, the same register class as the 16-bit ones.
// i8: scalar value in Int16Regs.
6816 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6817 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6818 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
// i16: scalar value in Int16Regs.
6820 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6821 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6822 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
// i32: scalar value in Int32Regs.
6824 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6825 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6826 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// v2i8: two components ($r, $g) in Int16Regs.
6828 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6829 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6830 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6831 Int16Regs:$r, Int16Regs:$g)>;
// v2i16: two components in Int16Regs.
6833 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6834 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6835 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6836 Int16Regs:$r, Int16Regs:$g)>;
// v2i32: two components in Int32Regs.
6838 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6839 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6840 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6841 Int32Regs:$r, Int32Regs:$g)>;
// v4i8: four components ($r, $g, $b, $a) in Int16Regs.
6843 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6844 Int64Regs:$s, Int32Regs:$x,
6845 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6846 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6847 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i16: four components in Int16Regs.
6849 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6850 Int64Regs:$s, Int32Regs:$x,
6851 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6852 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6853 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i32: four components in Int32Regs.
6855 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6856 Int64Regs:$s, Int32Regs:$x,
6857 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6858 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6859 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics, each
// targeting the matching SUST_P_1D_ARRAY_*_TRAP instruction.
// Visible operand order on both sides is $s (Int64Regs), $l, $x (Int32Regs),
// then the component operands. $l is presumably the array layer index --
// TODO confirm.
// NOTE(review): in this copy the instruction side of the scalar
// (i8/i16/i32) patterns stops after $x; the closing value line is not
// visible. Confirm against upstream before editing.
// i8: scalar value arrives in Int16Regs.
6863 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6864 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6865 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// i16: scalar value arrives in Int16Regs.
6868 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6869 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6870 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// i32: scalar value arrives in Int32Regs.
6873 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6874 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6875 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// v2i8: two components ($r, $g) in Int16Regs.
6878 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6879 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6880 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6881 Int16Regs:$r, Int16Regs:$g)>;
// v2i16: two components in Int16Regs.
6883 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6884 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6885 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6886 Int16Regs:$r, Int16Regs:$g)>;
// v2i32: two components in Int32Regs.
6888 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6889 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6890 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6891 Int32Regs:$r, Int32Regs:$g)>;
// v4i8: four components ($r, $g, $b, $a) in Int16Regs.
6893 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6894 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6895 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6896 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6897 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i16: four components in Int16Regs.
6899 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6900 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6901 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6902 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6903 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i32: four components in Int32Regs.
6905 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6906 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6907 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6908 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6909 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics, each
// targeting the matching SUST_P_2D_*_TRAP instruction.
// Visible operand order on both sides is $s (Int64Regs), $x, $y (Int32Regs),
// then the component operands.
// NOTE(review): in this copy the instruction side of the scalar
// (i8/i16/i32) patterns stops after $y; the closing value line is not
// visible. Confirm against upstream before editing.
// i8: scalar value arrives in Int16Regs.
6913 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6914 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6915 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// i16: scalar value arrives in Int16Regs.
6918 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6919 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6920 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// i32: scalar value arrives in Int32Regs.
6923 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6924 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6925 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// v2i8: two components ($r, $g) in Int16Regs.
6928 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6929 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6930 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6931 Int16Regs:$r, Int16Regs:$g)>;
// v2i16: two components in Int16Regs.
6933 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6934 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6935 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6936 Int16Regs:$r, Int16Regs:$g)>;
// v2i32: two components in Int32Regs.
6938 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6939 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6940 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6941 Int32Regs:$r, Int32Regs:$g)>;
// v4i8: four components ($r, $g, $b, $a) in Int16Regs.
6943 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6944 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6945 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6946 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6947 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i16: four components in Int16Regs.
6949 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6950 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6951 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6952 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6953 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i32: four components in Int32Regs.
6955 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6956 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6957 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6958 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6959 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics, each
// targeting the matching SUST_P_2D_ARRAY_*_TRAP instruction.
// Visible operand order on both sides is $s (Int64Regs), then $l, $x, $y
// (all Int32Regs), then the component operands.
// $l is presumably the array layer index -- TODO confirm.
// NOTE(review): in this copy the scalar (i8/i16/i32) patterns stop after the
// coordinate operands and the v2i32 source pattern stops after $r; no closing
// line is visible for them. Confirm against upstream before editing.
// i8: scalar value arrives in Int16Regs.
6963 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6964 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6965 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
6966 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// i16: scalar value arrives in Int16Regs.
6969 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6970 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6971 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
6972 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// i32: scalar value arrives in Int32Regs.
6975 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6976 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6977 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
6978 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// v2i8: two components ($r, $g) in Int16Regs.
6981 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6982 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6983 Int16Regs:$r, Int16Regs:$g),
6984 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6985 Int32Regs:$x, Int32Regs:$y,
6986 Int16Regs:$r, Int16Regs:$g)>;
// v2i16: two components in Int16Regs.
6988 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6989 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6990 Int16Regs:$r, Int16Regs:$g),
6991 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6992 Int32Regs:$x, Int32Regs:$y,
6993 Int16Regs:$r, Int16Regs:$g)>;
// v2i32: the instruction side forwards Int32Regs $r and $g.
6995 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6996 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6998 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6999 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// v4i8: four components ($r, $g, $b, $a) in Int16Regs.
7001 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7002 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7003 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7004 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7005 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7006 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i16: four components in Int16Regs.
7008 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7009 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7010 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7011 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7012 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7013 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i32: four components in Int32Regs.
7015 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7016 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7017 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7018 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7019 Int32Regs:$x, Int32Regs:$y,
7020 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics, each
// targeting the matching SUST_P_3D_*_TRAP instruction.
// Visible operand order on both sides is $s (Int64Regs), then $x, $y, $z
// (all Int32Regs), then the component operands.
// NOTE(review): in this copy the scalar (i8/i16/i32) patterns show only the
// handle and coordinate operands; the value operand lines are not visible.
// Confirm against upstream before editing.
// i8 -> SUST_P_3D_B8_TRAP
7024 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7025 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7027 (SUST_P_3D_B8_TRAP Int64Regs:$s,
7028 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// i16 -> SUST_P_3D_B16_TRAP
7031 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7032 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7034 (SUST_P_3D_B16_TRAP Int64Regs:$s,
7035 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// i32 -> SUST_P_3D_B32_TRAP
7038 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7039 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7041 (SUST_P_3D_B32_TRAP Int64Regs:$s,
7042 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// v2i8: two components ($r, $g) in Int16Regs.
7045 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7046 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7047 Int16Regs:$r, Int16Regs:$g),
7048 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7049 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7050 Int16Regs:$r, Int16Regs:$g)>;
// v2i16: two components in Int16Regs.
7052 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7053 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7054 Int16Regs:$r, Int16Regs:$g),
7055 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7056 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7057 Int16Regs:$r, Int16Regs:$g)>;
// v2i32: two components in Int32Regs.
7059 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7060 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7061 Int32Regs:$r, Int32Regs:$g),
7062 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7063 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7064 Int32Regs:$r, Int32Regs:$g)>;
// v4i8: four components ($r, $g, $b, $a) in Int16Regs.
7066 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7067 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7068 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7069 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7070 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7071 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i16: four components in Int16Regs.
7073 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7074 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7075 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7076 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7077 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7078 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
// v4i32: four components in Int32Regs.
7080 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7081 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7082 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7083 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7084 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7085 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7087 //-----------------------------------
7088 // Read Special Registers
7089 //-----------------------------------
// Instruction template that reads a special register into a 64-bit register.
//   regname - register name; it is spliced after a literal '%' in the
//             emitted "mov.u64" assembly text
//   intop   - operand-less Intrinsic this instruction is selected for
// The instruction has a single Int64Regs output ($d) and no inputs.
7091 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7092 : NVPTXInst<(outs Int64Regs:$d), (ins),
7093 !strconcat("mov.u64 \t$d, %", regname, ";"),
7094 [(set Int64Regs:$d, (intop))]>;
// Instruction template that reads a special register into a 32-bit register.
//   regname - register name; it is spliced after a literal '%' in the
//             emitted "mov.u32" assembly text
//   intop   - operand-less Intrinsic this instruction is selected for
// The instruction has a single Int32Regs output ($d) and no inputs.
7096 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7097 : NVPTXInst<(outs Int32Regs:$d), (ins),
7098 !strconcat("mov.u32 \t$d, %", regname, ";"),
7099 [(set Int32Regs:$d, (intop))]>;
7101 // TODO: Add vector versions of the special-register reads.
// %tid.{x,y,z,w}: one 32-bit read per component, each selected for the
// corresponding int_nvvm_read_ptx_sreg_tid_* intrinsic.
7103 def INT_PTX_SREG_TID_X :
7104 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7105 def INT_PTX_SREG_TID_Y :
7106 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7107 def INT_PTX_SREG_TID_Z :
7108 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7109 def INT_PTX_SREG_TID_W :
7110 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// %ntid.{x,y,z,w}: one 32-bit read per component, each selected for the
// corresponding int_nvvm_read_ptx_sreg_ntid_* intrinsic.
7112 def INT_PTX_SREG_NTID_X :
7113 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7114 def INT_PTX_SREG_NTID_Y :
7115 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7116 def INT_PTX_SREG_NTID_Z :
7117 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7118 def INT_PTX_SREG_NTID_W :
7119 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// %laneid, %warpid and %nwarpid: 32-bit reads selected for the
// int_nvvm_read_ptx_sreg_{laneid,warpid,nwarpid} intrinsics.
7121 def INT_PTX_SREG_LANEID :
7122 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7123 def INT_PTX_SREG_WARPID :
7124 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7125 def INT_PTX_SREG_NWARPID :
7126 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// %ctaid.{x,y,z,w}: one 32-bit read per component, each selected for the
// corresponding int_nvvm_read_ptx_sreg_ctaid_* intrinsic.
7128 def INT_PTX_SREG_CTAID_X :
7129 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7130 def INT_PTX_SREG_CTAID_Y :
7131 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7132 def INT_PTX_SREG_CTAID_Z :
7133 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7134 def INT_PTX_SREG_CTAID_W :
7135 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// %nctaid.{x,y,z,w}: one 32-bit read per component, each selected for the
// corresponding int_nvvm_read_ptx_sreg_nctaid_* intrinsic.
7137 def INT_PTX_SREG_NCTAID_X :
7138 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7139 def INT_PTX_SREG_NCTAID_Y :
7140 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7141 def INT_PTX_SREG_NCTAID_Z :
7142 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7143 def INT_PTX_SREG_NCTAID_W :
7144 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// %smid, %nsmid and %gridid: 32-bit reads selected for the
// int_nvvm_read_ptx_sreg_{smid,nsmid,gridid} intrinsics.
7146 def INT_PTX_SREG_SMID :
7147 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7148 def INT_PTX_SREG_NSMID :
7149 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7150 def INT_PTX_SREG_GRIDID :
7151 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// %lanemask_{eq,le,lt,ge,gt}: 32-bit reads, one per comparison suffix, each
// selected for the corresponding int_nvvm_read_ptx_sreg_lanemask_* intrinsic.
7153 def INT_PTX_SREG_LANEMASK_EQ :
7154 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7155 def INT_PTX_SREG_LANEMASK_LE :
7156 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7157 def INT_PTX_SREG_LANEMASK_LT :
7158 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7159 def INT_PTX_SREG_LANEMASK_GE :
7160 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7161 def INT_PTX_SREG_LANEMASK_GT :
7162 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// %clock is read through the 32-bit template; %clock64 is the only register
// in this section read through the 64-bit template (PTX_READ_SREG_R64).
7164 def INT_PTX_SREG_CLOCK :
7165 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7166 def INT_PTX_SREG_CLOCK64 :
7167 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// %pm0..%pm3: 32-bit reads selected for int_nvvm_read_ptx_sreg_pm{0,1,2,3}.
7169 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7170 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7171 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7172 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// Warp-size read, selected for int_nvvm_read_ptx_sreg_warpsize. Written out
// by hand as a "mov.u32" of WARP_SZ into an Int32Regs output ($dst).
7174 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
7175 // emits "%" before the register name, and the WARP_SZ constant has none.
7176 def INT_PTX_SREG_WARPSIZE :
7177 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7178 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;