]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Merge llvm, clang, lld, lldb, compiler-rt and libc++ r303197, and update
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
// Pattern leaf over fpimm nodes: accepts the node when its APFloat value, read
// with convertToFloat(), compares equal to 0.0f.
// NOTE(review): the check uses ==, so a negative-zero immediate presumably
// matches as well -- confirm that is intended.
10 def immFloat0 : PatLeaf<(fpimm), [{
11     float f = (float)N->getValueAPF().convertToFloat();
12     return (f==0.0f);
13 }]>;
14
// Pattern leaf over fpimm nodes: accepts the node when its APFloat value, read
// with convertToFloat(), compares equal to 1.0f.
15 def immFloat1 : PatLeaf<(fpimm), [{
16     float f = (float)N->getValueAPF().convertToFloat();
17     return (f==1.0f);
18 }]>;
19
// Pattern leaf over fpimm nodes: accepts the node when its APFloat value, read
// with convertToDouble(), compares equal to 0.0.
// NOTE(review): the check uses ==, so a negative-zero immediate presumably
// matches as well -- confirm that is intended.
20 def immDouble0 : PatLeaf<(fpimm), [{
21     double d = (double)N->getValueAPF().convertToDouble();
22     return (d==0.0);
23 }]>;
24
// Pattern leaf over fpimm nodes: accepts the node when its APFloat value, read
// with convertToDouble(), compares equal to 1.0.
25 def immDouble1 : PatLeaf<(fpimm), [{
26     double d = (double)N->getValueAPF().convertToDouble();
27     return (d==1.0);
28 }]>;
29
30
31
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
// Every record defined inside this `let` scope gets isConvergent = 1: the
// bar.* barrier instructions and the shfl.* shuffle instructions below.
35 let isConvergent = 1 in {
// int_nvvm_barrier0: no operands; prints "bar.sync 0;".
36 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
37                   "bar.sync \t0;",
38       [(int_nvvm_barrier0)]>;
// int_nvvm_barrier_n: one register operand, printed as the bar.sync argument.
39 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
40                   "bar.sync \t$src1;",
41       [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// int_nvvm_barrier: two register operands, printed as "bar.sync $src1, $src2;".
42 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
43                   "bar.sync \t$src1, $src2;",
44       [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// int_nvvm_barrier0_popc: emits a braced sequence that declares predicate %p1,
// sets it to ($pred != 0) with setp.ne.u32, then issues bar.red.popc.u32 on
// barrier 0 with the result written straight to $dst.
45 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
46   !strconcat("{{ \n\t",
47              ".reg .pred \t%p1; \n\t",
48              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
49              "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
50              "}}"),
51       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: same shape as the popc form, but bar.red.and.pred
// writes predicate %p2, which selp.u32 then turns into 1 or 0 in $dst.
52 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
53   !strconcat("{{ \n\t",
54              ".reg .pred \t%p1; \n\t",
55              ".reg .pred \t%p2; \n\t",
56              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
57              "bar.red.and.pred \t%p2, 0, %p1; \n\t",
58              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
59              "}}"),
60       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: identical to the AND form except that the reduction
// instruction is bar.red.or.pred.
61 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
62   !strconcat("{{ \n\t",
63              ".reg .pred \t%p1; \n\t",
64              ".reg .pred \t%p2; \n\t",
65              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
66              "bar.red.or.pred \t%p2, 0, %p1; \n\t",
67              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
68              "}}"),
69       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
70
// int_nvvm_bar_sync: takes the barrier argument as an i32 immediate rather
// than a register.
71 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
72                              [(int_nvvm_bar_sync imm:$i)]>;
73
74 // shfl.{up,down,bfly,idx}.b32
// Parameters: regclass is the register class of $dst/$src, mode is the text
// spliced into the mnemonic after "shfl.", IntOp is the intrinsic to select.
// Each instantiation defines four records (suffixes reg, imm1, imm2, imm3),
// one per register/immediate combination of $offset and $mask; all four print
// the same "shfl.<mode>.b32 $dst, $src, $offset, $mask;" string.
75 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
76   // The last two parameters to shfl can be regs or imms.  ptxas is smart
77   // enough to inline constant registers, so strictly speaking we don't need to
78   // handle immediates here.  But it's easy enough, and it makes our ptx more
79   // readable.
  // $offset and $mask both in registers.
80   def reg : NVPTXInst<
81       (outs regclass:$dst),
82       (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
83       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
84       [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
85
  // $offset immediate, $mask in a register.
86   def imm1 : NVPTXInst<
87       (outs regclass:$dst),
88       (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
89       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
90       [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
91
  // $offset in a register, $mask immediate.
92   def imm2 : NVPTXInst<
93       (outs regclass:$dst),
94       (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
95       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
96       [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
97
  // $offset and $mask both immediate.
98   def imm3 : NVPTXInst<
99       (outs regclass:$dst),
100       (ins regclass:$src, i32imm:$offset, i32imm:$mask),
101       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
102       [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
103 }
104
// Instantiate SHFL once per mode (down, up, bfly, idx) and per register class
// (Int32Regs, Float32Regs). Both register classes print the ".b32" mnemonic.
105 defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
106 defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
107 defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
108 defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
109 defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
110 defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
111 defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
112 defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
113
114 } // isConvergent = 1
115
116
117 //-----------------------------------
118 // Explicit Memory Fence Functions
119 //-----------------------------------
// Operand-less instruction (empty outs and ins) that prints StrOp verbatim and
// is selected for the zero-argument intrinsic IntOP.
120 class MEMBAR<string StrOp, Intrinsic IntOP> :
121               NVPTXInst<(outs), (ins),
122             StrOp, [(IntOP)]>;
123
// One MEMBAR record per suffix: membar.cta, membar.gl and membar.sys, each
// bound to the int_nvvm_membar_* intrinsic with the same suffix.
124 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
125 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
126 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
127
128
129 //-----------------------------------
130 // Math Functions
131 //-----------------------------------
132
133 // Map min(1.0, max(0.0, x)) to sat(x).
134 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
135 // the inner min(x, 1.0) evaluates to 1.0, so
136 // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
137 // Same story for fmax, fmin.
138
// f32: fold fmin(1.0, fmax(0.0, a)) into CVT_f32_f32 with the CvtSAT modifier.
// The four patterns cover both operand orders of the outer fmin and of the
// inner fmax; the constants are recognized by immFloat1 / immFloat0.
139 def : Pat<(int_nvvm_fmin_f immFloat1,
140             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
141           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
142 def : Pat<(int_nvvm_fmin_f immFloat1,
143             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
144           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
145 def : Pat<(int_nvvm_fmin_f
146             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
147           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
148 def : Pat<(int_nvvm_fmin_f
149             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
150           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
151
// f64: fold fmin(1.0, fmax(0.0, a)) into CVT_f64_f64 with the CvtSAT modifier.
// The four patterns cover both operand orders of the outer fmin and of the
// inner fmax; the constants are recognized by immDouble1 / immDouble0.
152 def : Pat<(int_nvvm_fmin_d immDouble1,
153             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
154           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
155 def : Pat<(int_nvvm_fmin_d immDouble1,
156             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
157           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
158 def : Pat<(int_nvvm_fmin_d
159             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
160           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
161 def : Pat<(int_nvvm_fmin_d
162             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
163           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
164
165
166 // We need a full string for OpcStr here because we need to deal with case like
167 // INT_PTX_RECIP.
// One-input instruction template: result $dst in target_regclass, input $src0
// in src_regclass. OpcStr is the complete asm string (it must mention $dst and
// $src0 itself), and the selection pattern is (IntOP $src0) -> $dst.
168 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
169   NVPTXRegClass src_regclass, Intrinsic IntOP>
170             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
171             OpcStr,
172         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
173
174 // We need a full string for OpcStr here because we need to deal with the case
175 // like INT_PTX_NATIVE_POWR_F.
// Two-input instruction template: result $dst in t_regclass, inputs $src0 and
// $src1 in s0_regclass / s1_regclass. OpcStr is the complete asm string, and
// the selection pattern is (IntOP $src0, $src1) -> $dst.
176 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
177   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
178             : NVPTXInst<(outs t_regclass:$dst),
179               (ins s0_regclass:$src0, s1_regclass:$src1),
180             OpcStr,
181         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
182
// Three-input instruction template: result $dst in t_regclass, inputs $src0,
// $src1 and $src2 in s0/s1/s2_regclass. OpcStr is the complete asm string, and
// the selection pattern is (IntOP $src0, $src1, $src2) -> $dst.
183 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
184   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
185   NVPTXRegClass s2_regclass, Intrinsic IntOP>
186             : NVPTXInst<(outs t_regclass:$dst),
187               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
188             OpcStr,
189         [(set t_regclass:$dst,
190           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
191
192 //
193 // MISC
194 //
195
// int_nvvm_prmt: three Int32Regs inputs and an Int32Regs result, printed as
// "prmt.b32 $dst, $src0, $src1, $src2;".
196 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
197   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
198
199 //
200 // Min Max
201 //
202
// Two-operand min/max. Each def binds one int_nvvm_fmin_* / int_nvvm_fmax_*
// intrinsic to the mnemonic in its asm string: min/max in .f32 (with and
// without .ftz) and in .f64 (no .ftz form is defined here).
203 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
204   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
205 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
206   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
207
208 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
209   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
210 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
211   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
212
213 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
214   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
215 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
216   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
217
218
219 //
220 // Multiplication
221 //
222
// Multiplication intrinsics, all two-operand F_MATH_2 records.
// mul.hi on 32-bit registers, signed (.s32) and unsigned (.u32).
223 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
224   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
225 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
226   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
227
// mul.hi on 64-bit registers, signed (.s64) and unsigned (.u64).
228 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
229   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
230 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
231   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
232
// f32 mul with an explicit rounding suffix (.rn/.rz/.rm/.rp), each in an .ftz
// and a non-.ftz form.
233 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
234   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
235 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
236   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
237 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
238   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
239 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
240   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
241 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
242   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
243 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
244   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
245 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
246   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
247 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
248   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
249
// f64 mul with an explicit rounding suffix; no .ftz forms are defined here.
250 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
251   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
252 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
253   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
254 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
255   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
256 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
257   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
258
// mul24.lo on 32-bit registers, signed (.s32) and unsigned (.u32).
259 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
260   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
261 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
262   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
263
264 //
265 // Div
266 //
267
// Division intrinsics, all two-operand F_MATH_2 records.
// f32 div.approx, with and without .ftz.
268 def INT_NVVM_DIV_APPROX_FTZ_F
269   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
270     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
271 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
272   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
273
// f32 div with an explicit rounding suffix (.rn/.rz/.rm/.rp), each in an .ftz
// and a non-.ftz form.
274 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
275   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
276 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
277   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
278 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
279   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
280 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
281   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
282 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
283   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
284 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
285   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
286 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
287   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
288 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
289   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
290
// f64 div with an explicit rounding suffix; no .ftz forms are defined here.
291 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
292   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
293 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
294   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
295 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
296   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
297 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
298   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
299
300 //
301 // Sad
302 //
303
// int_nvvm_sad_i / int_nvvm_sad_ui: three Int32Regs inputs, printed as
// sad.s32 and sad.u32 respectively.
304 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
305   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
306 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
307   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
308
309 //
310 // Floor  Ceil
311 //
312
// floor/ceil get no instruction of their own; they are rewritten to a
// same-type CVT (CVT_f32_f32 or CVT_f64_f64) carrying a conversion modifier.
// floor uses CvtRMI (CvtRMI_FTZ for the ftz intrinsic).
313 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
314           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
315 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
316           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
317 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
318           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
319
// ceil uses CvtRPI (CvtRPI_FTZ for the ftz intrinsic).
320 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
321           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
322 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
323           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
324 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
325           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
326
327 //
328 // Abs
329 //
330
// Absolute value: abs.ftz.f32, abs.f32 and abs.f64, each bound to the
// int_nvvm_fabs_* intrinsic of the same flavor.
331 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
332   Float32Regs, int_nvvm_fabs_ftz_f>;
333 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
334   Float32Regs, int_nvvm_fabs_f>;
335
336 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
337   Float64Regs, int_nvvm_fabs_d>;
338
339 //
340 // Round
341 //
342
// int_nvvm_round_*: rewritten to a same-type CVT with the CvtRNI modifier
// (CvtRNI_FTZ for the ftz intrinsic).
343 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
344           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
345 def : Pat<(int_nvvm_round_f Float32Regs:$a),
346           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
347 def : Pat<(int_nvvm_round_d Float64Regs:$a),
348           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
349
350 //
351 // Trunc
352 //
353
// int_nvvm_trunc_*: rewritten to a same-type CVT with the CvtRZI modifier
// (CvtRZI_FTZ for the ftz intrinsic).
354 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
355           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
356 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
357           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
358 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
359           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
360
361 //
362 // Saturate
363 //
364
// int_nvvm_saturate_*: rewritten to a same-type CVT with the CvtSAT modifier
// (CvtSAT_FTZ for the ftz intrinsic).
365 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
366           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
367 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
368           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
369 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
370           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
371
372 //
373 // Exp2  Log2
374 //
375
// ex2.approx: one-operand records for .ftz.f32, .f32 and .f64.
376 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
377   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
378 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
379   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
380 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
381   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
382
// lg2.approx: one-operand records for .ftz.f32, .f32 and .f64.
383 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
384   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
385 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
386   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
387 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
388   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
389
390 //
391 // Sin  Cos
392 //
393
// sin.approx / cos.approx: f32 only, each with an .ftz and a non-.ftz form.
394 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
395   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
396 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
397   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
398
399 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
400   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
401 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
402   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
403
404 //
405 // Fma
406 //
407
// fma with an explicit rounding suffix (.rn/.rz/.rm/.rp), three-operand
// F_MATH_3 records. The f32 forms come in .ftz and non-.ftz variants.
408 def INT_NVVM_FMA_RN_FTZ_F
409   : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
410     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
411 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
412   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
413 def INT_NVVM_FMA_RZ_FTZ_F
414   : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
415     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
416 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
417   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
418 def INT_NVVM_FMA_RM_FTZ_F
419   : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
420     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
421 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
422   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
423 def INT_NVVM_FMA_RP_FTZ_F
424   : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
425     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
426 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
427   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
428
// f64 fma, one record per rounding suffix; no .ftz forms are defined here.
429 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
430   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
431 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
432   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
433 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
434   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
435 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
436   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
437
438 //
439 // Rcp
440 //
441
// rcp with an explicit rounding suffix (.rn/.rz/.rm/.rp), one-operand F_MATH_1
// records. The f32 forms come in .ftz and non-.ftz variants.
442 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
443   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
444 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
445   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
446 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
447   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
448 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
449   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
450 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
451   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
452 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
453   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
454 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
455   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
456 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
457   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
458
// f64 rcp, one record per rounding suffix.
459 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
460   Float64Regs, int_nvvm_rcp_rn_d>;
461 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
462   Float64Regs, int_nvvm_rcp_rz_d>;
463 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
464   Float64Regs, int_nvvm_rcp_rm_d>;
465 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
466   Float64Regs, int_nvvm_rcp_rp_d>;
467
// The only approximate rcp defined here: the f64 .approx.ftz form.
468 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
469   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
470
471 //
472 // Sqrt
473 //
474
// f32 sqrt: one record per rounding suffix (.rn/.rz/.rm/.rp) plus .approx,
// each in an .ftz and a non-.ftz form. Several of these records are also the
// output instructions of the int_nvvm_sqrt_f patterns that follow.
475 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
476   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
477 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
478   Float32Regs, int_nvvm_sqrt_rn_f>;
479 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
480   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
481 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
482   Float32Regs, int_nvvm_sqrt_rz_f>;
483 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
484   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
485 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
486   Float32Regs, int_nvvm_sqrt_rm_f>;
487 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
488   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
489 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
490   Float32Regs, int_nvvm_sqrt_rp_f>;
491 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
492   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
493 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
494   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
495
// f64 sqrt, one record per rounding suffix; no .approx or .ftz forms here.
496 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
497   Float64Regs, int_nvvm_sqrt_rn_d>;
498 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
499   Float64Regs, int_nvvm_sqrt_rz_d>;
500 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
501   Float64Regs, int_nvvm_sqrt_rm_d>;
502 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
503   Float64Regs, int_nvvm_sqrt_rp_d>;
504
505 // nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f intrinsic is mapped to one of four concrete
// sqrt instructions depending on which predicates hold:
//   doF32FTZ and do_SQRTF32_RN  -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN               -> sqrt.rn.f32
//   doF32FTZ                    -> sqrt.approx.ftz.f32
//   (no predicate)              -> sqrt.approx.f32
// NOTE(review): more than one pattern can be applicable at once (the last is
// unpredicated); how the selector ranks them is not visible here -- confirm
// that the most specific pattern wins.
506 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
507           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
508 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
509           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
510 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
511           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
512 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
513           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
514
515 //
516 // Rsqrt
517 //
518
// rsqrt.approx: one-operand records for .ftz.f32, .f32 and .f64.
519 def INT_NVVM_RSQRT_APPROX_FTZ_F
520   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
521     int_nvvm_rsqrt_approx_ftz_f>;
522 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
523   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
524 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
525   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
526
527 //
528 // Add
529 //
530
// add with an explicit rounding suffix (.rn/.rz/.rm/.rp), two-operand F_MATH_2
// records. The f32 forms come in .ftz and non-.ftz variants.
531 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
532   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
533 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
534   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
535 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
536   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
537 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
538   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
539 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
540   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
541 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
542   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
543 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
544   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
545 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
546   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
547
// f64 add, one record per rounding suffix; no .ftz forms are defined here.
548 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
549   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
550 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
551   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
552 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
553   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
554 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
555   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
556
557 //
558 // Convert
559 //
560
// int_nvvm_d2f_*: Float64Regs input rewritten to CVT_f32_f64. The modifier
// mirrors the intrinsic suffix: rn/rz/rm/rp -> CvtRN/CvtRZ/CvtRM/CvtRP, with
// the _FTZ variant of the modifier for the *_ftz intrinsics.
561 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
562           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
563 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
564           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
565 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
566           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
567 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
568           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
569 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
570           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
571 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
572           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
573 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
574           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
575 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
576           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
577
// int_nvvm_d2i_*: Float64Regs input rewritten to CVT_s32_f64 with the
// CvtRNI/CvtRZI/CvtRMI/CvtRPI modifier matching the intrinsic suffix.
578 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
579           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
580 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
581           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
582 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
583           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
584 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
585           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
586
// int_nvvm_d2ui_*: same mapping, but selecting CVT_u32_f64.
587 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
588           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
589 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
590           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
591 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
592           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
593 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
594           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
595
// int_nvvm_i2d_*: Int32Regs input rewritten to CVT_f64_s32 with the
// CvtRN/CvtRZ/CvtRM/CvtRP modifier matching the intrinsic suffix.
596 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
597           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
598 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
599           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
600 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
601           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
602 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
603           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
604
// int_nvvm_ui2d_*: same mapping, but selecting CVT_f64_u32.
605 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
606           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
607 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
608           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
609 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
610           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
611 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
612           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
613
// int_nvvm_f2i_*: Float32Regs input rewritten to CVT_s32_f32. The modifier
// mirrors the intrinsic suffix (CvtRNI/CvtRZI/CvtRMI/CvtRPI), with the _FTZ
// variant of the modifier for the *_ftz intrinsics.
614 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
615           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
616 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
617           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
618 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
619           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
620 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
621           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
622 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
623           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
624 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
625           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
626 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
627           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
628 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
629           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
630
// int_nvvm_f2ui_*: same mapping, but selecting CVT_u32_f32.
631 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
632           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
633 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
634           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
635 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
636           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
637 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
638           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
639 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
640           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
641 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
642           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
643 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
644           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
645 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
646           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
647
// int_nvvm_i2f_*: Int32Regs input rewritten to CVT_f32_s32 with the
// CvtRN/CvtRZ/CvtRM/CvtRP modifier matching the intrinsic suffix.
648 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
649           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
650 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
651           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
652 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
653           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
654 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
655           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
656
// int_nvvm_ui2f_*: same mapping, but selecting CVT_f32_u32.
657 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
658           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
659 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
660           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
661 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
662           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
663 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
664           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
665
// int_nvvm_lohi_i2d: combines two Int32Regs inputs into one Float64Regs result
// with a single mov.b64 whose source is the vector operand {$src0, $src1}.
// NOTE(review): the doubled braces in the asm string presumably print as
// single literal braces -- confirm against the asm-string escaping rules.
666 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
667   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
668
// int_nvvm_d2i_lo: unpacks the Float64Regs input with mov.b64 into a
// two-element vector destination. $dst receives the first element; the second
// goes to %temp, a .b32 register declared inside the emitted braced sequence.
669 def INT_NVVM_D2I_LO : F_MATH_1<
670   !strconcat("{{\n\t",
671              ".reg .b32 %temp; \n\t",
672              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
673              "}}"),
674   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: same sequence with the vector elements swapped, so $dst
// receives the second element and %temp the first.
675 def INT_NVVM_D2I_HI : F_MATH_1<
676   !strconcat("{{\n\t",
677              ".reg .b32 %temp; \n\t",
678              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
679              "}}"),
680   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
681
// int_nvvm_f2ll_{rn,rz,rm,rp}[_ftz]: selected to CVT_s64_f32 on the
// Float32Regs operand, with the Cvt*I (or Cvt*I_FTZ) mode named by the suffix.
def : Pat<
  (int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rn Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rm Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rp Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
698
// int_nvvm_f2ull_{rn,rz,rm,rp}[_ftz]: selected to CVT_u64_f32 on the
// Float32Regs operand, with the Cvt*I (or Cvt*I_FTZ) mode named by the suffix.
def : Pat<
  (int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rn Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rm Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rp Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
715
// int_nvvm_d2ll_{rn,rz,rm,rp}: selected to CVT_s64_f64 on the Float64Regs
// operand with the CvtRNI / CvtRZI / CvtRMI / CvtRPI mode matching the suffix.
def : Pat<
  (int_nvvm_d2ll_rn Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ll_rz Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ll_rm Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ll_rp Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
724
// int_nvvm_d2ull_{rn,rz,rm,rp}: selected to CVT_u64_f64 on the Float64Regs
// operand with the CvtRNI / CvtRZI / CvtRMI / CvtRPI mode matching the suffix.
def : Pat<
  (int_nvvm_d2ull_rn Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ull_rz Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ull_rm Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ull_rp Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
733
// int_nvvm_ll2f_{rn,rz,rm,rp}: selected to CVT_f32_s64 on the Int64Regs
// operand with the CvtRN / CvtRZ / CvtRM / CvtRP mode matching the suffix.
def : Pat<
  (int_nvvm_ll2f_rn Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2f_rz Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2f_rm Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2f_rp Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
742
// int_nvvm_ull2f_{rn,rz,rm,rp}: selected to CVT_f32_u64 on the Int64Regs
// operand with the CvtRN / CvtRZ / CvtRM / CvtRP mode matching the suffix.
def : Pat<
  (int_nvvm_ull2f_rn Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2f_rz Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2f_rm Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2f_rp Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
751
// int_nvvm_ll2d_{rn,rz,rm,rp}: selected to CVT_f64_s64 on the Int64Regs
// operand with the CvtRN / CvtRZ / CvtRM / CvtRP mode matching the suffix.
def : Pat<
  (int_nvvm_ll2d_rn Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2d_rz Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2d_rm Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2d_rp Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
760
// int_nvvm_ull2d_{rn,rz,rm,rp}: selected to CVT_f64_u64 on the Int64Regs
// operand with the CvtRN / CvtRZ / CvtRM / CvtRP mode matching the suffix.
def : Pat<
  (int_nvvm_ull2d_rn Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2d_rz Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2d_rm Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2d_rp Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
769
770
// int_nvvm_f2h_rn[_ftz]: two-step selection. The Float32Regs operand goes
// through CVT_f16_f32 (CvtRN, or CvtRN_FTZ for the _ftz intrinsic) and that
// result is then wrapped in BITCONVERT_16_F2I.
def : Pat<
  (int_nvvm_f2h_rn_ftz Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<
  (int_nvvm_f2h_rn Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
775
776 //
777 // Bitcast
778 //
779
// 32-bit bitcasts between Float32Regs and Int32Regs. Both directions print
// the same "mov.b32" text; only the register classes and intrinsic differ.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;
784
// 64-bit bitcasts between Int64Regs and Float64Regs. Both directions print
// the same "mov.b64" text; only the register classes and intrinsic differ.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
789
790 //-----------------------------------
791 // Atomic Functions
792 //-----------------------------------
793
// PatFrag over (ops, frag) whose predicate accepts the matched node N only
// when ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL) returns true.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag> : PatFrag<ops, frag, [{
  return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
}]>;
// PatFrag over (ops, frag) whose predicate accepts the matched node N only
// when ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED) returns true.
class ATOMIC_SHARED_CHK<dag ops, dag frag> : PatFrag<ops, frag, [{
  return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
}]>;
// PatFrag over (ops, frag) whose predicate accepts the matched node N only
// when ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC) returns true.
class ATOMIC_GENERIC_CHK<dag ops, dag frag> : PatFrag<ops, frag, [{
  return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
}]>;
806
// Two-operand atomic instructions for one pointer register class.
//   reg : value operand $b is a register of `regclass`.
//   imm : value operand $b is an IMMType operand, matched through the IMM node.
// Both forms print
//   atom<SpaceStr><OpcStr><TypeStr> \t$dst, [$addr], $b;
// select on (IntOp $addr, $b), and are guarded by Requires<[Pred]>.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          Predicate Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b;"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<[Pred]>;

  def imm
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b;"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_imp twice, once per pointer register class:
// the p32 records take the address in Int32Regs, the p64 records in Int64Regs.
// All other arguments are forwarded unchanged.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, Predicate Pred> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, IMM, Pred>;
}
826
// Two-operand atomic whose value operand is negated before the atomic is
// issued. The emitted PTX is a brace-scoped sequence that
//   1. declares a scratch register `temp` of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into temp,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> with temp as the value.
// TypeStr is spliced after ".s" / ".u", so callers pass the bare width
// (e.g. "32") rather than a dotted type. Only a register form is defined;
// IMMType is accepted for signature symmetry but is not referenced here.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              Predicate Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                !strconcat("{{ \n\t",
                           ".reg \t.s", TypeStr, " temp; \n\t",
                           "neg.s", TypeStr, " \ttemp, $b; \n\t",
                           "atom", SpaceStr, OpcStr, ".u", TypeStr,
                           " \t$dst, [$addr], temp; \n\t",
                           "}}"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice, once per pointer register class:
// p32 takes the address in Int32Regs, p64 in Int64Regs. All other arguments
// are forwarded unchanged.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, Predicate Pred> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                         IMMType, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                         IMMType, Pred>;
}
849
// Three-operand atomic instructions for one pointer register class. All four
// records print
//   atom<SpaceStr><OpcStr><TypeStr> \t$dst, [$addr], $b, $c;
// and are guarded by Requires<[Pred]>. They differ only in which of $b / $c
// are registers of `regclass` and which are IMMType operands matched by `imm`:
//   reg  : $b register,  $c register
//   imm1 : $b immediate, $c register
//   imm2 : $b register,  $c immediate
//   imm3 : $b immediate, $c immediate
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, Predicate Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<[Pred]>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<[Pred]>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<[Pred]>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<[Pred]>;
}
// Instantiates F_ATOMIC_3_imp twice, once per pointer register class:
// p32 takes the address in Int32Regs, p64 in Int64Regs. All other arguments
// are forwarded unchanged.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      Predicate Pred> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, Pred>;
}
885
886 // atom_add
887
// Address-space-qualified fragments for atomic add. Suffix _g / _s / _gen
// wraps the base node in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively. The 32- and 64-bit forms wrap
// atomic_load_add_{32,64}; the f32 form wraps int_nvvm_atomic_load_add_f32.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_f32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
906
// atom.add instantiations via F_ATOMIC_2.
// The space string is ".global", ".shared", or "" for the generic-address
// form. Each *_USE_G record matches the generic-address fragment but prints
// the ".global" form, under the useAtomRedG*forGen* predicate.

// 32-bit integer (.u32).
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// 64-bit integer (.u64).
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// 32-bit float (.f32); immediates use f32imm / fpimm, all under hasAtomAddF32.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
931
932 // atom_sub
933
// Address-space-qualified fragments for atomic subtract. Suffix _g / _s /
// _gen wraps atomic_load_sub_{32,64} in ATOMIC_GLOBAL_CHK /
// ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK respectively.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_64 node:$a, node:$b)>;
946
// Atomic subtract instantiations. These go through F_ATOMIC_2_NEG with
// OpcStr ".add" and a bare width ("32" / "64") as TypeStr, so the printed
// sequence negates the operand and then performs an atomic add. Each *_USE_G
// record matches the generic-address fragment but prints the ".global" form.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
963
964 // atom_swap
965
// Address-space-qualified fragments for atomic swap. Suffix _g / _s / _gen
// wraps atomic_swap_{32,64} in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_64 node:$a, node:$b)>;
978
// atom.exch instantiations via F_ATOMIC_2, typed .b32 / .b64. Each *_USE_G
// record matches the generic-address fragment but prints the ".global" form,
// under the useAtomRedG*forGen* predicate.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
995
996 // atom_max
997
// Address-space-qualified fragments for atomic max. Suffix _g / _s / _gen
// wraps the base node in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively. The max_* fragments wrap
// atomic_load_max_{32,64}; the umax_* fragments wrap atomic_load_umax_{32,64}.
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_64 node:$a, node:$b)>;
1022
// atom.max instantiations via F_ATOMIC_2. The LOAD_MAX records use the
// atomic_load_max_* fragments with .s32 / .s64; the LOAD_UMAX records use the
// atomic_load_umax_* fragments with .u32 / .u64. Each *_USE_G record matches
// the generic-address fragment but prints the ".global" form.

// max with .s32 / .s64.
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// umax with .u32 / .u64.
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1055
1056 // atom_min
1057
// Address-space-qualified fragments for atomic min. Suffix _g / _s / _gen
// wraps the base node in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively. The min_* fragments wrap
// atomic_load_min_{32,64}; the umin_* fragments wrap atomic_load_umin_{32,64}.
def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_64 node:$a, node:$b)>;
1082
// atom.min instantiations via F_ATOMIC_2. The LOAD_MIN records use the
// atomic_load_min_* fragments with .s32 / .s64; the LOAD_UMIN records use the
// atomic_load_umin_* fragments with .u32 / .u64. Each *_USE_G record matches
// the generic-address fragment but prints the ".global" form.

// min with .s32 / .s64.
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// umin with .u32 / .u64.
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1115
1116 // atom_inc  atom_dec
1117
// Address-space-qualified fragments for atomic inc / dec (32-bit only here).
// Suffix _g / _s / _gen wraps int_nvvm_atomic_load_inc_32 or
// int_nvvm_atomic_load_dec_32 in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1130
// atom.inc / atom.dec instantiations via F_ATOMIC_2, all typed .u32. Each
// *_USE_G record matches the generic-address fragment but prints the
// ".global" form, under useAtomRedG32forGen32.
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1147
1148 // atom_and
1149
// Address-space-qualified fragments for atomic and. Suffix _g / _s / _gen
// wraps atomic_load_and_{32,64} in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively.
def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_64 node:$a, node:$b)>;
1162
// atom.and instantiations via F_ATOMIC_2, typed .b32 / .b64. Each *_USE_G
// record matches the generic-address fragment but prints the ".global" form,
// under the useAtomRedG*forGen* predicate.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1179
1180 // atom_or
1181
// Address-space-qualified fragments for atomic or. Suffix _g / _s / _gen
// wraps atomic_load_or_{32,64} in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively.
def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_64 node:$a, node:$b)>;
1194
// atom.or instantiations via F_ATOMIC_2, typed .b32 / .b64. Each *_USE_G
// record matches the generic-address fragment but prints the ".global" form,
// under the useAtomRedG*forGen* predicate.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
1211
1212 // atom_xor
1213
// Address-space-qualified fragments for atomic xor. Suffix _g / _s / _gen
// wraps atomic_load_xor_{32,64} in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively.
def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_64 node:$a, node:$b)>;
1226
// atom.xor instantiations via F_ATOMIC_2, typed .b32 / .b64. Each *_USE_G
// record matches the generic-address fragment but prints the ".global" form,
// under the useAtomRedG*forGen* predicate.
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1243
1244 // atom_cas
1245
// Address-space-qualified fragments for atomic compare-and-swap; these take
// three operands ($a, $b, $c). Suffix _g / _s / _gen wraps
// atomic_cmp_swap_{32,64} in ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK /
// ATOMIC_GENERIC_CHK respectively.
def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1258
// atom.cas instantiations of F_ATOMIC_3 (defined outside this view).
// Same layout as the two-operand atomics: per width (.b32 / .b64) there is a
// ".global", a ".shared", a generic (empty space string) and a
// generic-fragment-with-".global"-string (_USE_G) variant.  Unlike the
// F_ATOMIC_2 instantiations, no separate immediate SDNode argument is passed
// here -- only the immediate operand type (i32imm / i64imm).
1259 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1260   atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
1261 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1262   atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
1263 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1264   atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
1265 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1266   ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
1267 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1268   atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
1269 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1270   atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
1271 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1272   atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
1273 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1274   ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1275
1276 // Support for scoped atomic operations.  Matches
1277 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1278 // and converts it into the appropriate instruction.
1279 // NOTE: not all possible combinations are implemented
1280 //  'space' is limited to generic as it's the only one needed to support CUDA.
1281 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common base record for every scoped-atomic instruction (2- and 3-operand).
//   AsmStr   - full assembly string for the instruction.
//   regclass - register class of the single output, $result.
//   Preds    - predicates attached through Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value is assigned to $result in the one selection
//              pattern this class defines.
1282 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1283                   dag ins, dag Operands>
1284       : NVPTXInst<(outs regclass:$result), ins,
1285                   AsmStr,
1286                   [(set regclass:$result, Operands)]>,
1287         Requires<Preds>;
1288
1289 // Define instruction variants for all addressing modes.
// Two-operand scoped atomic (address $src, value $b).  Emits four anonymous
// ATOM23_impl records that differ only in operand kinds:
//   - $src in Int32Regs or Int64Regs (32- vs 64-bit address register)
//   - $b as a register of `regclass` (AddedComplexity = 1) or as an
//     immediate of operand type ImmType (default complexity).
// Imm is the immediate SDNode matched for $b and ImmTy the value type it is
// cast to in the pattern.
1290 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1291                        NVPTXRegClass regclass, Operand ImmType,
1292                        SDNode Imm, ValueType ImmTy,
1293                        list<Predicate> Preds> {
1294   let AddedComplexity = 1 in {
1295     def : ATOM23_impl<AsmStr, regclass, Preds,
1296                       (ins Int32Regs:$src, regclass:$b),
1297                       (Intr Int32Regs:$src, regclass:$b)>;
1298     def : ATOM23_impl<AsmStr, regclass, Preds,
1299                       (ins Int64Regs:$src, regclass:$b),
1300                       (Intr Int64Regs:$src, regclass:$b)>;
1301   }
1302   // tablegen can't infer argument types from Intrinsic (though it can
1303   // from Instruction) so we have to enforce specific type on
1304   // immediates via explicit cast to ImmTy.
1305   def : ATOM23_impl<AsmStr, regclass, Preds,
1306                     (ins Int32Regs:$src, ImmType:$b),
1307                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1308   def : ATOM23_impl<AsmStr, regclass, Preds,
1309                     (ins Int64Regs:$src, ImmType:$b),
1310                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1311 }
1312
// Three-operand scoped atomic (address $src, values $b and $c).  Emits eight
// anonymous ATOM23_impl records: every register/immediate combination of $b
// and $c, each for a 32-bit (Int32Regs) and a 64-bit (Int64Regs) address.
// AddedComplexity by operand mix:
//   reg, reg -> 2      imm, reg / reg, imm -> 1      imm, imm -> default
// Immediates are matched as (ImmTy Imm:$x), mirroring ATOM2P_impl.
1313 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1314                        NVPTXRegClass regclass, Operand ImmType,
1315                        SDNode Imm, ValueType ImmTy,
1316                        list<Predicate> Preds> {
1317   // Variants for register/immediate permutations of $b and $c
1318   let AddedComplexity = 2 in {
1319     def : ATOM23_impl<AsmStr, regclass, Preds,
1320                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1321                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1322     def : ATOM23_impl<AsmStr, regclass, Preds,
1323                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1324                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1325   }
1326   let AddedComplexity = 1 in {
1327     def : ATOM23_impl<AsmStr, regclass, Preds,
1328                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1329                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1330     def : ATOM23_impl<AsmStr, regclass, Preds,
1331                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1332                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1333     def : ATOM23_impl<AsmStr, regclass, Preds,
1334                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1335                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1336     def : ATOM23_impl<AsmStr, regclass, Preds,
1337                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1338                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1339   }
1340   def : ATOM23_impl<AsmStr, regclass, Preds,
1341                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1342                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1343   def : ATOM23_impl<AsmStr, regclass, Preds,
1344                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1345                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1346 }
1347
1348 // Constructs intrinsic name and instruction asm strings.
// Two-operand form.  Builds, by string concatenation:
//   asm string : "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//                "." OpStr "." TypeStr " \t$result, [$src], $b;"
//   intrinsic  : the record named
//                "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//                ["_" ScopeStr unless empty], looked up via !cast<Intrinsic>.
// Example: OpStr="add", IntTypeStr="i", TypeStr="s32", ScopeStr="cta",
// SpaceStr="gen" gives "atom.cta.add.s32 \t$result, [$src], $b;" and
// int_nvvm_atomic_add_gen_i_cta.
// NOTE(review): the asm string drops the scope suffix for "gpu" while the
// intrinsic name drops it for ""; only "cta" and "sys" are passed in from
// this file, so the difference is not exercised here.
1349 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1350                        string ScopeStr, string SpaceStr,
1351                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1352                        ValueType ImmTy, list<Predicate> Preds> {
1353   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1354                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1355                             # "." # OpStr # "." # TypeStr
1356                             # " \t$result, [$src], $b;",
1357                      !cast<Intrinsic>(
1358                             "int_nvvm_atomic_" # OpStr
1359                             # "_" # SpaceStr # "_" # IntTypeStr
1360                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1361                      regclass, ImmType, Imm, ImmTy, Preds>;
1362 }
// Three-operand form.  Builds, by string concatenation:
//   asm string : "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//                "." OpStr "." TypeStr " \t$result, [$src], $b, $c;"
//   intrinsic  : the record named
//                "int_nvvm_atomic_" OpStr "_" SpaceStr "_" IntTypeStr
//                ["_" ScopeStr unless empty], looked up via !cast<Intrinsic>.
// and forwards both to ATOM3P_impl.
// NOTE(review): the asm string drops the scope suffix for "gpu" while the
// intrinsic name drops it for ""; only "cta" and "sys" are passed in from
// this file, so the difference is not exercised here.
1363 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1364                        string ScopeStr, string SpaceStr,
1365                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1366                        ValueType ImmTy, list<Predicate> Preds> {
1367   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1368                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1369                             # "." # OpStr # "." # TypeStr
1370                             # " \t$result, [$src], $b, $c;",
1371                      !cast<Intrinsic>(
1372                             "int_nvvm_atomic_" # OpStr
1373                             # "_" # SpaceStr # "_" # IntTypeStr
1374                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1375                      regclass, ImmType, Imm, ImmTy, Preds>;
1376 }
1377
1378 // Constructs variants for different address spaces.
1379 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics: forwards all arguments
// to ATOM2N_impl with SpaceStr fixed to "gen", under the name piece "_gen_".
1380 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1381                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1382                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1383    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1384                             regclass, ImmType, Imm, ImmTy, Preds>;
1385 }
// Address-space layer for three-operand scoped atomics: forwards all
// arguments to ATOM3N_impl with SpaceStr fixed to "gen", under the name
// piece "_gen_".
1386 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1387                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1388                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1389    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1390                             regclass, ImmType, Imm, ImmTy, Preds>;
1391 }
1392
1393 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics: instantiates ATOM2A_impl once with
// ScopeStr "cta" (_cta) and once with "sys" (_sys).  In both cases
// hasAtomScope is appended to the caller-supplied Preds.
1394 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1395                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1396                        ValueType ImmTy, list<Predicate> Preds> {
1397    // .gpu scope is default and is currently covered by existing
1398    // atomics w/o explicitly specified scope.
1399    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1400                            regclass, ImmType, Imm, ImmTy,
1401                            !listconcat(Preds,[hasAtomScope])>;
1402    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1403                            regclass, ImmType, Imm, ImmTy,
1404                            !listconcat(Preds,[hasAtomScope])>;
1405 }
// Scope layer for three-operand atomics: instantiates ATOM3A_impl once with
// ScopeStr "cta" (_cta) and once with "sys" (_sys).  In both cases
// hasAtomScope is appended to the caller-supplied Preds.
1406 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1407            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1408            list<Predicate> Preds> {
1409    // No need to define ".gpu"-scoped atomics.  They do the same thing
1410    // as the regular, non-scoped atomics defined elsewhere.
1411    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1412                            regclass, ImmType, Imm, ImmTy,
1413                            !listconcat(Preds,[hasAtomScope])>;
1414    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1415                            regclass, ImmType, Imm, ImmTy,
1416                            !listconcat(Preds,[hasAtomScope])>;
1417 }
1418
1419 // atom.add
// Type layer for add.  Integer variants (s32, u32, u64) use intrinsic type
// tag "i" and no extra predicates; floating-point variants use tag "f",
// fpimm immediates, and are gated by hasAtomAddF32 / hasAtomAddF64.
// Note there is no _s64 variant in this list.
1420 multiclass ATOM2_add_impl<string OpStr> {
1421    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1422    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1423    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1424    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1425                             [hasAtomAddF32]>;
1426    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1427                             [hasAtomAddF64]>;
1428 }
1429
1430 // atom.{and,or,xor}
// Type layer for the bitwise ops: a b32 variant with no extra predicates and
// a b64 variant gated by hasAtomBitwise64.
1431 multiclass ATOM2_bitwise_impl<string OpStr> {
1432    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1433    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1434                             [hasAtomBitwise64]>;
1435 }
1436
1437 // atom.exch
// Type layer for exch: b32 and b64 variants, neither with extra predicates.
1438 multiclass ATOM2_exch_impl<string OpStr> {
1439    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1440    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1441 }
1442
1443 // atom.{min,max}
// Type layer for min/max: signed and unsigned 32-bit variants with no extra
// predicates, and signed and unsigned 64-bit variants gated by
// hasAtomMinMax64.
1444 multiclass ATOM2_minmax_impl<string OpStr> {
1445    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1446    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1447    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1448                             [hasAtomMinMax64]>;
1449    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1450                             [hasAtomMinMax64]>;
1451 }
1452
1453 // atom.{inc,dec}
// Type layer for inc/dec: a single u32 variant, no extra predicates.
1454 multiclass ATOM2_incdec_impl<string OpStr> {
1455    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1456 }
1457
1458 // atom.cas
// Type layer for cas (three-operand, via ATOM3S_impl): b32 and b64 variants,
// neither with extra predicates.
1459 multiclass ATOM3_cas_impl<string OpStr> {
1460    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1461    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1462 }
1463
// Top-level instantiations of the scoped atomics, one per operation.  The
// string argument is the OpStr spliced into both the asm mnemonic and the
// intrinsic name by ATOM2N_impl / ATOM3N_impl.
1464 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1465 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1466 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1467 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1468 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1469 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1470 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1471 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1472 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1473 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1474
1475 //-----------------------------------
1476 // Support for ldu on sm_20 or later
1477 //-----------------------------------
1478
1479 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1480 // read-only in a kernel.
1481
1482 // Scalar
1483
// Scalar ldu.global.  Defines five instructions that differ only in the
// address operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny),
// ari (MEMri) and ari64 (MEMri64).
//   TyStr    - appended verbatim to "ldu.global."; the instantiations in this
//              file pass the type suffix together with the operand text.
//   regclass - register class of $result.
// All five have an empty pattern list and require hasLDU.
1484 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1485   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1486                !strconcat("ldu.global.", TyStr),
1487                       []>, Requires<[hasLDU]>;
1488   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1489                !strconcat("ldu.global.", TyStr),
1490                         []>, Requires<[hasLDU]>;
1491  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1492                !strconcat("ldu.global.", TyStr),
1493                       []>, Requires<[hasLDU]>;
1494  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1495                !strconcat("ldu.global.", TyStr),
1496                       []>, Requires<[hasLDU]>;
1497  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1498                !strconcat("ldu.global.", TyStr),
1499                         []>, Requires<[hasLDU]>;
1500 }
1501
// Scalar ldu instantiations, one per value type.  Points worth noting:
//   - i8 is loaded as u8 into Int16Regs (same register class as i16);
//   - f16 and f16x2 use the untyped b16 / b32 suffixes;
//   - p32 / p64 use the same asm text and register classes as i32 / i64.
1502 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1503 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1504 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1505 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1506 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1507 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1508 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1509 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1510 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1511 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1512
1513 // vector
1514
1515 // Elementized vector ldu
// Two-element vector ldu.global, results split into $dst1 and $dst2 of
// `regclass`.  Five address-operand variants: _areg32 (Int32Regs),
// _areg64 (Int64Regs), _ari32 (MEMri), _ari64 (MEMri64), _avar (imemAny).
// TyStr is appended verbatim to "ldu.global."; all patterns are empty.
// NOTE(review): unlike the scalar LDU_G multiclass, these defs carry no
// Requires<[hasLDU]> clause -- confirm that is intentional.
1516 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1517  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1518                      (ins Int32Regs:$src),
1519                      !strconcat("ldu.global.", TyStr), []>;
1520  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1521                      (ins Int64Regs:$src),
1522                      !strconcat("ldu.global.", TyStr), []>;
1523  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1524                      (ins MEMri:$src),
1525                      !strconcat("ldu.global.", TyStr), []>;
1526  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1527                      (ins MEMri64:$src),
1528                      !strconcat("ldu.global.", TyStr), []>;
1529  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1530                      (ins imemAny:$src),
1531                      !strconcat("ldu.global.", TyStr), []>;
1532 }
1533
// Four-element vector ldu.global, results split into $dst1..$dst4 of
// `regclass`.  Same five address-operand variants as VLDU_G_ELE_V2
// (_areg32, _areg64, _ari32, _ari64, _avar); TyStr is appended verbatim to
// "ldu.global." and all patterns are empty.
// NOTE(review): unlike the scalar LDU_G multiclass, these defs carry no
// Requires<[hasLDU]> clause -- confirm that is intentional.
1534 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1535  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1536                             regclass:$dst4), (ins Int32Regs:$src), 
1537                !strconcat("ldu.global.", TyStr), []>;
1538  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1539                             regclass:$dst4), (ins Int64Regs:$src), 
1540                !strconcat("ldu.global.", TyStr), []>;
1541  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1542                             regclass:$dst4), (ins MEMri:$src), 
1543                !strconcat("ldu.global.", TyStr), []>;
1544  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1545                             regclass:$dst4), (ins MEMri64:$src), 
1546                !strconcat("ldu.global.", TyStr), []>;
1547  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1548                             regclass:$dst4), (ins imemAny:$src), 
1549                !strconcat("ldu.global.", TyStr), []>;
1550 }
1551
// Vector ldu instantiations.  v2 covers i8, i16, i32, f16, f16x2, f32, i64
// and f64; v4 covers i8, i16, i32, f16, f16x2 and f32 (no 64-bit element
// types).  As in the scalar case, i8 elements use Int16Regs and f16 / f16x2
// use the b16 / b32 suffixes.  The doubled braces "{{ }}" in the asm strings
// surround the destination register list.
1552 defm INT_PTX_LDU_G_v2i8_ELE
1553   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1554 defm INT_PTX_LDU_G_v2i16_ELE
1555   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1556 defm INT_PTX_LDU_G_v2i32_ELE
1557   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1558 defm INT_PTX_LDU_G_v2f16_ELE
1559   : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1560 defm INT_PTX_LDU_G_v2f16x2_ELE
1561   : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1562 defm INT_PTX_LDU_G_v2f32_ELE
1563   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1564 defm INT_PTX_LDU_G_v2i64_ELE
1565   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1566 defm INT_PTX_LDU_G_v2f64_ELE
1567   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1568 defm INT_PTX_LDU_G_v4i8_ELE
1569   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1570 defm INT_PTX_LDU_G_v4i16_ELE
1571   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1572     Int16Regs>;
1573 defm INT_PTX_LDU_G_v4i32_ELE
1574   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1575     Int32Regs>;
1576 defm INT_PTX_LDU_G_v4f16_ELE
1577   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1578     Float16Regs>;
1579 defm INT_PTX_LDU_G_v4f16x2_ELE
1580   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1581     Float16x2Regs>;
1582 defm INT_PTX_LDU_G_v4f32_ELE
1583   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1584     Float32Regs>;
1585
1586
1587 //-----------------------------------
1588 // Support for ldg on sm_35 or later 
1589 //-----------------------------------
1590
1591 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1592 // non-coherent texture cache, and therefore the values read must be read-only
1593 // during the lifetime of the kernel.
1594
// Scalar ld.global.nc.  Defines five instructions that differ only in the
// address operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny),
// ari (MEMri) and ari64 (MEMri64).
//   TyStr    - appended verbatim to "ld.global.nc."; the instantiations in
//              this file pass the type suffix together with the operand text.
//   regclass - register class of $result.
// All five have an empty pattern list and require hasLDG.
1595 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1596   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1597                !strconcat("ld.global.nc.", TyStr),
1598                       []>, Requires<[hasLDG]>;
1599   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1600                !strconcat("ld.global.nc.", TyStr),
1601                         []>, Requires<[hasLDG]>;
1602  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1603                !strconcat("ld.global.nc.", TyStr),
1604                       []>, Requires<[hasLDG]>;
1605  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1606                !strconcat("ld.global.nc.", TyStr),
1607                       []>, Requires<[hasLDG]>;
1608  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1609                !strconcat("ld.global.nc.", TyStr),
1610                         []>, Requires<[hasLDG]>;
1611 }
1612
// Scalar ldg instantiations, one per value type, mirroring the scalar ldu
// list: i8 is loaded as u8 into Int16Regs, f16 / f16x2 use b16 / b32, and
// p32 / p64 reuse the i32 / i64 asm text and register classes.
1613 defm INT_PTX_LDG_GLOBAL_i8
1614   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1615 defm INT_PTX_LDG_GLOBAL_i16
1616   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1617 defm INT_PTX_LDG_GLOBAL_i32
1618   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1619 defm INT_PTX_LDG_GLOBAL_i64
1620   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1621 defm INT_PTX_LDG_GLOBAL_f16
1622   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1623 defm INT_PTX_LDG_GLOBAL_f16x2
1624   : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1625 defm INT_PTX_LDG_GLOBAL_f32
1626   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1627 defm INT_PTX_LDG_GLOBAL_f64
1628   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1629 defm INT_PTX_LDG_GLOBAL_p32
1630   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1631 defm INT_PTX_LDG_GLOBAL_p64
1632   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1633
1634 // vector
1635
1636 // Elementized vector ldg 
// Two-element vector ld.global.nc, results split into $dst1 and $dst2 of
// `regclass`.  Five address-operand variants: _areg32 (Int32Regs),
// _areg64 (Int64Regs), _ari32 (MEMri), _ari64 (MEMri64), _avar (imemAny).
// TyStr is appended verbatim to "ld.global.nc."; all patterns are empty.
// NOTE(review): unlike the scalar LDG_G multiclass, these defs carry no
// Requires<[hasLDG]> clause -- confirm that is intentional.
1637 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1638  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1639                      (ins Int32Regs:$src),
1640                      !strconcat("ld.global.nc.", TyStr), []>;
1641  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1642                      (ins Int64Regs:$src),
1643                      !strconcat("ld.global.nc.", TyStr), []>;
1644  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1645                      (ins MEMri:$src),
1646                      !strconcat("ld.global.nc.", TyStr), []>;
1647  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1648                      (ins MEMri64:$src),
1649                      !strconcat("ld.global.nc.", TyStr), []>;
1650  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1651                      (ins imemAny:$src),
1652                      !strconcat("ld.global.nc.", TyStr), []>;
1653 }
1654
// Four-element vector ld.global.nc, results split into $dst1..$dst4 of
// `regclass`.  Same five address-operand variants as VLDG_G_ELE_V2
// (_areg32, _areg64, _ari32, _ari64, _avar); TyStr is appended verbatim to
// "ld.global.nc." and all patterns are empty.
// NOTE(review): unlike the scalar LDG_G multiclass, these defs carry no
// Requires<[hasLDG]> clause -- confirm that is intentional.
1655 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1656   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1657                               regclass:$dst4), (ins Int32Regs:$src), 
1658                !strconcat("ld.global.nc.", TyStr), []>;
1659   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1660                                regclass:$dst4), (ins Int64Regs:$src), 
1661                !strconcat("ld.global.nc.", TyStr), []>;
1662   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1663                               regclass:$dst4), (ins MEMri:$src), 
1664                !strconcat("ld.global.nc.", TyStr), []>;
1665   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1666                               regclass:$dst4), (ins MEMri64:$src), 
1667                !strconcat("ld.global.nc.", TyStr), []>;
1668   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1669                              regclass:$dst4), (ins imemAny:$src), 
1670                !strconcat("ld.global.nc.", TyStr), []>;
1671 }
1672
1673 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations.  v2 covers i8, i16, i32, f16, f16x2, f32, i64
// and f64; v4 covers i8, i16, i32, f16, f16x2 and f32 (no 64-bit element
// types).  i8 elements are held in Int16Regs; f16 / f16x2 use the b16 / b32
// suffixes.
1674 defm INT_PTX_LDG_G_v2i8_ELE
1675   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1676 defm INT_PTX_LDG_G_v2i16_ELE
1677   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1678 defm INT_PTX_LDG_G_v2i32_ELE
1679   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1680 defm INT_PTX_LDG_G_v2f16_ELE
1681   : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1682 defm INT_PTX_LDG_G_v2f16x2_ELE
1683   : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1684 defm INT_PTX_LDG_G_v2f32_ELE
1685   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1686 defm INT_PTX_LDG_G_v2i64_ELE
1687   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1688 defm INT_PTX_LDG_G_v2f64_ELE
1689   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1690 defm INT_PTX_LDG_G_v4i8_ELE
1691   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1692 defm INT_PTX_LDG_G_v4i16_ELE
1693   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1694 defm INT_PTX_LDG_G_v4i32_ELE
1695   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1696 defm INT_PTX_LDG_G_v4f16_ELE
1697   : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1698 defm INT_PTX_LDG_G_v4f16x2_ELE
1699   : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1700 defm INT_PTX_LDG_G_v4f32_ELE
1701   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1702
1703
// Non-generic -> generic address conversion for the intrinsic `Intrin`.
//   Str - address-space name spliced into the mnemonic ("cvta.<Str>.u32/u64").
// Four live defs, all matching (Intrin $src) on a 32- or 64-bit register:
//   _yes / _yes_64 : emit cvta.<Str>.u32 / .u64, require hasGenericLdSt.
//   _no  / _no_64  : emit a plain mov.u32 / mov.u64, no Requires clause.
// The two /* ... */ regions hold disabled variants taking a global-address
// operand directly; they are kept as-is (see the @TODO notes).
1704 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1705    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1706           !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1707       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1708    Requires<[hasGenericLdSt]>;
1709    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1710           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1711       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1712    Requires<[hasGenericLdSt]>;
1713
1714 // @TODO: Are these actually needed?  I believe global addresses will be copied
1715 // to register values anyway.
1716    /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1717           !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1718       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1719       Requires<[hasGenericLdSt]>;
1720    def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1721           !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1722       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1723       Requires<[hasGenericLdSt]>;*/
1724
1725    def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1726           "mov.u32 \t$result, $src;",
1727       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1728    def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1729           "mov.u64 \t$result, $src;",
1730       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1731
1732 // @TODO: Are these actually needed?  I believe global addresses will be copied
1733 // to register values anyway.
1734    /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1735           "mov.u32 \t$result, $src;",
1736       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1737    def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1738           "mov.u64 \t$result, $src;",
1739       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
1740 }
1741
// Generic -> non-generic address conversion for the intrinsic `Intrin`.
//   Str - address-space name spliced into the mnemonic
//         ("cvta.to.<Str>.u32/u64").
// Four defs, all matching (Intrin $src) on a 32- or 64-bit register:
//   _yes / _yes_64 : emit cvta.to.<Str>.u32 / .u64, require hasGenericLdSt.
//   _no  / _no_64  : emit a plain mov.u32 / mov.u64, no Requires clause.
1742 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1743    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1744           !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1745       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1746    Requires<[hasGenericLdSt]>;
1747    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1748           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1749       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1750    Requires<[hasGenericLdSt]>;
1751    def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1752           "mov.u32 \t$result, $src;",
1753       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1754    def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1755           "mov.u64 \t$result, $src;",
1756       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1757 }
1758
// Address-space conversion instantiations for local, shared, global and
// const.  cvta_<space> converts <space> -> generic (NG_TO_G); cvta_to_<space>
// converts generic -> <space> (G_TO_NG).  Note the mnemonic piece is "const"
// while the intrinsic names spell it "constant".
1759 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1760 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1761 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1762 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1763
1764 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1765 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1766 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1767 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1768
1769
1770 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move
// (mov.u32 / mov.u64) for 32- and 64-bit pointers respectively; neither def
// has a Requires clause.
1771 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1772   (ins Int32Regs:$src),
1773                         "mov.u32 \t$result, $src;",
1774                               [(set Int32Regs:$result,
1775                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1776 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1777   (ins Int64Regs:$src),
1778                         "mov.u64 \t$result, $src;",
1779                               [(set Int64Regs:$result,
1780                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1781
1782
1783 // nvvm.move intrinsics
// One register-to-register mov per int_nvvm_move_* intrinsic:
//   i16 / i32 / i64 -> mov.b16 / mov.b32 / mov.b64
//   float / double  -> mov.f32 / mov.f64
//   ptr             -> mov.u32 (Int32Regs) or mov.u64 (Int64Regs); both
//                      match the same int_nvvm_move_ptr intrinsic and differ
//                      only in register class.
1784 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1785                              "mov.b16 \t$r, $s;",
1786                              [(set Int16Regs:$r,
1787                                (int_nvvm_move_i16 Int16Regs:$s))]>;
1788 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1789                              "mov.b32 \t$r, $s;",
1790                              [(set Int32Regs:$r,
1791                                (int_nvvm_move_i32 Int32Regs:$s))]>;
1792 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1793                              "mov.b64 \t$r, $s;",
1794                              [(set Int64Regs:$r,
1795                                (int_nvvm_move_i64 Int64Regs:$s))]>;
1796 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1797                              "mov.f32 \t$r, $s;",
1798                              [(set Float32Regs:$r,
1799                                (int_nvvm_move_float Float32Regs:$s))]>;
1800 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1801                              "mov.f64 \t$r, $s;",
1802                              [(set Float64Regs:$r,
1803                                (int_nvvm_move_double Float64Regs:$s))]>;
1804 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1805                              "mov.u32 \t$r, $s;",
1806                              [(set Int32Regs:$r,
1807                                (int_nvvm_move_ptr Int32Regs:$s))]>;
1808 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1809                              "mov.u64 \t$r, $s;",
1810                              [(set Int64Regs:$r,
1811                                (int_nvvm_move_ptr Int64Regs:$s))]>;
1812
1813 // @TODO: Are these actually needed, or will we always just see symbols
1814 // copied to registers first?
1815 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1816                              "mov.u32 \t$r, $s;",
1817                              [(set Int32Regs:$r,
1818                              (int_nvvm_move_ptr texternalsym:$s))]>;
1819 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1820                              "mov.u64 \t$r, $s;",
1821                              [(set Int64Regs:$r,
1822                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
1823
1824
1825 // MoveParam        %r1, param
1826 // ptr_local_to_gen %r2, %r1
1827 // ptr_gen_to_local %r3, %r2
1828 // ->
1829 // mov %r1, param
1830
1831 // @TODO: Revisit this.  There is a type
1832 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1833 // instructions are not currently defined. However, we can use the ptr
1834 // variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the external symbol, using nvvm_move_ptr64 for an i64 result and
// nvvm_move_ptr32 for an i32 result.
1835 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1836                 (MoveParam texternalsym:$src)))),
1837                (nvvm_move_ptr64  texternalsym:$src)>;
1838 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1839                 (MoveParam texternalsym:$src)))),
1840                (nvvm_move_ptr32  texternalsym:$src)>;
1841
1842 def texsurf_handles  // 64-bit move of an imem operand; NOTE(review): presumably materializes texture/surface handles, per the name - confirm
1843   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1844               "mov.u64 \t$result, $src;", []>;  // empty pattern list: nothing in this record selects it
1845
1846 //-----------------------------------
1847 // Compiler Error Warn
1848 // - Just ignore them in codegen
1849 //-----------------------------------
1850
1851 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),  // compiler_warn with a 32-bit operand
1852                 "// llvm.nvvm.compiler.warn()",  // asm text is only a PTX comment; $a is not printed
1853                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
1854 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),  // compiler_warn with a 64-bit operand
1855                 "// llvm.nvvm.compiler.warn()",  // asm text is only a PTX comment; $a is not printed
1856                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
1857 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),  // compiler_error with a 32-bit operand
1858                 "// llvm.nvvm.compiler.error()",  // asm text is only a PTX comment; $a is not printed
1859                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
1860 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),  // compiler_error with a 64-bit operand
1861                 "// llvm.nvvm.compiler.error()",  // asm text is only a PTX comment; $a is not printed
1862                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
1863
1864
1865 // isspacep
1866
1867 def ISSPACEP_CONST_32  // isspacep.const on a 32-bit address register, 1-bit result
1868   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1869               "isspacep.const \t$d, $a;",
1870               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
1871     Requires<[hasPTX31]>;  // only the .const forms carry this predicate
1872 def ISSPACEP_CONST_64  // isspacep.const on a 64-bit address register, 1-bit result
1873   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1874               "isspacep.const \t$d, $a;",
1875               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
1876     Requires<[hasPTX31]>;  // only the .const forms carry this predicate
1877 def ISSPACEP_GLOBAL_32  // isspacep.global on a 32-bit address register, 1-bit result
1878   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1879               "isspacep.global \t$d, $a;",
1880               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
1881 def ISSPACEP_GLOBAL_64  // isspacep.global on a 64-bit address register, 1-bit result
1882   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1883               "isspacep.global \t$d, $a;",
1884               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
1885 def ISSPACEP_LOCAL_32  // isspacep.local on a 32-bit address register, 1-bit result
1886   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1887               "isspacep.local \t$d, $a;",
1888               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
1889 def ISSPACEP_LOCAL_64  // isspacep.local on a 64-bit address register, 1-bit result
1890   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1891               "isspacep.local \t$d, $a;",
1892               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
1893 def ISSPACEP_SHARED_32  // isspacep.shared on a 32-bit address register, 1-bit result
1894   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1895               "isspacep.shared \t$d, $a;",
1896               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
1897 def ISSPACEP_SHARED_64  // isspacep.shared on a 64-bit address register, 1-bit result
1898   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1899               "isspacep.shared \t$d, $a;",
1900               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1901
1902
1903 // Special register reads
1904 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),  // copies a SpecialRegs operand into a 32-bit register
1905                             (ins SpecialRegs:$r),
1906                             "mov.b32 \t$d, $r;", []>;  // no pattern of its own; the envreg Pat<> records that follow select it
1907
1908 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;  // envregN read intrinsic -> MOV_SPECIAL of the same-numbered ENVREGN, N = 0..31
1909 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
1910 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
1911 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
1912 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
1913 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
1914 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
1915 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
1916 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
1917 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
1918 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
1919 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
1920 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
1921 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
1922 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
1923 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
1924 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
1925 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
1926 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
1927 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
1928 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
1929 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
1930 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
1931 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
1932 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
1933 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
1934 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
1935 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
1936 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
1937 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
1938 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
1939 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
1940
1941
1942 // rotate builtin support
1943
1944 def ROTATE_B32_HW_IMM  // int_nvvm_rotate_b32 with an immediate amount, hardware path
1945   : NVPTXInst<(outs Int32Regs:$dst),
1946               (ins  Int32Regs:$src, i32imm:$amt),
1947               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",  // funnel shift with $src supplied as both data inputs
1948               [(set Int32Regs:$dst,
1949                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
1950               Requires<[hasHWROT32]> ;  // the noHWROT32 case is handled by a separate Pat<>
1951
1952 def ROTATE_B32_HW_REG  // int_nvvm_rotate_b32 with a register amount, hardware path
1953   : NVPTXInst<(outs Int32Regs:$dst),
1954               (ins  Int32Regs:$src, Int32Regs:$amt),
1955               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",  // funnel shift with $src supplied as both data inputs
1956               [(set Int32Regs:$dst,
1957                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
1958               Requires<[hasHWROT32]> ;  // the noHWROT32 case is handled by a separate Pat<>
1959
1960 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),  // software fallback, immediate amount
1961           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,  // passes $amt plus SUB_FRM_32 of it (presumably 32 - amt; both helpers are defined outside this chunk)
1962       Requires<[noHWROT32]> ;
1963
1964 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),  // software fallback, register amount
1965           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,  // ROTL32reg_sw is defined outside this chunk
1966       Requires<[noHWROT32]> ;
1967
1968 let hasSideEffects = 0 in {  // both unpack helpers are marked side-effect free
1969   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),  // one 32-bit half of $src (the low one, per the def name)
1970     !strconcat("{{\n\t",
1971                ".reg .b32 %dummy;\n\t",  // scratch register that receives the unwanted half
1972                "mov.b64 \t{$dst,%dummy}, $src;\n\t",  // keep the first element of the unpacked pair
1973                "}}"),
1974           []> ;  // no pattern; referenced from Pat<> results in this file
1975
1976   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),  // the other 32-bit half of $src (the high one, per the def name)
1977     !strconcat("{{\n\t",
1978                ".reg .b32 %dummy;\n\t",  // scratch register that receives the unwanted half
1979                "mov.b64 \t{%dummy,$dst}, $src;\n\t",  // keep the second element of the unpacked pair
1980                "}}"),
1981           []> ;  // no pattern; referenced from Pat<> results in this file
1982 }
1983
1984 let hasSideEffects = 0 in {  // marked side-effect free
1985   def PACK_TWO_INT32  // builds a 64-bit register from two 32-bit registers, $lo listed first
1986     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
1987                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;  // no pattern; referenced from Pat<> results in this file
1988 }
1989
1990 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),  // exchange the two 32-bit halves of $src
1991           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),  // old high half goes in the $lo operand
1992                           (GET_LO_INT64 Int64Regs:$src))> ;  // old low half goes in the $hi operand
1993
1994 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
1995 // no side effects.
1996 let hasSideEffects = 0 in {  // applies to all four funnel-shift defs in this scope
1997   def SHF_L_WRAP_B32_IMM  // shf.l.wrap.b32 with an immediate amount
1998     : NVPTXInst<(outs Int32Regs:$dst),
1999                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2000                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,  // no pattern; used by the 64-bit rotate Pat<> records
2001       Requires<[hasHWROT32]>;
2002
2003   def SHF_L_WRAP_B32_REG  // shf.l.wrap.b32 with a register amount
2004     : NVPTXInst<(outs Int32Regs:$dst),
2005                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2006                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2007       Requires<[hasHWROT32]>;
2008
2009   def SHF_R_WRAP_B32_IMM  // shf.r.wrap.b32 with an immediate amount
2010     : NVPTXInst<(outs Int32Regs:$dst),
2011                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2012                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2013       Requires<[hasHWROT32]>;
2014
2015   def SHF_R_WRAP_B32_REG  // shf.r.wrap.b32 with a register amount
2016     : NVPTXInst<(outs Int32Regs:$dst),
2017                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2018                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2019       Requires<[hasHWROT32]>;
2020 }
2021
2022 // HW version of rotate 64
2023 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),  // hardware path, immediate amount: two 32-bit funnel shifts repacked into 64 bits
2024           (PACK_TWO_INT32
2025             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),  // result low half: funnel-shift inputs (hi, lo)
2026                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2027             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),  // result high half: funnel-shift inputs (lo, hi)
2028                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2029       Requires<[hasHWROT32]>;
2030
2031 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),  // hardware path, register amount: same shape as the immediate form
2032           (PACK_TWO_INT32
2033             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),  // result low half: funnel-shift inputs (hi, lo)
2034                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2035             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),  // result high half: funnel-shift inputs (lo, hi)
2036                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2037       Requires<[hasHWROT32]>;
2038
2039
2040 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),  // hardware path, immediate amount, right rotate via shf.r
2041           (PACK_TWO_INT32
2042             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),  // result low half: funnel-shift inputs (lo, hi)
2043                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2044             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),  // result high half: funnel-shift inputs (hi, lo)
2045                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2046       Requires<[hasHWROT32]>;
2047
2048 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),  // hardware path, register amount, right rotate via shf.r
2049           (PACK_TWO_INT32
2050             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),  // result low half: funnel-shift inputs (lo, hi)
2051                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2052             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),  // result high half: funnel-shift inputs (hi, lo)
2053                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2054       Requires<[hasHWROT32]>;
2055
2056 // SW version of rotate 64
2057 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),  // software fallback for a 64-bit rotate by an immediate
2058           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,  // complementary amount must be the 64-bit one (SUB_FRM_64, as in the rotate_right_b64 pattern); SUB_FRM_32 belongs to the 32-bit rotate
2059       Requires<[noHWROT32]>;  // used only when the hardware funnel shift is unavailable
2060 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),  // software fallback, register amount
2061           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,  // ROTL64reg_sw is defined outside this chunk
2062       Requires<[noHWROT32]>;
2063 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),  // software fallback, right rotate by an immediate
2064           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,  // same helper as the left rotate, with the two amount operands exchanged
2065       Requires<[noHWROT32]>;
2066 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),  // software fallback, right rotate by a register amount
2067           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,  // ROTR64reg_sw is defined outside this chunk
2068       Requires<[noHWROT32]>;
2069
2070
2071 //-----------------------------------
2072 // Texture Intrinsics
2073 //-----------------------------------
2074
2075 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2076 // also defined in NVPTXReplaceImageHandles.cpp
2077
2078 // texmode_independent
2079 let IsTex = 1, IsTexModeUnified = 0 in {
2080 // Texture fetch instructions using handles
2081 def TEX_1D_F32_S32  // tex.1d: four Float32Regs results, one s32 coordinate $x
2082   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2083                     Float32Regs:$b, Float32Regs:$a),
2084               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2085               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2086               []>;  // empty pattern list: nothing in this record selects it
2087 def TEX_1D_F32_F32  // tex.1d: four Float32Regs results, one f32 coordinate $x
2088   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2089                     Float32Regs:$b, Float32Regs:$a),
2090               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2091               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2092               []>;
2093 def TEX_1D_F32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2094   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2095                     Float32Regs:$b, Float32Regs:$a),
2096               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2097               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2098               "[$t, $s, \\{$x\\}], $lod;",
2099               []>;
2100 def TEX_1D_F32_F32_GRAD  // tex.grad form: adds one-element $gradx and $grady vectors
2101   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2102                     Float32Regs:$b, Float32Regs:$a),
2103               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2104                    Float32Regs:$gradx, Float32Regs:$grady),
2105               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2106               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2107               []>;
2108 def TEX_1D_S32_S32  // tex.1d: four s32 results in Int32Regs, one s32 coordinate $x
2109   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2110                     Int32Regs:$b, Int32Regs:$a),
2111               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2112               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2113               []>;  // empty pattern list: nothing in this record selects it
2114 def TEX_1D_S32_F32  // tex.1d: four s32 results, one f32 coordinate $x
2115   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2116                     Int32Regs:$b, Int32Regs:$a),
2117               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2118               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2119               []>;
2120 def TEX_1D_S32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2121   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2122                     Int32Regs:$b, Int32Regs:$a),
2123               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2124                    Float32Regs:$lod),
2125               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2126               "[$t, $s, \\{$x\\}], $lod;",
2127               []>;
2128 def TEX_1D_S32_F32_GRAD  // tex.grad form: adds one-element $gradx and $grady vectors
2129   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2130                     Int32Regs:$b, Int32Regs:$a),
2131               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2132                    Float32Regs:$gradx, Float32Regs:$grady),
2133               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2134               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2135               []>;
2136 def TEX_1D_U32_S32  // tex.1d: four u32 results (held in Int32Regs), one s32 coordinate $x
2137   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2138                     Int32Regs:$b, Int32Regs:$a),
2139               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2140               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2141               []>;  // empty pattern list: nothing in this record selects it
2142 def TEX_1D_U32_F32  // tex.1d: four u32 results, one f32 coordinate $x
2143   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2144                     Int32Regs:$b, Int32Regs:$a),
2145               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2146               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2147               []>;
2148 def TEX_1D_U32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2149   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2150                     Int32Regs:$b, Int32Regs:$a),
2151               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2152                    Float32Regs:$lod),
2153               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2154               "[$t, $s, \\{$x\\}], $lod;",
2155               []>;
2156 def TEX_1D_U32_F32_GRAD  // tex.grad form: adds one-element $gradx and $grady vectors
2157   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2158                     Int32Regs:$b, Int32Regs:$a),
2159               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2160                    Float32Regs:$gradx, Float32Regs:$grady),
2161               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2162               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2163               []>;
2164
2165 def TEX_1D_ARRAY_F32_S32  // tex.a1d: four f32 results; $l is printed ahead of the s32 coordinate $x (NOTE(review): presumably the array layer index - confirm)
2166   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2167                     Float32Regs:$b, Float32Regs:$a),
2168               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2169               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2170               "[$t, $s, \\{$l, $x\\}];",
2171               []>;  // empty pattern list: nothing in this record selects it
2172 def TEX_1D_ARRAY_F32_F32  // tex.a1d: four f32 results, f32 coordinate $x; $l stays in Int32Regs
2173   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2174                     Float32Regs:$b, Float32Regs:$a),
2175               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2176               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2177               "[$t, $s, \\{$l, $x\\}];",
2178               []>;
2179 def TEX_1D_ARRAY_F32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2180   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2181                     Float32Regs:$b, Float32Regs:$a),
2182               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2183                    Float32Regs:$lod),
2184               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2185               "[$t, $s, \\{$l, $x\\}], $lod;",
2186               []>;
2187 def TEX_1D_ARRAY_F32_F32_GRAD  // tex.grad form: adds one-element $gradx and $grady vectors
2188   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2189                     Float32Regs:$b, Float32Regs:$a),
2190               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2191                    Float32Regs:$gradx, Float32Regs:$grady),
2192               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2193               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2194               []>;
2195 def TEX_1D_ARRAY_S32_S32  // tex.a1d: four s32 results; $l is printed ahead of the s32 coordinate $x (NOTE(review): presumably the array layer index - confirm)
2196   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2197                     Int32Regs:$b, Int32Regs:$a),
2198               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2199               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2200               "[$t, $s, \\{$l, $x\\}];",
2201               []>;  // empty pattern list: nothing in this record selects it
2202 def TEX_1D_ARRAY_S32_F32  // tex.a1d: four s32 results, f32 coordinate $x; $l stays in Int32Regs
2203   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2204                     Int32Regs:$b, Int32Regs:$a),
2205               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2206               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2207               "[$t, $s, \\{$l, $x\\}];",
2208               []>;
2209 def TEX_1D_ARRAY_S32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2210   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2211                     Int32Regs:$b, Int32Regs:$a),
2212               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2213                    Float32Regs:$lod),
2214               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2215               "[$t, $s, \\{$l, $x\\}], $lod;",
2216               []>;
2217 def TEX_1D_ARRAY_S32_F32_GRAD  // tex.grad form: adds one-element $gradx and $grady vectors
2218   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2219                     Int32Regs:$b, Int32Regs:$a),
2220               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2221                    Float32Regs:$gradx, Float32Regs:$grady),
2222               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2223               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2224               []>;
2225 def TEX_1D_ARRAY_U32_S32  // tex.a1d: four u32 results; $l is printed ahead of the s32 coordinate $x (NOTE(review): presumably the array layer index - confirm)
2226   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2227                     Int32Regs:$b, Int32Regs:$a),
2228               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2229               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2230               "[$t, $s, \\{$l, $x\\}];",
2231               []>;  // empty pattern list: nothing in this record selects it
2232 def TEX_1D_ARRAY_U32_F32  // tex.a1d: four u32 results, f32 coordinate $x; $l stays in Int32Regs
2233   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2234                     Int32Regs:$b, Int32Regs:$a),
2235               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2236               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2237               "[$t, $s, \\{$l, $x\\}];",
2238               []>;
2239 def TEX_1D_ARRAY_U32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2240   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2241                     Int32Regs:$b, Int32Regs:$a),
2242               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2243                    Float32Regs:$lod),
2244               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2245               "[$t, $s, \\{$l, $x\\}], $lod;",
2246               []>;
2247 def TEX_1D_ARRAY_U32_F32_GRAD  // tex.grad form: adds one-element $gradx and $grady vectors
2248   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2249                     Int32Regs:$b, Int32Regs:$a),
2250               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2251                    Float32Regs:$gradx, Float32Regs:$grady),
2252               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2253               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2254               []>;
2255
2256 def TEX_2D_F32_S32  // tex.2d: four f32 results, s32 coordinates ($x, $y)
2257   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2258                     Float32Regs:$b, Float32Regs:$a),
2259               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2260               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2261               "[$t, $s, \\{$x, $y\\}];",
2262               []>;  // empty pattern list: nothing in this record selects it
2263 def TEX_2D_F32_F32  // tex.2d: four f32 results, f32 coordinates ($x, $y)
2264   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2265                     Float32Regs:$b, Float32Regs:$a),
2266               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2267               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2268               "[$t, $s, \\{$x, $y\\}];",
2269               []>;
2270 def TEX_2D_F32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2271   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2272                     Float32Regs:$b, Float32Regs:$a),
2273               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2274                    Float32Regs:$lod),
2275               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2276               "[$t, $s, \\{$x, $y\\}], $lod;",
2277               []>;
2278 def TEX_2D_F32_F32_GRAD  // tex.grad form: adds two-element gradx and grady vectors
2279   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2280                     Float32Regs:$b, Float32Regs:$a),
2281               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2282                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2283                    Float32Regs:$grady0, Float32Regs:$grady1),
2284               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2285               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2286               "\\{$grady0, $grady1\\};",
2287               []>;
2288 def TEX_2D_S32_S32  // tex.2d: four s32 results, s32 coordinates ($x, $y)
2289   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2290                     Int32Regs:$b, Int32Regs:$a),
2291               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2292               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2293               "[$t, $s, \\{$x, $y\\}];",
2294               []>;  // empty pattern list: nothing in this record selects it
2295 def TEX_2D_S32_F32  // tex.2d: four s32 results, f32 coordinates ($x, $y)
2296   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2297                     Int32Regs:$b, Int32Regs:$a),
2298               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2299               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2300               "[$t, $s, \\{$x, $y\\}];",
2301               []>;
2302 def TEX_2D_S32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2303   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2304                     Int32Regs:$b, Int32Regs:$a),
2305               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2306                    Float32Regs:$lod),
2307               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2308               "[$t, $s, \\{$x, $y\\}], $lod;",
2309               []>;
2310 def TEX_2D_S32_F32_GRAD  // tex.grad form: adds two-element gradx and grady vectors
2311   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2312                     Int32Regs:$b, Int32Regs:$a),
2313               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2314                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2315                    Float32Regs:$grady0, Float32Regs:$grady1),
2316               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2317               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2318               "\\{$grady0, $grady1\\};",
2319               []>;
2320 def TEX_2D_U32_S32  // tex.2d: four u32 results (held in Int32Regs), s32 coordinates ($x, $y)
2321   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2322                     Int32Regs:$b, Int32Regs:$a),
2323               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2324               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2325               "[$t, $s, \\{$x, $y\\}];",
2326               []>;  // empty pattern list: nothing in this record selects it
2327 def TEX_2D_U32_F32  // tex.2d: four u32 results, f32 coordinates ($x, $y)
2328   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2329                     Int32Regs:$b, Int32Regs:$a),
2330               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2331               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2332               "[$t, $s, \\{$x, $y\\}];",
2333               []>;
2334 def TEX_2D_U32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2335   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2336                     Int32Regs:$b, Int32Regs:$a),
2337               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2338                    Float32Regs:$lod),
2339               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2340               "[$t, $s, \\{$x, $y\\}], $lod;",
2341               []>;
2342 def TEX_2D_U32_F32_GRAD  // tex.grad form: adds two-element gradx and grady vectors
2343   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2344                     Int32Regs:$b, Int32Regs:$a),
2345               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2346                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2347                    Float32Regs:$grady0, Float32Regs:$grady1),
2348               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2349               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2350               "\\{$grady0, $grady1\\};",
2351               []>;
2352
2353 def TEX_2D_ARRAY_F32_S32  // tex.a2d: four f32 results; coordinate vector is {$l, $x, $y, $y} with s32 $x/$y
2354   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2355                     Float32Regs:$b, Float32Regs:$a),
2356               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2357                    Int32Regs:$y),
2358               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2359               "[$t, $s, \\{$l, $x, $y, $y\\}];",  // NOTE(review): $y is printed twice, presumably to pad the vector to four elements - confirm against the PTX tex spec
2360               []>;  // empty pattern list: nothing in this record selects it
2361 def TEX_2D_ARRAY_F32_F32  // tex.a2d: four f32 results, f32 $x/$y; same four-element coordinate layout
2362   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2363                     Float32Regs:$b, Float32Regs:$a),
2364               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2365                    Float32Regs:$y),
2366               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2367               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2368               []>;
2369 def TEX_2D_ARRAY_F32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2370   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2371                     Float32Regs:$b, Float32Regs:$a),
2372               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2373                    Float32Regs:$y, Float32Regs:$lod),
2374               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2375               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2376               []>;
2377 def TEX_2D_ARRAY_F32_F32_GRAD  // tex.grad form: adds two-element gradx and grady vectors
2378   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2379                     Float32Regs:$b, Float32Regs:$a),
2380               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2381                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2382                    Float32Regs:$grady0, Float32Regs:$grady1),
2383               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2384               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2385               "\\{$grady0, $grady1\\};",
2386               []>;
2387 def TEX_2D_ARRAY_S32_S32  // tex.a2d: four s32 results; coordinate vector is {$l, $x, $y, $y} with s32 $x/$y
2388   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2389                     Int32Regs:$b, Int32Regs:$a),
2390               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2391                    Int32Regs:$y),
2392               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2393               "[$t, $s, \\{$l, $x, $y, $y\\}];",  // NOTE(review): $y is printed twice, presumably to pad the vector to four elements - confirm against the PTX tex spec
2394               []>;  // empty pattern list: nothing in this record selects it
2395 def TEX_2D_ARRAY_S32_F32  // tex.a2d: four s32 results, f32 $x/$y; same four-element coordinate layout
2396   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2397                     Int32Regs:$b, Int32Regs:$a),
2398               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2399                    Float32Regs:$y),
2400               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2401               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2402               []>;
2403 def TEX_2D_ARRAY_S32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2404   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2405                     Int32Regs:$b, Int32Regs:$a),
2406               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2407                    Float32Regs:$y, Float32Regs:$lod),
2408               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2409               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2410               []>;
2411 def TEX_2D_ARRAY_S32_F32_GRAD  // tex.grad form: adds two-element gradx and grady vectors
2412   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2413                     Int32Regs:$b, Int32Regs:$a),
2414               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2415                    Float32Regs:$y,
2416                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2417                    Float32Regs:$grady0, Float32Regs:$grady1),
2418               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2419               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2420               "\\{$grady0, $grady1\\};",
2421               []>;
2422 def TEX_2D_ARRAY_U32_S32  // tex.a2d: four u32 results; coordinate vector is {$l, $x, $y, $y} with s32 $x/$y
2423   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2424                     Int32Regs:$b, Int32Regs:$a),
2425               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2426                    Int32Regs:$y),
2427               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2428               "[$t, $s, \\{$l, $x, $y, $y\\}];",  // NOTE(review): $y is printed twice, presumably to pad the vector to four elements - confirm against the PTX tex spec
2429               []>;  // empty pattern list: nothing in this record selects it
2430 def TEX_2D_ARRAY_U32_F32  // tex.a2d: four u32 results, f32 $x/$y; same four-element coordinate layout
2431   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2432                     Int32Regs:$b, Int32Regs:$a),
2433               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2434                    Float32Regs:$y),
2435               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2436               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2437               []>;
2438 def TEX_2D_ARRAY_U32_F32_LEVEL  // tex.level form: adds a trailing $lod operand
2439   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2440                     Int32Regs:$b, Int32Regs:$a),
2441               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2442                    Float32Regs:$y, Float32Regs:$lod),
2443               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2444               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2445               []>;
2446 def TEX_2D_ARRAY_U32_F32_GRAD  // tex.grad form: adds two-element gradx and grady vectors
2447   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2448                     Int32Regs:$b, Int32Regs:$a),
2449               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2450                    Float32Regs:$y,
2451                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2452                    Float32Regs:$grady0, Float32Regs:$grady1),
2453               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2454               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2455               "\\{$grady0, $grady1\\};",
2456               []>;
2457
2458 def TEX_3D_F32_S32
2459   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2460                     Float32Regs:$b, Float32Regs:$a),
2461               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2462                    Int32Regs:$z),
2463               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2464               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2465               []>;
// 3D texture fetch (tex.3d): f32 results from f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2466 def TEX_3D_F32_F32
2467   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2468                     Float32Regs:$b, Float32Regs:$a),
2469               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2470                    Float32Regs:$z),
2471               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2472               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2473               []>;
// 3D texture fetch at an explicit LOD ($lod): f32 results, f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2474 def TEX_3D_F32_F32_LEVEL
2475   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2476                     Float32Regs:$b, Float32Regs:$a),
2477               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2478                    Float32Regs:$z, Float32Regs:$lod),
2479               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2480               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2481               []>;
// 3D texture fetch with explicit gradients: f32 results, f32 coordinates.
// $z, $gradx2 and $grady2 are each repeated so that the coordinate vector and
// both gradient vectors are emitted with four elements.
2482 def TEX_3D_F32_F32_GRAD
2483   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2484                     Float32Regs:$b, Float32Regs:$a),
2485               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2486                    Float32Regs:$z,
2487                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2488                    Float32Regs:$gradx2, Float32Regs:$grady0,
2489                    Float32Regs:$grady1, Float32Regs:$grady2),
2490               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2491               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2492               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2493               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2494               []>;
// 3D texture fetch (tex.3d): s32 results from s32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2495 def TEX_3D_S32_S32
2496   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2497                     Int32Regs:$b, Int32Regs:$a),
2498               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2499                    Int32Regs:$z),
2500               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2501               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2502               []>;
// 3D texture fetch (tex.3d): s32 results from f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2503 def TEX_3D_S32_F32
2504   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2505                     Int32Regs:$b, Int32Regs:$a),
2506               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2507                    Float32Regs:$z),
2508               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2509               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2510               []>;
// 3D texture fetch at an explicit LOD ($lod): s32 results, f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2511 def TEX_3D_S32_F32_LEVEL
2512   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2513                     Int32Regs:$b, Int32Regs:$a),
2514               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2515                    Float32Regs:$z, Float32Regs:$lod),
2516               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2517               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2518               []>;
// 3D texture fetch with explicit gradients: s32 results, f32 coordinates.
// $z, $gradx2 and $grady2 are each repeated so that the coordinate vector and
// both gradient vectors are emitted with four elements.
2519 def TEX_3D_S32_F32_GRAD
2520   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2521                     Int32Regs:$b, Int32Regs:$a),
2522               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2523                    Float32Regs:$z,
2524                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2525                    Float32Regs:$gradx2, Float32Regs:$grady0,
2526                    Float32Regs:$grady1, Float32Regs:$grady2),
2527               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2528               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2529               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2530               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2531               []>;
// 3D texture fetch (tex.3d): u32 results from s32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2532 def TEX_3D_U32_S32
2533   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2534                     Int32Regs:$b, Int32Regs:$a),
2535               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2536                    Int32Regs:$z),
2537               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2538               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2539               []>;
// 3D texture fetch (tex.3d): u32 results from f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2540 def TEX_3D_U32_F32
2541   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2542                     Int32Regs:$b, Int32Regs:$a),
2543               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2544                    Float32Regs:$z),
2545               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2546               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2547               []>;
// 3D texture fetch at an explicit LOD ($lod): u32 results, f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2548 def TEX_3D_U32_F32_LEVEL
2549   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2550                     Int32Regs:$b, Int32Regs:$a),
2551               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2552                    Float32Regs:$z, Float32Regs:$lod),
2553               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2554               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2555               []>;
// 3D texture fetch with explicit gradients: u32 results, f32 coordinates.
// $z, $gradx2 and $grady2 are each repeated so that the coordinate vector and
// both gradient vectors are emitted with four elements.
2556 def TEX_3D_U32_F32_GRAD
2557   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2558                     Int32Regs:$b, Int32Regs:$a),
2559               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2560                    Float32Regs:$z,
2561                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2562                    Float32Regs:$gradx2, Float32Regs:$grady0,
2563                    Float32Regs:$grady1, Float32Regs:$grady2),
2564               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2565               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2566               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2567               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2568               []>;
2569
// Cube texture fetch (tex.cube): f32 results from f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2570 def TEX_CUBE_F32_F32
2571   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2572                     Float32Regs:$b, Float32Regs:$a),
2573               (ins Int64Regs:$t, Int64Regs:$s,
2574                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2575               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2576               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2577               []>;
// Cube texture fetch at an explicit LOD ($lod): f32 results, f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2578 def TEX_CUBE_F32_F32_LEVEL
2579   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2580                     Float32Regs:$b, Float32Regs:$a),
2581               (ins Int64Regs:$t, Int64Regs:$s,
2582                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2583                    Float32Regs:$lod),
2584               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2585               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2586               []>;
// Cube texture fetch (tex.cube): s32 results from f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2587 def TEX_CUBE_S32_F32
2588   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2589                     Int32Regs:$b, Int32Regs:$a),
2590               (ins Int64Regs:$t, Int64Regs:$s,
2591                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2592               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2593               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2594               []>;
// Cube texture fetch at an explicit LOD ($lod): s32 results, f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2595 def TEX_CUBE_S32_F32_LEVEL
2596   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2597                     Int32Regs:$b, Int32Regs:$a),
2598               (ins Int64Regs:$t, Int64Regs:$s,
2599                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2600                    Float32Regs:$lod),
2601               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2602               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2603               []>;
// Cube texture fetch (tex.cube): u32 results from f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2604 def TEX_CUBE_U32_F32
2605   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2606                     Int32Regs:$b, Int32Regs:$a),
2607               (ins Int64Regs:$t, Int64Regs:$s,
2608                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2609               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2610               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2611               []>;
// Cube texture fetch at an explicit LOD ($lod): u32 results, f32 coordinates.
// $z is repeated to pad the coordinate vector to four elements.
2612 def TEX_CUBE_U32_F32_LEVEL
2613   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2614                     Int32Regs:$b, Int32Regs:$a),
2615               (ins Int64Regs:$t, Int64Regs:$s,
2616                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2617                    Float32Regs:$lod),
2618               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2619               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2620               []>;
2621
// Cube-array texture fetch (tex.acube): f32 results from f32 coordinates.
// Integer $l leads the four-element coordinate vector, then $x, $y, $z.
2622 def TEX_CUBE_ARRAY_F32_F32
2623   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2624                     Float32Regs:$b, Float32Regs:$a),
2625               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2626                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2627               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2628               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2629               []>;
// Cube-array texture fetch at an explicit LOD ($lod): f32 results, f32 coords.
// Integer $l leads the four-element coordinate vector, then $x, $y, $z.
2630 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2631   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2632                     Float32Regs:$b, Float32Regs:$a),
2633               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2634                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2635                    Float32Regs:$lod),
2636               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2637               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2638               []>;
// Cube-array texture fetch (tex.acube): s32 results from f32 coordinates.
// Integer $l leads the four-element coordinate vector, then $x, $y, $z.
2639 def TEX_CUBE_ARRAY_S32_F32
2640   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2641                     Int32Regs:$b, Int32Regs:$a),
2642               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2643                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2644               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2645               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2646               []>;
// Cube-array texture fetch at an explicit LOD ($lod): s32 results, f32 coords.
// Integer $l leads the four-element coordinate vector, then $x, $y, $z.
2647 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2648   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2649                     Int32Regs:$b, Int32Regs:$a),
2650               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2651                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2652                    Float32Regs:$lod),
2653               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2654               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2655               []>;
// Cube-array texture fetch (tex.acube): u32 results from f32 coordinates.
// Integer $l leads the four-element coordinate vector, then $x, $y, $z.
2656 def TEX_CUBE_ARRAY_U32_F32
2657   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2658                     Int32Regs:$b, Int32Regs:$a),
2659               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2660                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2661               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2662               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2663               []>;
// Cube-array texture fetch at an explicit LOD ($lod): u32 results, f32 coords.
// Integer $l leads the four-element coordinate vector, then $x, $y, $z.
2664 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2665   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2666                     Int32Regs:$b, Int32Regs:$a),
2667               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2668                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2669                    Float32Regs:$lod),
2670               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2671               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2672               []>;
2673
// tld4 on a 2D texture, .r component selector: writes four f32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2674 def TLD4_R_2D_F32_F32
2675   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2676                     Float32Regs:$v2, Float32Regs:$v3),
2677               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2678               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2679               "[$t, $s, \\{$x, $y\\}];",
2680               []>;
// tld4 on a 2D texture, .g component selector: writes four f32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2681 def TLD4_G_2D_F32_F32
2682   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2683                     Float32Regs:$v2, Float32Regs:$v3),
2684               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2685               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2686               "[$t, $s, \\{$x, $y\\}];",
2687               []>;
// tld4 on a 2D texture, .b component selector: writes four f32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2688 def TLD4_B_2D_F32_F32
2689   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2690                     Float32Regs:$v2, Float32Regs:$v3),
2691               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2692               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2693               "[$t, $s, \\{$x, $y\\}];",
2694               []>;
// tld4 on a 2D texture, .a component selector: writes four f32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2695 def TLD4_A_2D_F32_F32
2696   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2697                     Float32Regs:$v2, Float32Regs:$v3),
2698               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2699               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2700               "[$t, $s, \\{$x, $y\\}];",
2701               []>;
// tld4 on a 2D texture, .r component selector: writes four s32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2702 def TLD4_R_2D_S32_F32
2703   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2704                     Int32Regs:$v2, Int32Regs:$v3),
2705               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2706               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2707               "[$t, $s, \\{$x, $y\\}];",
2708               []>;
// tld4 on a 2D texture, .g component selector: writes four s32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2709 def TLD4_G_2D_S32_F32
2710   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2711                     Int32Regs:$v2, Int32Regs:$v3),
2712               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2713               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2714               "[$t, $s, \\{$x, $y\\}];",
2715               []>;
// tld4 on a 2D texture, .b component selector: writes four s32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2716 def TLD4_B_2D_S32_F32
2717   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2718                     Int32Regs:$v2, Int32Regs:$v3),
2719               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2720               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2721               "[$t, $s, \\{$x, $y\\}];",
2722               []>;
// tld4 on a 2D texture, .a component selector: writes four s32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2723 def TLD4_A_2D_S32_F32
2724   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2725                     Int32Regs:$v2, Int32Regs:$v3),
2726               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2727               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2728               "[$t, $s, \\{$x, $y\\}];",
2729               []>;
// tld4 on a 2D texture, .r component selector: writes four u32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2730 def TLD4_R_2D_U32_F32
2731   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2732                     Int32Regs:$v2, Int32Regs:$v3),
2733               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2734               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2735               "[$t, $s, \\{$x, $y\\}];",
2736               []>;
// tld4 on a 2D texture, .g component selector: writes four u32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2737 def TLD4_G_2D_U32_F32
2738   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2739                     Int32Regs:$v2, Int32Regs:$v3),
2740               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2741               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2742               "[$t, $s, \\{$x, $y\\}];",
2743               []>;
// tld4 on a 2D texture, .b component selector: writes four u32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2744 def TLD4_B_2D_U32_F32
2745   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2746                     Int32Regs:$v2, Int32Regs:$v3),
2747               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2748               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2749               "[$t, $s, \\{$x, $y\\}];",
2750               []>;
// tld4 on a 2D texture, .a component selector: writes four u32 values
// ($v0-$v3) for the f32 coordinate pair ($x, $y).
2751 def TLD4_A_2D_U32_F32
2752   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2753                     Int32Regs:$v2, Int32Regs:$v3),
2754               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2755               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2756               "[$t, $s, \\{$x, $y\\}];",
2757               []>;
2758 }
2759
2760
2761 // texmode_unified
2762 let IsTex = 1, IsTexModeUnified = 1 in {
2763 // Texture fetch instructions using handles
// Unified-mode 1D texture fetch: f32 results from an s32 coordinate.
// Only the texture operand $t is passed; there is no sampler operand.
2764 def TEX_UNIFIED_1D_F32_S32
2765   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2766                     Float32Regs:$b, Float32Regs:$a),
2767               (ins Int64Regs:$t, Int32Regs:$x),
2768               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2769               []>;
// Unified-mode 1D texture fetch: f32 results from an f32 coordinate.
// Only the texture operand $t is passed; there is no sampler operand.
2770 def TEX_UNIFIED_1D_F32_F32
2771   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2772                     Float32Regs:$b, Float32Regs:$a),
2773               (ins Int64Regs:$t, Float32Regs:$x),
2774               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2775               []>;
// Unified-mode 1D texture fetch at an explicit LOD ($lod): f32 results from
// an f32 coordinate. Only $t is passed; there is no sampler operand.
2776 def TEX_UNIFIED_1D_F32_F32_LEVEL
2777   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2778                     Float32Regs:$b, Float32Regs:$a),
2779               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2780               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2781               "[$t, \\{$x\\}], $lod;",
2782               []>;
// Unified-mode 1D texture fetch with explicit gradients ($gradx, $grady, each
// a one-element vector): f32 results from an f32 coordinate. No sampler.
2783 def TEX_UNIFIED_1D_F32_F32_GRAD
2784   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2785                     Float32Regs:$b, Float32Regs:$a),
2786               (ins Int64Regs:$t, Float32Regs:$x,
2787                    Float32Regs:$gradx, Float32Regs:$grady),
2788               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2789               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2790               []>;
// Unified-mode 1D texture fetch: s32 results from an s32 coordinate.
// Only the texture operand $t is passed; there is no sampler operand.
2791 def TEX_UNIFIED_1D_S32_S32
2792   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2793                     Int32Regs:$b, Int32Regs:$a),
2794               (ins Int64Regs:$t, Int32Regs:$x),
2795               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2796               []>;
// Unified-mode 1D texture fetch: s32 results from an f32 coordinate.
// Only the texture operand $t is passed; there is no sampler operand.
2797 def TEX_UNIFIED_1D_S32_F32
2798   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2799                     Int32Regs:$b, Int32Regs:$a),
2800               (ins Int64Regs:$t, Float32Regs:$x),
2801               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2802               []>;
// Unified-mode 1D texture fetch at an explicit LOD ($lod): s32 results from
// an f32 coordinate. Only $t is passed; there is no sampler operand.
2803 def TEX_UNIFIED_1D_S32_F32_LEVEL
2804   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2805                     Int32Regs:$b, Int32Regs:$a),
2806               (ins Int64Regs:$t, Float32Regs:$x,
2807                    Float32Regs:$lod),
2808               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2809               "[$t, \\{$x\\}], $lod;",
2810               []>;
// Unified-mode 1D texture fetch with explicit gradients ($gradx, $grady, each
// a one-element vector): s32 results from an f32 coordinate. No sampler.
2811 def TEX_UNIFIED_1D_S32_F32_GRAD
2812   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2813                     Int32Regs:$b, Int32Regs:$a),
2814               (ins Int64Regs:$t, Float32Regs:$x,
2815                    Float32Regs:$gradx, Float32Regs:$grady),
2816               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2817               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2818               []>;
// Unified-mode 1D texture fetch: u32 results from an s32 coordinate.
// Only the texture operand $t is passed; there is no sampler operand.
2819 def TEX_UNIFIED_1D_U32_S32
2820   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2821                     Int32Regs:$b, Int32Regs:$a),
2822               (ins Int64Regs:$t, Int32Regs:$x),
2823               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2824               []>;
// Unified-mode 1D texture fetch: u32 results from an f32 coordinate.
// Only the texture operand $t is passed; there is no sampler operand.
2825 def TEX_UNIFIED_1D_U32_F32
2826   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2827                     Int32Regs:$b, Int32Regs:$a),
2828               (ins Int64Regs:$t, Float32Regs:$x),
2829               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2830               []>;
// Unified-mode 1D texture fetch at an explicit LOD ($lod): u32 results from
// an f32 coordinate. Only $t is passed; there is no sampler operand.
2831 def TEX_UNIFIED_1D_U32_F32_LEVEL
2832   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2833                     Int32Regs:$b, Int32Regs:$a),
2834               (ins Int64Regs:$t, Float32Regs:$x,
2835                    Float32Regs:$lod),
2836               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2837               "[$t, \\{$x\\}], $lod;",
2838               []>;
// Unified-mode 1D texture fetch with explicit gradients ($gradx, $grady, each
// a one-element vector): u32 results from an f32 coordinate. No sampler.
2839 def TEX_UNIFIED_1D_U32_F32_GRAD
2840   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2841                     Int32Regs:$b, Int32Regs:$a),
2842               (ins Int64Regs:$t, Float32Regs:$x,
2843                    Float32Regs:$gradx, Float32Regs:$grady),
2844               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2845               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2846               []>;
2847
// Unified-mode 1D-array texture fetch: f32 results from an s32 coordinate.
// Integer $l precedes $x in the two-element coordinate vector; no sampler.
2848 def TEX_UNIFIED_1D_ARRAY_F32_S32
2849   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2850                     Float32Regs:$b, Float32Regs:$a),
2851               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2852               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2853               "[$t, \\{$l, $x\\}];",
2854               []>;
// Unified-mode 1D-array texture fetch: f32 results from an f32 coordinate.
// Integer $l precedes $x in the two-element coordinate vector; no sampler.
2855 def TEX_UNIFIED_1D_ARRAY_F32_F32
2856   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2857                     Float32Regs:$b, Float32Regs:$a),
2858               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2859               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2860               "[$t, \\{$l, $x\\}];",
2861               []>;
// Unified-mode 1D-array texture fetch at an explicit LOD ($lod): f32 results
// from an f32 coordinate. Integer $l precedes $x in the coordinate vector.
2862 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
2863   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2864                     Float32Regs:$b, Float32Regs:$a),
2865               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2866                    Float32Regs:$lod),
2867               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2868               "[$t, \\{$l, $x\\}], $lod;",
2869               []>;
// Unified-mode 1D-array texture fetch with explicit gradients ($gradx,
// $grady): f32 results from an f32 coordinate. $l precedes $x in the vector.
2870 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
2871   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2872                     Float32Regs:$b, Float32Regs:$a),
2873               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2874                    Float32Regs:$gradx, Float32Regs:$grady),
2875               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2876               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2877               []>;
// Unified-mode 1D-array texture fetch: s32 results from an s32 coordinate.
// Integer $l precedes $x in the two-element coordinate vector; no sampler.
2878 def TEX_UNIFIED_1D_ARRAY_S32_S32
2879   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2880                     Int32Regs:$b, Int32Regs:$a),
2881               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2882               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2883               "[$t, \\{$l, $x\\}];",
2884               []>;
// Unified-mode 1D-array texture fetch: s32 results from an f32 coordinate.
// Integer $l precedes $x in the two-element coordinate vector; no sampler.
2885 def TEX_UNIFIED_1D_ARRAY_S32_F32
2886   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2887                     Int32Regs:$b, Int32Regs:$a),
2888               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2889               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2890               "[$t, \\{$l, $x\\}];",
2891               []>;
// Unified-mode 1D-array texture fetch at an explicit LOD ($lod): s32 results
// from an f32 coordinate. Integer $l precedes $x in the coordinate vector.
2892 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
2893   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2894                     Int32Regs:$b, Int32Regs:$a),
2895               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2896                    Float32Regs:$lod),
2897               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2898               "[$t, \\{$l, $x\\}], $lod;",
2899               []>;
// Unified-mode 1D-array texture fetch with explicit gradients ($gradx,
// $grady): s32 results from an f32 coordinate. $l precedes $x in the vector.
2900 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
2901   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2902                     Int32Regs:$b, Int32Regs:$a),
2903               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2904                    Float32Regs:$gradx, Float32Regs:$grady),
2905               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2906               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2907               []>;
// Unified-mode 1D-array texture fetch: u32 results from an s32 coordinate.
// Integer $l precedes $x in the two-element coordinate vector; no sampler.
2908 def TEX_UNIFIED_1D_ARRAY_U32_S32
2909   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2910                     Int32Regs:$b, Int32Regs:$a),
2911               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2912               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2913               "[$t, \\{$l, $x\\}];",
2914               []>;
// Unified-mode 1D-array texture fetch: u32 results from an f32 coordinate.
// Integer $l precedes $x in the two-element coordinate vector; no sampler.
2915 def TEX_UNIFIED_1D_ARRAY_U32_F32
2916   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2917                     Int32Regs:$b, Int32Regs:$a),
2918               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2919               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2920               "[$t, \\{$l, $x\\}];",
2921               []>;
// Unified-mode 1D-array texture fetch at an explicit LOD ($lod): u32 results
// from an f32 coordinate. Integer $l precedes $x in the coordinate vector.
2922 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
2923   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2924                     Int32Regs:$b, Int32Regs:$a),
2925               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2926                    Float32Regs:$lod),
2927               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2928               "[$t, \\{$l, $x\\}], $lod;",
2929               []>;
// Unified-mode 1D-array texture fetch with explicit gradients ($gradx,
// $grady): u32 results from an f32 coordinate. $l precedes $x in the vector.
2930 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
2931   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2932                     Int32Regs:$b, Int32Regs:$a),
2933               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2934                    Float32Regs:$gradx, Float32Regs:$grady),
2935               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2936               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2937               []>;
2938
// Unified-mode 2D texture fetch: f32 results from s32 coordinates ($x, $y).
// Only the texture operand $t is passed; there is no sampler operand.
2939 def TEX_UNIFIED_2D_F32_S32
2940   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2941                     Float32Regs:$b, Float32Regs:$a),
2942               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2943               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2944               "[$t, \\{$x, $y\\}];",
2945               []>;
// Unified-mode 2D texture fetch: f32 results from f32 coordinates ($x, $y).
// Only the texture operand $t is passed; there is no sampler operand.
2946 def TEX_UNIFIED_2D_F32_F32
2947   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2948                     Float32Regs:$b, Float32Regs:$a),
2949               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2950               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2951               "[$t, \\{$x, $y\\}];",
2952               []>;
// Unified-mode 2D texture fetch at an explicit LOD ($lod): f32 results from
// f32 coordinates ($x, $y). Only $t is passed; there is no sampler operand.
2953 def TEX_UNIFIED_2D_F32_F32_LEVEL
2954   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2955                     Float32Regs:$b, Float32Regs:$a),
2956               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2957                    Float32Regs:$lod),
2958               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2959               "[$t, \\{$x, $y\\}], $lod;",
2960               []>;
// Unified-mode 2D texture fetch with explicit gradients: f32 results from
// f32 coordinates ($x, $y), followed by two 2-element gradient vectors.
2961 def TEX_UNIFIED_2D_F32_F32_GRAD
2962   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2963                     Float32Regs:$b, Float32Regs:$a),
2964               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2965                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2966                    Float32Regs:$grady0, Float32Regs:$grady1),
2967               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2968               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2969               "\\{$grady0, $grady1\\};",
2970               []>;
// Unified-mode 2D texture fetch: s32 results from s32 coordinates ($x, $y).
// Only the texture operand $t is passed; there is no sampler operand.
2971 def TEX_UNIFIED_2D_S32_S32
2972   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2973                     Int32Regs:$b, Int32Regs:$a),
2974               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2975               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2976               "[$t, \\{$x, $y\\}];",
2977               []>;
// Unified-mode 2D texture fetch: s32 results from f32 coordinates ($x, $y).
// Only the texture operand $t is passed; there is no sampler operand.
2978 def TEX_UNIFIED_2D_S32_F32
2979   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2980                     Int32Regs:$b, Int32Regs:$a),
2981               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2982               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2983               "[$t, \\{$x, $y\\}];",
2984               []>;
// Unified-mode 2D texture fetch at an explicit LOD ($lod): s32 results from
// f32 coordinates ($x, $y). Only $t is passed; there is no sampler operand.
2985 def TEX_UNIFIED_2D_S32_F32_LEVEL
2986   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2987                     Int32Regs:$b, Int32Regs:$a),
2988               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2989                    Float32Regs:$lod),
2990               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2991               "[$t, \\{$x, $y\\}], $lod;",
2992               []>;
// Unified-mode 2D texture fetch with explicit gradients: s32 results from
// f32 coordinates ($x, $y), followed by two 2-element gradient vectors.
2993 def TEX_UNIFIED_2D_S32_F32_GRAD
2994   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2995                     Int32Regs:$b, Int32Regs:$a),
2996               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2997                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2998                    Float32Regs:$grady0, Float32Regs:$grady1),
2999               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3000               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3001               "\\{$grady0, $grady1\\};",
3002               []>;
// Unified-mode 2D texture fetch: u32 results from s32 coordinates ($x, $y).
// Only the texture operand $t is passed; there is no sampler operand.
3003 def TEX_UNIFIED_2D_U32_S32
3004   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3005                     Int32Regs:$b, Int32Regs:$a),
3006               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3007               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3008               "[$t, \\{$x, $y\\}];",
3009               []>;
// Unified-mode 2D texture fetch: u32 results from f32 coordinates ($x, $y).
// Only the texture operand $t is passed; there is no sampler operand.
3010 def TEX_UNIFIED_2D_U32_F32
3011   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3012                     Int32Regs:$b, Int32Regs:$a),
3013               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3014               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3015               "[$t, \\{$x, $y\\}];",
3016               []>;
// Unified-mode 2D texture fetch at an explicit LOD ($lod): u32 results from
// f32 coordinates ($x, $y). Only $t is passed; there is no sampler operand.
3017 def TEX_UNIFIED_2D_U32_F32_LEVEL
3018   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3019                     Int32Regs:$b, Int32Regs:$a),
3020               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3021                    Float32Regs:$lod),
3022               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3023               "[$t, \\{$x, $y\\}], $lod;",
3024               []>;
// Unified-mode 2D texture fetch with explicit gradients: u32 results from
// f32 coordinates ($x, $y), followed by two 2-element gradient vectors.
3025 def TEX_UNIFIED_2D_U32_F32_GRAD
3026   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3027                     Int32Regs:$b, Int32Regs:$a),
3028               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3029                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3030                    Float32Regs:$grady0, Float32Regs:$grady1),
3031               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3032               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3033               "\\{$grady0, $grady1\\};",
3034               []>;
3035
// Unified-mode 2D-array texture fetch: f32 results from s32 coordinates.
// $l leads the coordinate vector; $y is repeated to pad it to four elements.
3036 def TEX_UNIFIED_2D_ARRAY_F32_S32
3037   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3038                     Float32Regs:$b, Float32Regs:$a),
3039               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3040                    Int32Regs:$y),
3041               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3042               "[$t, \\{$l, $x, $y, $y\\}];",
3043               []>;
// Unified-mode 2D-array texture fetch: f32 results from f32 coordinates.
// $l leads the coordinate vector; $y is repeated to pad it to four elements.
3044 def TEX_UNIFIED_2D_ARRAY_F32_F32
3045   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3046                     Float32Regs:$b, Float32Regs:$a),
3047               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3048                    Float32Regs:$y),
3049               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3050               "[$t, \\{$l, $x, $y, $y\\}];",
3051               []>;
// Unified-mode 2D-array texture fetch at an explicit LOD ($lod): f32 results,
// f32 coords. $l leads the coordinate vector; $y is repeated to pad it to four.
3052 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3053   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3054                     Float32Regs:$b, Float32Regs:$a),
3055               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3056                    Float32Regs:$y, Float32Regs:$lod),
3057               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3058               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3059               []>;
// Unified-mode 2D-array texture fetch with explicit gradients: f32 results,
// f32 coords. $l leads the coordinate vector and $y is repeated to pad it to
// four elements; two 2-element gradient vectors follow the address.
3060 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3061   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3062                     Float32Regs:$b, Float32Regs:$a),
3063               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3064                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3065                    Float32Regs:$grady0, Float32Regs:$grady1),
3066               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3067               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3068               "\\{$grady0, $grady1\\};",
3069               []>;
// Unified-mode 2D-array texture fetch: s32 results from s32 coordinates.
// $l leads the coordinate vector; $y is repeated to pad it to four elements.
3070 def TEX_UNIFIED_2D_ARRAY_S32_S32
3071   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3072                     Int32Regs:$b, Int32Regs:$a),
3073               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3074                    Int32Regs:$y),
3075               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3076               "[$t, \\{$l, $x, $y, $y\\}];",
3077               []>;
// Unified-mode 2D-array texture fetch: s32 results from f32 coordinates.
// $l leads the coordinate vector; $y is repeated to pad it to four elements.
3078 def TEX_UNIFIED_2D_ARRAY_S32_F32
3079   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3080                     Int32Regs:$b, Int32Regs:$a),
3081               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3082                    Float32Regs:$y),
3083               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3084               "[$t, \\{$l, $x, $y, $y\\}];",
3085               []>;
// Unified-mode 2D-array texture fetch at an explicit LOD ($lod): s32 results,
// f32 coords. $l leads the coordinate vector; $y is repeated to pad it to four.
3086 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3087   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3088                     Int32Regs:$b, Int32Regs:$a),
3089               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3090                    Float32Regs:$y, Float32Regs:$lod),
3091               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3092               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3093               []>;
// Unified-mode 2D-array texture fetch with explicit gradients: s32 results,
// f32 coords. $l leads the coordinate vector and $y is repeated to pad it to
// four elements; two 2-element gradient vectors follow the address.
3094 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3095   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3096                     Int32Regs:$b, Int32Regs:$a),
3097               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3098                    Float32Regs:$y,
3099                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3100                    Float32Regs:$grady0, Float32Regs:$grady1),
3101               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3102               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3103               "\\{$grady0, $grady1\\};",
3104               []>;
// tex.a2d.v4.u32.s32: four Int32Regs results (u32 in the mnemonic) from $t
// (Int64Regs), $l (Int32Regs) and s32 coordinates $x/$y.
// $y is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3105 def TEX_UNIFIED_2D_ARRAY_U32_S32
3106   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3107                     Int32Regs:$b, Int32Regs:$a),
3108               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3109                    Int32Regs:$y),
3110               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3111               "[$t, \\{$l, $x, $y, $y\\}];",
3112               []>;
// tex.a2d.v4.u32.f32: four Int32Regs results (u32 in the mnemonic) from $t
// (Int64Regs), $l (Int32Regs) and f32 coordinates $x/$y.
// $y is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3113 def TEX_UNIFIED_2D_ARRAY_U32_F32
3114   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3115                     Int32Regs:$b, Int32Regs:$a),
3116               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3117                    Float32Regs:$y),
3118               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3119               "[$t, \\{$l, $x, $y, $y\\}];",
3120               []>;
// tex.level.a2d.v4.u32.f32: like the plain a2d u32/f32 fetch, with an extra
// Float32Regs operand $lod printed after the address operand.
// $y is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3121 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3122   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3123                     Int32Regs:$b, Int32Regs:$a),
3124               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3125                    Float32Regs:$y, Float32Regs:$lod),
3126               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3127               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3128               []>;
// tex.grad.a2d.v4.u32.f32: four Int32Regs results; inputs are $t (Int64Regs),
// $l (Int32Regs), f32 coordinates $x/$y and the two-component gradient
// vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
// $y is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3129 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3130   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3131                     Int32Regs:$b, Int32Regs:$a),
3132               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3133                    Float32Regs:$y,
3134                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3135                    Float32Regs:$grady0, Float32Regs:$grady1),
3136               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3137               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3138               "\\{$grady0, $grady1\\};",
3139               []>;
3140
// tex.3d.v4.f32.s32: four Float32Regs results from $t (Int64Regs; presumably
// the texture handle) and s32 coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3141 def TEX_UNIFIED_3D_F32_S32
3142   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3143                     Float32Regs:$b, Float32Regs:$a),
3144               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3145                    Int32Regs:$z),
3146               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3147               "[$t, \\{$x, $y, $z, $z\\}];",
3148               []>;
// tex.3d.v4.f32.f32: four Float32Regs results from $t (Int64Regs) and f32
// coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3149 def TEX_UNIFIED_3D_F32_F32
3150   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3151                     Float32Regs:$b, Float32Regs:$a),
3152               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3153                    Float32Regs:$z),
3154               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3155               "[$t, \\{$x, $y, $z, $z\\}];",
3156               []>;
// tex.level.3d.v4.f32.f32: like the plain 3d f32/f32 fetch, with an extra
// Float32Regs operand $lod printed after the address operand.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3157 def TEX_UNIFIED_3D_F32_F32_LEVEL
3158   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3159                     Float32Regs:$b, Float32Regs:$a),
3160               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3161                    Float32Regs:$z, Float32Regs:$lod),
3162               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3163               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3164               []>;
// tex.grad.3d.v4.f32.f32: four Float32Regs results; inputs are $t
// (Int64Regs), f32 coordinates $x/$y/$z and three-component gradients
// $gradx0..2 and $grady0..2.
// $z, $gradx2 and $grady2 are each printed twice so that every operand
// vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3165 def TEX_UNIFIED_3D_F32_F32_GRAD
3166   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3167                     Float32Regs:$b, Float32Regs:$a),
3168               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3169                    Float32Regs:$z,
3170                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3171                    Float32Regs:$gradx2, Float32Regs:$grady0,
3172                    Float32Regs:$grady1, Float32Regs:$grady2),
3173               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3174               "[$t, \\{$x, $y, $z, $z\\}], "
3175               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3176               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3177               []>;
// tex.3d.v4.s32.s32: four Int32Regs results from $t (Int64Regs) and s32
// coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3178 def TEX_UNIFIED_3D_S32_S32
3179   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3180                     Int32Regs:$b, Int32Regs:$a),
3181               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3182                    Int32Regs:$z),
3183               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3184               "[$t, \\{$x, $y, $z, $z\\}];",
3185               []>;
// tex.3d.v4.s32.f32: four Int32Regs results from $t (Int64Regs) and f32
// coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3186 def TEX_UNIFIED_3D_S32_F32
3187   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3188                     Int32Regs:$b, Int32Regs:$a),
3189               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3190                    Float32Regs:$z),
3191               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3192               "[$t, \\{$x, $y, $z, $z\\}];",
3193               []>;
// tex.level.3d.v4.s32.f32: like the plain 3d s32/f32 fetch, with an extra
// Float32Regs operand $lod printed after the address operand.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3194 def TEX_UNIFIED_3D_S32_F32_LEVEL
3195   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3196                     Int32Regs:$b, Int32Regs:$a),
3197               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3198                    Float32Regs:$z, Float32Regs:$lod),
3199               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3200               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3201               []>;
// tex.grad.3d.v4.s32.f32: four Int32Regs results; inputs are $t (Int64Regs),
// f32 coordinates $x/$y/$z and three-component gradients $gradx0..2 and
// $grady0..2.
// $z, $gradx2 and $grady2 are each printed twice so that every operand
// vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3202 def TEX_UNIFIED_3D_S32_F32_GRAD
3203   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3204                     Int32Regs:$b, Int32Regs:$a),
3205               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3206                    Float32Regs:$z,
3207                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3208                    Float32Regs:$gradx2, Float32Regs:$grady0,
3209                    Float32Regs:$grady1, Float32Regs:$grady2),
3210               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3211               "[$t, \\{$x, $y, $z, $z\\}], "
3212               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3213               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3214               []>;
// tex.3d.v4.u32.s32: four Int32Regs results (u32 in the mnemonic) from $t
// (Int64Regs) and s32 coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3215 def TEX_UNIFIED_3D_U32_S32
3216   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3217                     Int32Regs:$b, Int32Regs:$a),
3218               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3219                    Int32Regs:$z),
3220               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3221               "[$t, \\{$x, $y, $z, $z\\}];",
3222               []>;
// tex.3d.v4.u32.f32: four Int32Regs results (u32 in the mnemonic) from $t
// (Int64Regs) and f32 coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3223 def TEX_UNIFIED_3D_U32_F32
3224   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3225                     Int32Regs:$b, Int32Regs:$a),
3226               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3227                    Float32Regs:$z),
3228               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3229               "[$t, \\{$x, $y, $z, $z\\}];",
3230               []>;
// tex.level.3d.v4.u32.f32: like the plain 3d u32/f32 fetch, with an extra
// Float32Regs operand $lod printed after the address operand.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3231 def TEX_UNIFIED_3D_U32_F32_LEVEL
3232   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3233                     Int32Regs:$b, Int32Regs:$a),
3234               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3235                    Float32Regs:$z, Float32Regs:$lod),
3236               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3237               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3238               []>;
// tex.grad.3d.v4.u32.f32: four Int32Regs results; inputs are $t (Int64Regs),
// f32 coordinates $x/$y/$z and three-component gradients $gradx0..2 and
// $grady0..2.
// $z, $gradx2 and $grady2 are each printed twice so that every operand
// vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3239 def TEX_UNIFIED_3D_U32_F32_GRAD
3240   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3241                     Int32Regs:$b, Int32Regs:$a),
3242               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3243                    Float32Regs:$z,
3244                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3245                    Float32Regs:$gradx2, Float32Regs:$grady0,
3246                    Float32Regs:$grady1, Float32Regs:$grady2),
3247               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3248               "[$t, \\{$x, $y, $z, $z\\}], "
3249               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3250               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3251               []>;
3252
// tex.cube.v4.f32.f32: four Float32Regs results from $t (Int64Regs;
// presumably the texture handle) and f32 coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3253 def TEX_UNIFIED_CUBE_F32_F32
3254   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3255                     Float32Regs:$b, Float32Regs:$a),
3256               (ins Int64Regs:$t,
3257                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3258               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3259               "[$t, \\{$x, $y, $z, $z\\}];",
3260               []>;
// tex.level.cube.v4.f32.f32: like the plain cube f32/f32 fetch, with an
// extra Float32Regs operand $lod printed after the address operand.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3261 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3262   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3263                     Float32Regs:$b, Float32Regs:$a),
3264               (ins Int64Regs:$t,
3265                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3266                    Float32Regs:$lod),
3267               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3268               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3269               []>;
// tex.cube.v4.s32.f32: four Int32Regs results from $t (Int64Regs) and f32
// coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3270 def TEX_UNIFIED_CUBE_S32_F32
3271   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3272                     Int32Regs:$b, Int32Regs:$a),
3273               (ins Int64Regs:$t,
3274                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3275               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3276               "[$t, \\{$x, $y, $z, $z\\}];",
3277               []>;
// tex.level.cube.v4.s32.f32: like the plain cube s32/f32 fetch, with an
// extra Float32Regs operand $lod printed after the address operand.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3278 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3279   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3280                     Int32Regs:$b, Int32Regs:$a),
3281               (ins Int64Regs:$t,
3282                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3283                    Float32Regs:$lod),
3284               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3285               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3286               []>;
// tex.cube.v4.u32.f32: four Int32Regs results (u32 in the mnemonic) from $t
// (Int64Regs) and f32 coordinates $x/$y/$z.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3287 def TEX_UNIFIED_CUBE_U32_F32
3288   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3289                     Int32Regs:$b, Int32Regs:$a),
3290               (ins Int64Regs:$t,
3291                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3292               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3293               "[$t, \\{$x, $y, $z, $z\\}];",
3294               []>;
// tex.level.cube.v4.u32.f32: like the plain cube u32/f32 fetch, with an
// extra Float32Regs operand $lod printed after the address operand.
// $z is printed twice so the coordinate vector has four elements.
// Empty pattern list: no DAG pattern is attached in this def.
3295 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3296   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3297                     Int32Regs:$b, Int32Regs:$a),
3298               (ins Int64Regs:$t,
3299                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3300                    Float32Regs:$lod),
3301               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3302               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3303               []>;
3304
// tex.acube.v4.f32.f32: four Float32Regs results from $t (Int64Regs), $l
// (Int32Regs; presumably the array layer) and f32 coordinates $x/$y/$z.
// The coordinate vector {$l, $x, $y, $z} already has four elements, so no
// operand is repeated here.
// Empty pattern list: no DAG pattern is attached in this def.
3305 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3306   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3307                     Float32Regs:$b, Float32Regs:$a),
3308               (ins Int64Regs:$t, Int32Regs:$l,
3309                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3310               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3311               "[$t, \\{$l, $x, $y, $z\\}];",
3312               []>;
// tex.level.acube.v4.f32.f32: like the plain acube f32/f32 fetch, with an
// extra Float32Regs operand $lod printed after the address operand.
// The coordinate vector is {$l, $x, $y, $z}; no operand is repeated.
// Empty pattern list: no DAG pattern is attached in this def.
3313 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3314   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3315                     Float32Regs:$b, Float32Regs:$a),
3316               (ins Int64Regs:$t, Int32Regs:$l,
3317                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3318                    Float32Regs:$lod),
3319               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3320               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3321               []>;
// tex.acube.v4.s32.f32: four Int32Regs results from $t (Int64Regs), $l
// (Int32Regs; presumably the array layer) and f32 coordinates $x/$y/$z.
// The coordinate vector is {$l, $x, $y, $z}; no operand is repeated.
// Empty pattern list: no DAG pattern is attached in this def.
3322 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3323   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3324                     Int32Regs:$b, Int32Regs:$a),
3325               (ins Int64Regs:$t, Int32Regs:$l,
3326                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3327               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3328               "[$t, \\{$l, $x, $y, $z\\}];",
3329               []>;
// tex.level.acube.v4.s32.f32: like the plain acube s32/f32 fetch, with an
// extra Float32Regs operand $lod printed after the address operand.
// The coordinate vector is {$l, $x, $y, $z}; no operand is repeated.
// Empty pattern list: no DAG pattern is attached in this def.
3330 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3331   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3332                     Int32Regs:$b, Int32Regs:$a),
3333               (ins Int64Regs:$t, Int32Regs:$l,
3334                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3335                    Float32Regs:$lod),
3336               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3337               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3338               []>;
// tex.acube.v4.u32.f32: four Int32Regs results (u32 in the mnemonic) from $t
// (Int64Regs), $l (Int32Regs) and f32 coordinates $x/$y/$z.
// The coordinate vector is {$l, $x, $y, $z}; no operand is repeated.
// Empty pattern list: no DAG pattern is attached in this def.
3339 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3340   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3341                     Int32Regs:$b, Int32Regs:$a),
3342               (ins Int64Regs:$t, Int32Regs:$l,
3343                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3344               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3345               "[$t, \\{$l, $x, $y, $z\\}];",
3346               []>;
// tex.level.acube.v4.u32.f32: like the plain acube u32/f32 fetch, with an
// extra Float32Regs operand $lod printed after the address operand.
// The coordinate vector is {$l, $x, $y, $z}; no operand is repeated.
// Empty pattern list: no DAG pattern is attached in this def.
3347 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3348   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3349                     Int32Regs:$b, Int32Regs:$a),
3350               (ins Int64Regs:$t, Int32Regs:$l,
3351                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3352                    Float32Regs:$lod),
3353               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3354               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3355               []>;
3356
// tld4.r.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) from $t
// (Int64Regs; presumably the texture handle) and f32 coordinates {$x, $y}.
// The `r` in the mnemonic is the component selector of this variant.
// Empty pattern list: no DAG pattern is attached in this def.
3357 def TLD4_UNIFIED_R_2D_F32_F32
3358   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3359                     Float32Regs:$v2, Float32Regs:$v3),
3360               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3361               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3362               "[$t, \\{$x, $y\\}];",
3363               []>;
// tld4.g.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `g`.
// Empty pattern list: no DAG pattern is attached in this def.
3364 def TLD4_UNIFIED_G_2D_F32_F32
3365   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3366                     Float32Regs:$v2, Float32Regs:$v3),
3367               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3368               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3369               "[$t, \\{$x, $y\\}];",
3370               []>;
// tld4.b.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `b`.
// Empty pattern list: no DAG pattern is attached in this def.
3371 def TLD4_UNIFIED_B_2D_F32_F32
3372   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3373                     Float32Regs:$v2, Float32Regs:$v3),
3374               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3375               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3376               "[$t, \\{$x, $y\\}];",
3377               []>;
// tld4.a.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `a`.
// Empty pattern list: no DAG pattern is attached in this def.
3378 def TLD4_UNIFIED_A_2D_F32_F32
3379   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3380                     Float32Regs:$v2, Float32Regs:$v3),
3381               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3382               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3383               "[$t, \\{$x, $y\\}];",
3384               []>;
// tld4.r.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `r`.
// Empty pattern list: no DAG pattern is attached in this def.
3385 def TLD4_UNIFIED_R_2D_S32_F32
3386   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3387                     Int32Regs:$v2, Int32Regs:$v3),
3388               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3389               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3390               "[$t, \\{$x, $y\\}];",
3391               []>;
// tld4.g.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `g`.
// Empty pattern list: no DAG pattern is attached in this def.
3392 def TLD4_UNIFIED_G_2D_S32_F32
3393   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3394                     Int32Regs:$v2, Int32Regs:$v3),
3395               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3396               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3397               "[$t, \\{$x, $y\\}];",
3398               []>;
// tld4.b.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `b`.
// Empty pattern list: no DAG pattern is attached in this def.
3399 def TLD4_UNIFIED_B_2D_S32_F32
3400   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3401                     Int32Regs:$v2, Int32Regs:$v3),
3402               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3403               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3404               "[$t, \\{$x, $y\\}];",
3405               []>;
// tld4.a.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) from $t
// (Int64Regs) and f32 coordinates {$x, $y}; component selector `a`.
// Empty pattern list: no DAG pattern is attached in this def.
3406 def TLD4_UNIFIED_A_2D_S32_F32
3407   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3408                     Int32Regs:$v2, Int32Regs:$v3),
3409               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3410               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3411               "[$t, \\{$x, $y\\}];",
3412               []>;
// tld4.r.2d.v4.u32.f32: four Int32Regs results ($v0..$v3; u32 in the
// mnemonic) from $t (Int64Regs) and f32 coordinates {$x, $y}; selector `r`.
// Empty pattern list: no DAG pattern is attached in this def.
3413 def TLD4_UNIFIED_R_2D_U32_F32
3414   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3415                     Int32Regs:$v2, Int32Regs:$v3),
3416               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3417               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3418               "[$t, \\{$x, $y\\}];",
3419               []>;
// tld4.g.2d.v4.u32.f32: four Int32Regs results ($v0..$v3; u32 in the
// mnemonic) from $t (Int64Regs) and f32 coordinates {$x, $y}; selector `g`.
// Empty pattern list: no DAG pattern is attached in this def.
3420 def TLD4_UNIFIED_G_2D_U32_F32
3421   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3422                     Int32Regs:$v2, Int32Regs:$v3),
3423               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3424               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3425               "[$t, \\{$x, $y\\}];",
3426               []>;
// tld4.b.2d.v4.u32.f32: four Int32Regs results ($v0..$v3; u32 in the
// mnemonic) from $t (Int64Regs) and f32 coordinates {$x, $y}; selector `b`.
// Empty pattern list: no DAG pattern is attached in this def.
3427 def TLD4_UNIFIED_B_2D_U32_F32
3428   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3429                     Int32Regs:$v2, Int32Regs:$v3),
3430               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3431               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3432               "[$t, \\{$x, $y\\}];",
3433               []>;
// tld4.a.2d.v4.u32.f32: four Int32Regs results ($v0..$v3; u32 in the
// mnemonic) from $t (Int64Regs) and f32 coordinates {$x, $y}; selector `a`.
// Empty pattern list: no DAG pattern is attached in this def.
3434 def TLD4_UNIFIED_A_2D_U32_F32
3435   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3436                     Int32Regs:$v2, Int32Regs:$v3),
3437               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3438               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3439               "[$t, \\{$x, $y\\}];",
3440               []>;
3441 }
3442
3443
3444
3445 //=== Surface load instructions
3446 // .clamp variant
// Single-result `suld.b ... .clamp` records. Every def inside this `let`
// gets IsSuld = 1. Each record takes $s (Int64Regs; presumably the surface
// handle) plus Int32Regs coordinates and writes one register $r.
// Both the b8 and the b16 variants write to Int16Regs; b32 uses Int32Regs
// and b64 uses Int64Regs. All pattern lists are empty.
// NOTE(review): the meaning of the IsSuld value is defined outside this
// chunk; the sibling blocks use 2 for two-result and 3 for four-result loads.
3447 let IsSuld = 1 in {
// 1D: coordinate vector {$x}.
3448 def SULD_1D_I8_CLAMP
3449   : NVPTXInst<(outs Int16Regs:$r),
3450               (ins Int64Regs:$s, Int32Regs:$x),
3451               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3452               []>;
3453 def SULD_1D_I16_CLAMP
3454   : NVPTXInst<(outs Int16Regs:$r),
3455               (ins Int64Regs:$s, Int32Regs:$x),
3456               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3457               []>;
3458 def SULD_1D_I32_CLAMP
3459   : NVPTXInst<(outs Int32Regs:$r),
3460               (ins Int64Regs:$s, Int32Regs:$x),
3461               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3462               []>;
3463 def SULD_1D_I64_CLAMP
3464   : NVPTXInst<(outs Int64Regs:$r),
3465               (ins Int64Regs:$s, Int32Regs:$x),
3466               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3467               []>;
3468
// 1D array: coordinate vector {$l, $x} ($l presumably the array layer).
3469 def SULD_1D_ARRAY_I8_CLAMP
3470   : NVPTXInst<(outs Int16Regs:$r),
3471               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3472               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3473               []>;
3474 def SULD_1D_ARRAY_I16_CLAMP
3475   : NVPTXInst<(outs Int16Regs:$r),
3476               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3477               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3478               []>;
3479 def SULD_1D_ARRAY_I32_CLAMP
3480   : NVPTXInst<(outs Int32Regs:$r),
3481               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3482               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3483               []>;
3484 def SULD_1D_ARRAY_I64_CLAMP
3485   : NVPTXInst<(outs Int64Regs:$r),
3486               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3487               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3488               []>;
3489
// 2D: coordinate vector {$x, $y}.
3490 def SULD_2D_I8_CLAMP
3491   : NVPTXInst<(outs Int16Regs:$r),
3492               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3493               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3494               []>;
3495 def SULD_2D_I16_CLAMP
3496   : NVPTXInst<(outs Int16Regs:$r),
3497               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3498               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3499               []>;
3500 def SULD_2D_I32_CLAMP
3501   : NVPTXInst<(outs Int32Regs:$r),
3502               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3503               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3504               []>;
3505 def SULD_2D_I64_CLAMP
3506   : NVPTXInst<(outs Int64Regs:$r),
3507               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3508               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3509               []>;
3510
// 2D array: $y is printed twice so the coordinate vector has four elements.
3511 def SULD_2D_ARRAY_I8_CLAMP
3512   : NVPTXInst<(outs Int16Regs:$r),
3513               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3514               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3515               []>;
3516 def SULD_2D_ARRAY_I16_CLAMP
3517   : NVPTXInst<(outs Int16Regs:$r),
3518               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3519               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3520               []>;
3521 def SULD_2D_ARRAY_I32_CLAMP
3522   : NVPTXInst<(outs Int32Regs:$r),
3523               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3524               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3525               []>;
3526 def SULD_2D_ARRAY_I64_CLAMP
3527   : NVPTXInst<(outs Int64Regs:$r),
3528               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3529               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3530               []>;
3531
// 3D: $z is printed twice so the coordinate vector has four elements.
3532 def SULD_3D_I8_CLAMP
3533   : NVPTXInst<(outs Int16Regs:$r),
3534               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3535               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3536               []>;
3537 def SULD_3D_I16_CLAMP
3538   : NVPTXInst<(outs Int16Regs:$r),
3539               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3540               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3541               []>;
3542 def SULD_3D_I32_CLAMP
3543   : NVPTXInst<(outs Int32Regs:$r),
3544               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3545               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3546               []>;
3547 def SULD_3D_I64_CLAMP
3548   : NVPTXInst<(outs Int64Regs:$r),
3549               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3550               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3551               []>;
3552 }
3553
// Two-result `suld.b ... .v2 ... .clamp` records. Every def inside this
// `let` gets IsSuld = 2. Each record takes $s (Int64Regs; presumably the
// surface handle) plus Int32Regs coordinates and writes two registers
// ($r, $g). Both the b8 and the b16 variants write to Int16Regs; b32 uses
// Int32Regs and b64 uses Int64Regs. All pattern lists are empty.
// NOTE(review): the meaning of the IsSuld value is defined outside this
// chunk; the sibling blocks use 1 for one-result and 3 for four-result loads.
3554 let IsSuld = 2 in {
// 1D: coordinate vector {$x}.
3555 def SULD_1D_V2I8_CLAMP
3556   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3557               (ins Int64Regs:$s, Int32Regs:$x),
3558               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3559               []>;
3560 def SULD_1D_V2I16_CLAMP
3561   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3562               (ins Int64Regs:$s, Int32Regs:$x),
3563               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3564               []>;
3565 def SULD_1D_V2I32_CLAMP
3566   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3567               (ins Int64Regs:$s, Int32Regs:$x),
3568               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3569               []>;
3570 def SULD_1D_V2I64_CLAMP
3571   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3572               (ins Int64Regs:$s, Int32Regs:$x),
3573               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3574               []>;
3575
// 1D array: coordinate vector {$l, $x} ($l presumably the array layer).
3576 def SULD_1D_ARRAY_V2I8_CLAMP
3577   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3578               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3579               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3580               []>;
3581 def SULD_1D_ARRAY_V2I16_CLAMP
3582   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3583               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3584               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3585               []>;
3586 def SULD_1D_ARRAY_V2I32_CLAMP
3587   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3588               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3589               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3590               []>;
3591 def SULD_1D_ARRAY_V2I64_CLAMP
3592   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3593               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3594               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3595               []>;
3596
// 2D: coordinate vector {$x, $y}.
3597 def SULD_2D_V2I8_CLAMP
3598   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3599               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3600               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3601               []>;
3602 def SULD_2D_V2I16_CLAMP
3603   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3604               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3605               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3606               []>;
3607 def SULD_2D_V2I32_CLAMP
3608   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3609               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3610               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3611               []>;
3612 def SULD_2D_V2I64_CLAMP
3613   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3614               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3615               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3616               []>;
3617
// 2D array: $y is printed twice so the coordinate vector has four elements.
3618 def SULD_2D_ARRAY_V2I8_CLAMP
3619   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3620               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3621               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3622               "[$s, \\{$l, $x, $y, $y\\}];",
3623               []>;
3624 def SULD_2D_ARRAY_V2I16_CLAMP
3625   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3626               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3627               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3628               "[$s, \\{$l, $x, $y, $y\\}];",
3629               []>;
3630 def SULD_2D_ARRAY_V2I32_CLAMP
3631   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3632               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3633               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3634               "[$s, \\{$l, $x, $y, $y\\}];",
3635               []>;
3636 def SULD_2D_ARRAY_V2I64_CLAMP
3637   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3638               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3639               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3640               "[$s, \\{$l, $x, $y, $y\\}];",
3641               []>;
3642
// 3D: $z is printed twice so the coordinate vector has four elements.
3643 def SULD_3D_V2I8_CLAMP
3644   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3645               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3646               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3647               []>;
3648 def SULD_3D_V2I16_CLAMP
3649   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3650               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3651               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3652               []>;
3653 def SULD_3D_V2I32_CLAMP
3654   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3655               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3656               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3657               []>;
3658 def SULD_3D_V2I64_CLAMP
3659   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3660               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3661               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3662               []>;
3663 }
3664
3665 let IsSuld = 3 in {
3666 def SULD_1D_V4I8_CLAMP
3667   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3668               (ins Int64Regs:$s, Int32Regs:$x),
3669               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3670               []>;
3671 def SULD_1D_V4I16_CLAMP
3672   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3673               (ins Int64Regs:$s, Int32Regs:$x),
3674               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3675               []>;
3676 def SULD_1D_V4I32_CLAMP
3677   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3678               (ins Int64Regs:$s, Int32Regs:$x),
3679               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3680               []>;
3681
3682 def SULD_1D_ARRAY_V4I8_CLAMP
3683   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3684               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3685               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3686               "[$s, \\{$l, $x\\}];",
3687               []>;
3688 def SULD_1D_ARRAY_V4I16_CLAMP
3689   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3690               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3691               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3692               "[$s, \\{$l, $x\\}];",
3693               []>;
3694 def SULD_1D_ARRAY_V4I32_CLAMP
3695   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3696               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3697               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3698               "[$s, \\{$l, $x\\}];",
3699               []>;
3700
3701 def SULD_2D_V4I8_CLAMP
3702   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3703               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3704               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3705               []>;
3706 def SULD_2D_V4I16_CLAMP
3707   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3708               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3709               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3710               []>;
3711 def SULD_2D_V4I32_CLAMP
3712   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3713               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3714               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3715               []>;
3716
3717 def SULD_2D_ARRAY_V4I8_CLAMP
3718   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3719               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3720               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3721               "[$s, \\{$l, $x, $y, $y\\}];",
3722               []>;
3723 def SULD_2D_ARRAY_V4I16_CLAMP
3724   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3725               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3726               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3727               "[$s, \\{$l, $x, $y, $y\\}];",
3728               []>;
3729 def SULD_2D_ARRAY_V4I32_CLAMP
3730   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3731               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3732               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3733               "[$s, \\{$l, $x, $y, $y\\}];",
3734               []>;
3735
3736
3737 def SULD_3D_V4I8_CLAMP
3738   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3739               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3740               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3741               "[$s, \\{$x, $y, $z, $z\\}];",
3742               []>;
// 3D surface load, four b16 results (Int16Regs), .clamp mode.
// The coordinate vector is written as {$x, $y, $z, $z}; the trailing $z only
// fills the fourth slot (NOTE(review): presumably ignored by PTX -- confirm).
3743 def SULD_3D_V4I16_CLAMP
3744   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3745               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3746               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3747               "[$s, \\{$x, $y, $z, $z\\}];",
3748               []>;
// 3D surface load, four b32 results (Int32Regs), .clamp mode.
// The coordinate vector is written as {$x, $y, $z, $z}; the trailing $z only
// fills the fourth slot (NOTE(review): presumably ignored by PTX -- confirm).
3749 def SULD_3D_V4I32_CLAMP
3750   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3751               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3752               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3753               "[$s, \\{$x, $y, $z, $z\\}];",
3754               []>;
3755 }
3756
3757
3758 // .trap variant
// Single-result surface loads, .trap mode.  Every def in this group is
// created with IsSuld = 1 and has exactly one output register, $r.
// Naming: SULD_<geometry>_<I8|I16|I32|I64>_TRAP, with the asm string carrying
// the matching "suld.b.<geom>.b<N>.trap" mnemonic.  b8 and b16 results use
// Int16Regs, b32 uses Int32Regs and b64 uses Int64Regs.
// All pattern lists are empty, so no selection pattern is attached here
// (NOTE(review): selection is presumably handled elsewhere -- confirm).
3759 let IsSuld = 1 in {
// 1D: a single Int32Regs coordinate, $x.
3760 def SULD_1D_I8_TRAP
3761   : NVPTXInst<(outs Int16Regs:$r),
3762               (ins Int64Regs:$s, Int32Regs:$x),
3763               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3764               []>;
3765 def SULD_1D_I16_TRAP
3766   : NVPTXInst<(outs Int16Regs:$r),
3767               (ins Int64Regs:$s, Int32Regs:$x),
3768               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3769               []>;
3770 def SULD_1D_I32_TRAP
3771   : NVPTXInst<(outs Int32Regs:$r),
3772               (ins Int64Regs:$s, Int32Regs:$x),
3773               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3774               []>;
3775 def SULD_1D_I64_TRAP
3776   : NVPTXInst<(outs Int64Regs:$r),
3777               (ins Int64Regs:$s, Int32Regs:$x),
3778               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3779               []>;
3780
// 1D array: the coordinate vector is {$l, $x}, with $l listed first.
3781 def SULD_1D_ARRAY_I8_TRAP
3782   : NVPTXInst<(outs Int16Regs:$r),
3783               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3784               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3785               []>;
3786 def SULD_1D_ARRAY_I16_TRAP
3787   : NVPTXInst<(outs Int16Regs:$r),
3788               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3789               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3790               []>;
3791 def SULD_1D_ARRAY_I32_TRAP
3792   : NVPTXInst<(outs Int32Regs:$r),
3793               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3794               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3795               []>;
3796 def SULD_1D_ARRAY_I64_TRAP
3797   : NVPTXInst<(outs Int64Regs:$r),
3798               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3799               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3800               []>;
3801
// 2D: the coordinate vector is {$x, $y}.
3802 def SULD_2D_I8_TRAP
3803   : NVPTXInst<(outs Int16Regs:$r),
3804               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3805               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3806               []>;
3807 def SULD_2D_I16_TRAP
3808   : NVPTXInst<(outs Int16Regs:$r),
3809               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3810               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3811               []>;
3812 def SULD_2D_I32_TRAP
3813   : NVPTXInst<(outs Int32Regs:$r),
3814               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3815               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3816               []>;
3817 def SULD_2D_I64_TRAP
3818   : NVPTXInst<(outs Int64Regs:$r),
3819               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3820               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3821               []>;
3822
// 2D array: the coordinate vector is written as {$l, $x, $y, $y}; $y is
// repeated only to fill a fourth slot (NOTE(review): presumably the fourth
// element is ignored by PTX -- confirm against the PTX ISA).
3823 def SULD_2D_ARRAY_I8_TRAP
3824   : NVPTXInst<(outs Int16Regs:$r),
3825               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3826               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3827               []>;
3828 def SULD_2D_ARRAY_I16_TRAP
3829   : NVPTXInst<(outs Int16Regs:$r),
3830               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3831               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3832               []>;
3833 def SULD_2D_ARRAY_I32_TRAP
3834   : NVPTXInst<(outs Int32Regs:$r),
3835               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3836               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3837               []>;
3838 def SULD_2D_ARRAY_I64_TRAP
3839   : NVPTXInst<(outs Int64Regs:$r),
3840               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3841               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3842               []>;
3843
// 3D: the coordinate vector is written as {$x, $y, $z, $z}; $z is repeated
// only to fill a fourth slot (NOTE(review): presumably ignored -- confirm).
3844 def SULD_3D_I8_TRAP
3845   : NVPTXInst<(outs Int16Regs:$r),
3846               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3847               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3848               []>;
3849 def SULD_3D_I16_TRAP
3850   : NVPTXInst<(outs Int16Regs:$r),
3851               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3852               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3853               []>;
3854 def SULD_3D_I32_TRAP
3855   : NVPTXInst<(outs Int32Regs:$r),
3856               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3857               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3858               []>;
3859 def SULD_3D_I64_TRAP
3860   : NVPTXInst<(outs Int64Regs:$r),
3861               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3862               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3863               []>;
3864 }
3865
// Two-result (.v2) surface loads, .trap mode.  Every def in this group is
// created with IsSuld = 2 and has two output registers, $r and $g, of the
// same register class.  Naming: SULD_<geometry>_V2<I8|I16|I32|I64>_TRAP.
// b8 and b16 results use Int16Regs.  All pattern lists are empty, so no
// selection pattern is attached here.
3866 let IsSuld = 2 in {
// 1D: a single Int32Regs coordinate, $x.
3867 def SULD_1D_V2I8_TRAP
3868   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3869               (ins Int64Regs:$s, Int32Regs:$x),
3870               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3871               []>;
3872 def SULD_1D_V2I16_TRAP
3873   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3874               (ins Int64Regs:$s, Int32Regs:$x),
3875               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3876               []>;
3877 def SULD_1D_V2I32_TRAP
3878   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3879               (ins Int64Regs:$s, Int32Regs:$x),
3880               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3881               []>;
3882 def SULD_1D_V2I64_TRAP
3883   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3884               (ins Int64Regs:$s, Int32Regs:$x),
3885               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3886               []>;
3887
// 1D array: the coordinate vector is {$l, $x}, with $l listed first.
3888 def SULD_1D_ARRAY_V2I8_TRAP
3889   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3890               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3891               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3892               []>;
3893 def SULD_1D_ARRAY_V2I16_TRAP
3894   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3895               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3896               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3897               []>;
3898 def SULD_1D_ARRAY_V2I32_TRAP
3899   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3900               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3901               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3902               []>;
3903 def SULD_1D_ARRAY_V2I64_TRAP
3904   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3905               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3906               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3907               []>;
3908
// 2D: the coordinate vector is {$x, $y}.
3909 def SULD_2D_V2I8_TRAP
3910   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3911               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3912               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3913               []>;
3914 def SULD_2D_V2I16_TRAP
3915   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3916               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3917               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3918               []>;
3919 def SULD_2D_V2I32_TRAP
3920   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3921               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3922               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3923               []>;
3924 def SULD_2D_V2I64_TRAP
3925   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3926               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3927               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3928               []>;
3929
// 2D array: the coordinate vector is written as {$l, $x, $y, $y}; $y is
// repeated only to fill a fourth slot (NOTE(review): presumably the fourth
// element is ignored by PTX -- confirm against the PTX ISA).
3930 def SULD_2D_ARRAY_V2I8_TRAP
3931   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3932               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3933               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
3934               "[$s, \\{$l, $x, $y, $y\\}];",
3935               []>;
3936 def SULD_2D_ARRAY_V2I16_TRAP
3937   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3938               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3939               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
3940               "[$s, \\{$l, $x, $y, $y\\}];",
3941               []>;
3942 def SULD_2D_ARRAY_V2I32_TRAP
3943   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3944               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3945               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
3946               "[$s, \\{$l, $x, $y, $y\\}];",
3947               []>;
3948 def SULD_2D_ARRAY_V2I64_TRAP
3949   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3950               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3951               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
3952               "[$s, \\{$l, $x, $y, $y\\}];",
3953               []>;
3954
// 3D: the coordinate vector is written as {$x, $y, $z, $z}; $z is repeated
// only to fill a fourth slot (NOTE(review): presumably ignored -- confirm).
3955 def SULD_3D_V2I8_TRAP
3956   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3957               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3958               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3959               []>;
3960 def SULD_3D_V2I16_TRAP
3961   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3962               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3963               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3964               []>;
3965 def SULD_3D_V2I32_TRAP
3966   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3967               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3968               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3969               []>;
3970 def SULD_3D_V2I64_TRAP
3971   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3972               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3973               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3974               []>;
3975 }
3976
// Four-result (.v4) surface loads, .trap mode.  Every def in this group is
// created with IsSuld = 3 and has four output registers, $r/$g/$b/$a, of the
// same register class.  Naming: SULD_<geometry>_V4<I8|I16|I32>_TRAP; unlike
// the scalar and .v2 groups, this group defines no I64 variants.
// b8 and b16 results use Int16Regs.  All pattern lists are empty, so no
// selection pattern is attached here.
3977 let IsSuld = 3 in {
// 1D: a single Int32Regs coordinate, $x.
3978 def SULD_1D_V4I8_TRAP
3979   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3980               (ins Int64Regs:$s, Int32Regs:$x),
3981               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3982               []>;
3983 def SULD_1D_V4I16_TRAP
3984   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3985               (ins Int64Regs:$s, Int32Regs:$x),
3986               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3987               []>;
3988 def SULD_1D_V4I32_TRAP
3989   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3990               (ins Int64Regs:$s, Int32Regs:$x),
3991               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3992               []>;
3993
// 1D array: the coordinate vector is {$l, $x}, with $l listed first.
3994 def SULD_1D_ARRAY_V4I8_TRAP
3995   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3996               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3997               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3998               "[$s, \\{$l, $x\\}];",
3999               []>;
4000 def SULD_1D_ARRAY_V4I16_TRAP
4001   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4002               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4003               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4004               "[$s, \\{$l, $x\\}];",
4005               []>;
4006 def SULD_1D_ARRAY_V4I32_TRAP
4007   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4008               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4009               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4010               "[$s, \\{$l, $x\\}];",
4011               []>;
4012
// 2D: the coordinate vector is {$x, $y}.
4013 def SULD_2D_V4I8_TRAP
4014   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4015               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4016               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4017               []>;
4018 def SULD_2D_V4I16_TRAP
4019   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4020               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4021               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4022               []>;
4023 def SULD_2D_V4I32_TRAP
4024   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4025               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4026               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4027               []>;
4028
// 2D array: the coordinate vector is written as {$l, $x, $y, $y}; $y is
// repeated only to fill a fourth slot (NOTE(review): presumably the fourth
// element is ignored by PTX -- confirm against the PTX ISA).
4029 def SULD_2D_ARRAY_V4I8_TRAP
4030   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4031               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4032               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4033               "[$s, \\{$l, $x, $y, $y\\}];",
4034               []>;
4035 def SULD_2D_ARRAY_V4I16_TRAP
4036   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4037               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4038               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4039               "[$s, \\{$l, $x, $y, $y\\}];",
4040               []>;
4041 def SULD_2D_ARRAY_V4I32_TRAP
4042   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4043               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4044               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4045               "[$s, \\{$l, $x, $y, $y\\}];",
4046               []>;
4047
4048
// 3D: the coordinate vector is written as {$x, $y, $z, $z}; $z is repeated
// only to fill a fourth slot (NOTE(review): presumably ignored -- confirm).
4049 def SULD_3D_V4I8_TRAP
4050   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4051               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4052               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4053               "[$s, \\{$x, $y, $z, $z\\}];",
4054               []>;
4055 def SULD_3D_V4I16_TRAP
4056   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4057               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4058               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4059               "[$s, \\{$x, $y, $z, $z\\}];",
4060               []>;
4061 def SULD_3D_V4I32_TRAP
4062   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4063               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4064               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4065               "[$s, \\{$x, $y, $z, $z\\}];",
4066               []>;
4067 }
4068
4069 // .zero variant
// Single-result surface loads, .zero mode.  Every def in this group is
// created with IsSuld = 1 and has exactly one output register, $r.
// Naming: SULD_<geometry>_<I8|I16|I32|I64>_ZERO, with the asm string carrying
// the matching "suld.b.<geom>.b<N>.zero" mnemonic.  Operand lists mirror the
// .trap scalar group; only the mode suffix in the mnemonic differs.
// b8 and b16 results use Int16Regs.  All pattern lists are empty, so no
// selection pattern is attached here.
4070 let IsSuld = 1 in {
// 1D: a single Int32Regs coordinate, $x.
4071 def SULD_1D_I8_ZERO
4072   : NVPTXInst<(outs Int16Regs:$r),
4073               (ins Int64Regs:$s, Int32Regs:$x),
4074               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4075               []>;
4076 def SULD_1D_I16_ZERO
4077   : NVPTXInst<(outs Int16Regs:$r),
4078               (ins Int64Regs:$s, Int32Regs:$x),
4079               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4080               []>;
4081 def SULD_1D_I32_ZERO
4082   : NVPTXInst<(outs Int32Regs:$r),
4083               (ins Int64Regs:$s, Int32Regs:$x),
4084               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4085               []>;
4086 def SULD_1D_I64_ZERO
4087   : NVPTXInst<(outs Int64Regs:$r),
4088               (ins Int64Regs:$s, Int32Regs:$x),
4089               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4090               []>;
4091
// 1D array: the coordinate vector is {$l, $x}, with $l listed first.
4092 def SULD_1D_ARRAY_I8_ZERO
4093   : NVPTXInst<(outs Int16Regs:$r),
4094               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4095               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4096               []>;
4097 def SULD_1D_ARRAY_I16_ZERO
4098   : NVPTXInst<(outs Int16Regs:$r),
4099               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4100               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4101               []>;
4102 def SULD_1D_ARRAY_I32_ZERO
4103   : NVPTXInst<(outs Int32Regs:$r),
4104               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4105               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4106               []>;
4107 def SULD_1D_ARRAY_I64_ZERO
4108   : NVPTXInst<(outs Int64Regs:$r),
4109               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4110               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4111               []>;
4112
// 2D: the coordinate vector is {$x, $y}.
4113 def SULD_2D_I8_ZERO
4114   : NVPTXInst<(outs Int16Regs:$r),
4115               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4116               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4117               []>;
4118 def SULD_2D_I16_ZERO
4119   : NVPTXInst<(outs Int16Regs:$r),
4120               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4121               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4122               []>;
4123 def SULD_2D_I32_ZERO
4124   : NVPTXInst<(outs Int32Regs:$r),
4125               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4126               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4127               []>;
4128 def SULD_2D_I64_ZERO
4129   : NVPTXInst<(outs Int64Regs:$r),
4130               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4131               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4132               []>;
4133
// 2D array: the coordinate vector is written as {$l, $x, $y, $y}; $y is
// repeated only to fill a fourth slot (NOTE(review): presumably the fourth
// element is ignored by PTX -- confirm against the PTX ISA).
4134 def SULD_2D_ARRAY_I8_ZERO
4135   : NVPTXInst<(outs Int16Regs:$r),
4136               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4137               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4138               []>;
4139 def SULD_2D_ARRAY_I16_ZERO
4140   : NVPTXInst<(outs Int16Regs:$r),
4141               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4142               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4143               []>;
4144 def SULD_2D_ARRAY_I32_ZERO
4145   : NVPTXInst<(outs Int32Regs:$r),
4146               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4147               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4148               []>;
4149 def SULD_2D_ARRAY_I64_ZERO
4150   : NVPTXInst<(outs Int64Regs:$r),
4151               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4152               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4153               []>;
4154
// 3D: the coordinate vector is written as {$x, $y, $z, $z}; $z is repeated
// only to fill a fourth slot (NOTE(review): presumably ignored -- confirm).
4155 def SULD_3D_I8_ZERO
4156   : NVPTXInst<(outs Int16Regs:$r),
4157               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4158               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4159               []>;
4160 def SULD_3D_I16_ZERO
4161   : NVPTXInst<(outs Int16Regs:$r),
4162               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4163               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4164               []>;
4165 def SULD_3D_I32_ZERO
4166   : NVPTXInst<(outs Int32Regs:$r),
4167               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4168               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4169               []>;
4170 def SULD_3D_I64_ZERO
4171   : NVPTXInst<(outs Int64Regs:$r),
4172               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4173               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4174               []>;
4175 }
4176
// Two-result (.v2) surface loads, .zero mode.  Every def in this group is
// created with IsSuld = 2 and has two output registers, $r and $g, of the
// same register class.  Naming: SULD_<geometry>_V2<I8|I16|I32|I64>_ZERO.
// Operand lists mirror the .trap .v2 group; only the mode suffix differs.
// b8 and b16 results use Int16Regs.  All pattern lists are empty, so no
// selection pattern is attached here.
4177 let IsSuld = 2 in {
// 1D: a single Int32Regs coordinate, $x.
4178 def SULD_1D_V2I8_ZERO
4179   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4180               (ins Int64Regs:$s, Int32Regs:$x),
4181               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4182               []>;
4183 def SULD_1D_V2I16_ZERO
4184   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4185               (ins Int64Regs:$s, Int32Regs:$x),
4186               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4187               []>;
4188 def SULD_1D_V2I32_ZERO
4189   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4190               (ins Int64Regs:$s, Int32Regs:$x),
4191               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4192               []>;
4193 def SULD_1D_V2I64_ZERO
4194   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4195               (ins Int64Regs:$s, Int32Regs:$x),
4196               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4197               []>;
4198
// 1D array: the coordinate vector is {$l, $x}, with $l listed first.
4199 def SULD_1D_ARRAY_V2I8_ZERO
4200   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4201               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4202               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4203               []>;
4204 def SULD_1D_ARRAY_V2I16_ZERO
4205   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4206               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4207               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4208               []>;
4209 def SULD_1D_ARRAY_V2I32_ZERO
4210   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4211               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4212               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4213               []>;
4214 def SULD_1D_ARRAY_V2I64_ZERO
4215   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4216               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4217               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4218               []>;
4219
// 2D: the coordinate vector is {$x, $y}.
4220 def SULD_2D_V2I8_ZERO
4221   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4222               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4223               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4224               []>;
4225 def SULD_2D_V2I16_ZERO
4226   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4227               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4228               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4229               []>;
4230 def SULD_2D_V2I32_ZERO
4231   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4232               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4233               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4234               []>;
4235 def SULD_2D_V2I64_ZERO
4236   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4237               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4238               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4239               []>;
4240
// 2D array: the coordinate vector is written as {$l, $x, $y, $y}; $y is
// repeated only to fill a fourth slot (NOTE(review): presumably the fourth
// element is ignored by PTX -- confirm against the PTX ISA).
4241 def SULD_2D_ARRAY_V2I8_ZERO
4242   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4243               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4244               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4245               "[$s, \\{$l, $x, $y, $y\\}];",
4246               []>;
4247 def SULD_2D_ARRAY_V2I16_ZERO
4248   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4249               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4250               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4251               "[$s, \\{$l, $x, $y, $y\\}];",
4252               []>;
4253 def SULD_2D_ARRAY_V2I32_ZERO
4254   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4255               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4256               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4257               "[$s, \\{$l, $x, $y, $y\\}];",
4258               []>;
4259 def SULD_2D_ARRAY_V2I64_ZERO
4260   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4261               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4262               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4263               "[$s, \\{$l, $x, $y, $y\\}];",
4264               []>;
4265
// 3D: the coordinate vector is written as {$x, $y, $z, $z}; $z is repeated
// only to fill a fourth slot (NOTE(review): presumably ignored -- confirm).
4266 def SULD_3D_V2I8_ZERO
4267   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4268               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4269               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4270               []>;
4271 def SULD_3D_V2I16_ZERO
4272   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4273               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4274               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4275               []>;
4276 def SULD_3D_V2I32_ZERO
4277   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4278               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4279               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4280               []>;
4281 def SULD_3D_V2I64_ZERO
4282   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4283               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4284               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4285               []>;
4286 }
4287
// Four-result (.v4) surface loads, .zero mode.  Every def in this group is
// created with IsSuld = 3 and has four output registers, $r/$g/$b/$a, of the
// same register class.  Naming: SULD_<geometry>_V4<I8|I16|I32>_ZERO; unlike
// the scalar and .v2 groups, this group defines no I64 variants.
// Operand lists mirror the .trap .v4 group; only the mode suffix differs.
// b8 and b16 results use Int16Regs.  All pattern lists are empty, so no
// selection pattern is attached here.
4288 let IsSuld = 3 in {
// 1D: a single Int32Regs coordinate, $x.
4289 def SULD_1D_V4I8_ZERO
4290   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4291               (ins Int64Regs:$s, Int32Regs:$x),
4292               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4293               []>;
4294 def SULD_1D_V4I16_ZERO
4295   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4296               (ins Int64Regs:$s, Int32Regs:$x),
4297               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4298               []>;
4299 def SULD_1D_V4I32_ZERO
4300   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4301               (ins Int64Regs:$s, Int32Regs:$x),
4302               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4303               []>;
4304
// 1D array: the coordinate vector is {$l, $x}, with $l listed first.
4305 def SULD_1D_ARRAY_V4I8_ZERO
4306   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4307               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4308               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4309               "[$s, \\{$l, $x\\}];",
4310               []>;
4311 def SULD_1D_ARRAY_V4I16_ZERO
4312   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4313               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4314               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4315               "[$s, \\{$l, $x\\}];",
4316               []>;
4317 def SULD_1D_ARRAY_V4I32_ZERO
4318   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4319               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4320               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4321               "[$s, \\{$l, $x\\}];",
4322               []>;
4323
// 2D: the coordinate vector is {$x, $y}.
4324 def SULD_2D_V4I8_ZERO
4325   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4326               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4327               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4328               []>;
4329 def SULD_2D_V4I16_ZERO
4330   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4331               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4332               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4333               []>;
4334 def SULD_2D_V4I32_ZERO
4335   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4336               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4337               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4338               []>;
4339
// 2D array: the coordinate vector is written as {$l, $x, $y, $y}; $y is
// repeated only to fill a fourth slot (NOTE(review): presumably the fourth
// element is ignored by PTX -- confirm against the PTX ISA).
4340 def SULD_2D_ARRAY_V4I8_ZERO
4341   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4342               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4343               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4344               "[$s, \\{$l, $x, $y, $y\\}];",
4345               []>;
4346 def SULD_2D_ARRAY_V4I16_ZERO
4347   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4348               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4349               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4350               "[$s, \\{$l, $x, $y, $y\\}];",
4351               []>;
4352 def SULD_2D_ARRAY_V4I32_ZERO
4353   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4354               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4355               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4356               "[$s, \\{$l, $x, $y, $y\\}];",
4357               []>;
4358
4359
// 3D: the coordinate vector is written as {$x, $y, $z, $z}; $z is repeated
// only to fill a fourth slot (NOTE(review): presumably ignored -- confirm).
4360 def SULD_3D_V4I8_ZERO
4361   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4362               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4363               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4364               "[$s, \\{$x, $y, $z, $z\\}];",
4365               []>;
4366 def SULD_3D_V4I16_ZERO
4367   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4369               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4370               "[$s, \\{$x, $y, $z, $z\\}];",
4371               []>;
4372 def SULD_3D_V4I32_ZERO
4373   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4374               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4375               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4376               "[$s, \\{$x, $y, $z, $z\\}];",
4377               []>;
4378 }
4379
4380 //-----------------------------------
4381 // Texture Query Intrinsics
4382 //-----------------------------------
4383
// Texture query instructions.  Every def in this group is created with
// IsSurfTexQuery = 1, takes a single Int64Regs operand $a, and writes one
// Int32Regs result $d using "txq.<attribute>.b32 $d, [$a];".  One def exists
// per queried attribute: channel_order, channel_data_type, width, height,
// depth, array_size, num_samples and num_mipmap_levels.
// The pattern lists are empty; the int_nvvm_txq_* intrinsics are mapped onto
// these records by the anonymous Pat definitions that follow this group.
4384 let IsSurfTexQuery = 1 in {
4385 def TXQ_CHANNEL_ORDER
4386   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4387               "txq.channel_order.b32 \t$d, [$a];",
4388               []>;
4389 def TXQ_CHANNEL_DATA_TYPE
4390   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4391               "txq.channel_data_type.b32 \t$d, [$a];",
4392               []>;
4393 def TXQ_WIDTH
4394   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4395               "txq.width.b32 \t$d, [$a];",
4396               []>;
4397 def TXQ_HEIGHT
4398   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4399               "txq.height.b32 \t$d, [$a];",
4400               []>;
4401 def TXQ_DEPTH
4402   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4403               "txq.depth.b32 \t$d, [$a];",
4404               []>;
4405 def TXQ_ARRAY_SIZE
4406   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4407               "txq.array_size.b32 \t$d, [$a];",
4408               []>;
4409 def TXQ_NUM_SAMPLES
4410   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4411               "txq.num_samples.b32 \t$d, [$a];",
4412               []>;
4413 def TXQ_NUM_MIPMAP_LEVELS
4414   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4415               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4416               []>;
4417 }
4418
// Selection patterns for the texture query intrinsics.  Each anonymous Pat
// maps one int_nvvm_txq_<attribute> intrinsic, applied to an Int64Regs
// operand $a, onto the TXQ_<ATTRIBUTE> instruction of the same attribute,
// forwarding $a unchanged.
4419 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4420           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4421 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4422           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4423 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4424           (TXQ_WIDTH Int64Regs:$a)>;
4425 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4426           (TXQ_HEIGHT Int64Regs:$a)>;
4427 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4428           (TXQ_DEPTH Int64Regs:$a)>;
4429 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4430           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4431 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4432           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4433 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4434           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4435
4436
4437 //-----------------------------------
4438 // Surface Query Intrinsics
4439 //-----------------------------------
4440
// Surface query instructions (suq.*).
// Every record in this scope is an NVPTXInst with one Int64Regs input ($a)
// and one Int32Regs output ($d), and prints a single line of the form
//   suq.<attribute>.b32 <tab>$d, [$a];
// where <attribute> matches the def name (channel_order, channel_data_type,
// width, height, depth, array_size). Six attributes are defined here; there
// are no num_samples / num_mipmap_levels forms in this scope.
// All six are defined with IsSurfTexQuery = 1 and an empty pattern list; the
// anonymous Pat records in this file map the int_nvvm_suq_* intrinsics onto
// them.
// NOTE(review): $a is presumably the surface handle -- confirm against the
// intrinsic definitions, which are not visible here.
4441 let IsSurfTexQuery = 1 in {
4442 def SUQ_CHANNEL_ORDER
4443   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4444               "suq.channel_order.b32 \t$d, [$a];",
4445               []>;
4446 def SUQ_CHANNEL_DATA_TYPE
4447   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4448               "suq.channel_data_type.b32 \t$d, [$a];",
4449               []>;
4450 def SUQ_WIDTH
4451   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4452               "suq.width.b32 \t$d, [$a];",
4453               []>;
4454 def SUQ_HEIGHT
4455   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4456               "suq.height.b32 \t$d, [$a];",
4457               []>;
4458 def SUQ_DEPTH
4459   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4460               "suq.depth.b32 \t$d, [$a];",
4461               []>;
4462 def SUQ_ARRAY_SIZE
4463   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4464               "suq.array_size.b32 \t$d, [$a];",
4465               []>;
4466 }
4467
// Selection patterns for the surface query intrinsics.
// Each anonymous Pat matches one int_nvvm_suq_* intrinsic applied to an
// Int64Regs operand and rewrites it to the SUQ_* instruction with the same
// attribute name, forwarding the operand $a unchanged. There is one pattern
// per SUQ_* instruction (six in total).
4468 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4469           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4470 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4471           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4472 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4473           (SUQ_WIDTH Int64Regs:$a)>;
4474 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4475           (SUQ_HEIGHT Int64Regs:$a)>;
4476 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4477           (SUQ_DEPTH Int64Regs:$a)>;
4478 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4479           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4480
4481
4482 //===- Handle Query -------------------------------------------------------===//
4483
// Handle type predicates (istypep.*).
// Each record takes one Int64Regs operand ($a) and produces an Int1Regs
// result ($d), printing "istypep.<samplerref|surfref|texref> <tab>$d, $a;".
// Note that $a is printed bare here, not inside [ ] as in the txq/suq
// instructions. Unlike those, these defs carry their selection pattern
// inline: $d is set from the matching int_nvvm_istypep_* intrinsic.
4484 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
4485 def ISTYPEP_SAMPLER
4486   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4487               "istypep.samplerref \t$d, $a;",
4488               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4489 def ISTYPEP_SURFACE
4490   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4491               "istypep.surfref \t$d, $a;",
4492               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4493 def ISTYPEP_TEXTURE
4494   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4495               "istypep.texref \t$d, $a;",
4496               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4497
4498 //===- Surface Stores -----------------------------------------------------===//
4499
4500 let IsSust = 1 in {
4501 // Unformatted
4502 // .clamp variant
// sust.b.1d.*.clamp -- unformatted surface stores, 1d geometry, .clamp suffix.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s  Int64Regs  first element inside the [ ] address (presumably the
//                  surface handle -- confirm against the lowering code)
//   $x  Int32Regs  the single coordinate, printed as {$x}
//   $r, $g, $b, $a data registers, printed as a brace-enclosed vector
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs.
// Every def has an empty pattern list, so none is selected through a pattern
// attached to the def itself.
// Scalar stores: one data register ($r).
4503 def SUST_B_1D_B8_CLAMP
4504   : NVPTXInst<(outs),
4505               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4506               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4507               []>;
4508 def SUST_B_1D_B16_CLAMP
4509   : NVPTXInst<(outs),
4510               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4511               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4512               []>;
4513 def SUST_B_1D_B32_CLAMP
4514   : NVPTXInst<(outs),
4515               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4516               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4517               []>;
4518 def SUST_B_1D_B64_CLAMP
4519   : NVPTXInst<(outs),
4520               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4521               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4522               []>;
// v2 stores: two data registers ($r, $g).
4523 def SUST_B_1D_V2B8_CLAMP
4524   : NVPTXInst<(outs),
4525               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4526               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4527               []>;
4528 def SUST_B_1D_V2B16_CLAMP
4529   : NVPTXInst<(outs),
4530               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4531               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4532               []>;
4533 def SUST_B_1D_V2B32_CLAMP
4534   : NVPTXInst<(outs),
4535               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4536               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4537               []>;
4538 def SUST_B_1D_V2B64_CLAMP
4539   : NVPTXInst<(outs),
4540               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4541               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4542               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined in this family (there is no V4B64 def).
4543 def SUST_B_1D_V4B8_CLAMP
4544   : NVPTXInst<(outs),
4545               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4546                    Int16Regs:$b, Int16Regs:$a),
4547               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4548               []>;
4549 def SUST_B_1D_V4B16_CLAMP
4550   : NVPTXInst<(outs),
4551               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4552                    Int16Regs:$b, Int16Regs:$a),
4553               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4554               []>;
4555 def SUST_B_1D_V4B32_CLAMP
4556   : NVPTXInst<(outs),
4557               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4558                    Int32Regs:$b, Int32Regs:$a),
4559               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4560               []>;
4561
4562
// sust.b.a1d.*.clamp -- unformatted surface stores, 1d-array geometry
// (printed as "a1d"), .clamp suffix.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s    Int64Regs  first element inside the [ ] address (presumably the
//                    surface handle -- confirm against the lowering code)
//   $idx  Int32Regs  printed first in the coordinate vector (presumably the
//                    array layer index -- confirm)
//   $x    Int32Regs  printed second; the vector is {$idx, $x}
//   $r, $g, $b, $a   data registers, printed as a brace-enclosed vector
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4563 def SUST_B_1D_ARRAY_B8_CLAMP
4564   : NVPTXInst<(outs),
4565               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4566               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4567               []>;
4568 def SUST_B_1D_ARRAY_B16_CLAMP
4569   : NVPTXInst<(outs),
4570               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4571               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4572               []>;
4573 def SUST_B_1D_ARRAY_B32_CLAMP
4574   : NVPTXInst<(outs),
4575               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4576               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4577               []>;
4578 def SUST_B_1D_ARRAY_B64_CLAMP
4579   : NVPTXInst<(outs),
4580               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4581               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4582               []>;
// v2 stores: two data registers ($r, $g).
4583 def SUST_B_1D_ARRAY_V2B8_CLAMP
4584   : NVPTXInst<(outs),
4585               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4586                    Int16Regs:$g),
4587               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4588               []>;
4589 def SUST_B_1D_ARRAY_V2B16_CLAMP
4590   : NVPTXInst<(outs),
4591               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4592                    Int16Regs:$g),
4593               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4594               []>;
4595 def SUST_B_1D_ARRAY_V2B32_CLAMP
4596   : NVPTXInst<(outs),
4597               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4598                    Int32Regs:$g),
4599               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4600               []>;
4601 def SUST_B_1D_ARRAY_V2B64_CLAMP
4602   : NVPTXInst<(outs),
4603               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4604                    Int64Regs:$g),
4605               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4606               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined. The asm string is split across two adjacent string literals.
4607 def SUST_B_1D_ARRAY_V4B8_CLAMP
4608   : NVPTXInst<(outs),
4609               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4610                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4611               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4612               "\\{$r, $g, $b, $a\\};",
4613               []>;
4614 def SUST_B_1D_ARRAY_V4B16_CLAMP
4615   : NVPTXInst<(outs),
4616               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4617                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4618              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4619              "\\{$r, $g, $b, $a\\};",
4620               []>;
4621 def SUST_B_1D_ARRAY_V4B32_CLAMP
4622   : NVPTXInst<(outs),
4623               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4624                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4625              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4626              "\\{$r, $g, $b, $a\\};",
4627               []>;
4628
4629
// sust.b.2d.*.clamp -- unformatted surface stores, 2d geometry, .clamp suffix.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s      Int64Regs  first element inside the [ ] address (presumably the
//                      surface handle -- confirm against the lowering code)
//   $x, $y  Int32Regs  coordinates, printed as {$x, $y}
//   $r, $g, $b, $a     data registers, printed as a brace-enclosed vector
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4630 def SUST_B_2D_B8_CLAMP
4631   : NVPTXInst<(outs),
4632               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4633               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4634               []>;
4635 def SUST_B_2D_B16_CLAMP
4636   : NVPTXInst<(outs),
4637               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4638               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4639               []>;
4640 def SUST_B_2D_B32_CLAMP
4641   : NVPTXInst<(outs),
4642               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4643               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4644               []>;
4645 def SUST_B_2D_B64_CLAMP
4646   : NVPTXInst<(outs),
4647               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4648               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4649               []>;
// v2 stores: two data registers ($r, $g).
4650 def SUST_B_2D_V2B8_CLAMP
4651   : NVPTXInst<(outs),
4652               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4653                    Int16Regs:$g),
4654               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4655               []>;
4656 def SUST_B_2D_V2B16_CLAMP
4657   : NVPTXInst<(outs),
4658               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4659                    Int16Regs:$g),
4660               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4661               []>;
4662 def SUST_B_2D_V2B32_CLAMP
4663   : NVPTXInst<(outs),
4664               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4665                    Int32Regs:$g),
4666               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4667               []>;
4668 def SUST_B_2D_V2B64_CLAMP
4669   : NVPTXInst<(outs),
4670               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4671                    Int64Regs:$g),
4672               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4673               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined. The asm string is split across two adjacent string literals.
4674 def SUST_B_2D_V4B8_CLAMP
4675   : NVPTXInst<(outs),
4676               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4677                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4678               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4679               "\\{$r, $g, $b, $a\\};",
4680               []>;
4681 def SUST_B_2D_V4B16_CLAMP
4682   : NVPTXInst<(outs),
4683               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4684                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4685              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4686              "\\{$r, $g, $b, $a\\};",
4687               []>;
4688 def SUST_B_2D_V4B32_CLAMP
4689   : NVPTXInst<(outs),
4690               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4691                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4692              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4693              "\\{$r, $g, $b, $a\\};",
4694               []>;
4695
4696
// sust.b.a2d.*.clamp -- unformatted surface stores, 2d-array geometry
// (printed as "a2d"), .clamp suffix.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s      Int64Regs  first element inside the [ ] address (presumably the
//                      surface handle -- confirm against the lowering code)
//   $idx    Int32Regs  printed first in the coordinate vector (presumably the
//                      array layer index -- confirm)
//   $x, $y  Int32Regs  coordinates
//   $r, $g, $b, $a     data registers, printed as a brace-enclosed vector
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y}:
// $y appears twice even though only three coordinate operands exist.
// NOTE(review): this looks like deliberate padding to a 4-element vector for
// the a2d geometry rather than a typo (every a2d def does it) -- confirm
// against the PTX ISA description of sust.
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4697 def SUST_B_2D_ARRAY_B8_CLAMP
4698   : NVPTXInst<(outs),
4699               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4700                    Int16Regs:$r),
4701               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4702               []>;
4703 def SUST_B_2D_ARRAY_B16_CLAMP
4704   : NVPTXInst<(outs),
4705               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4706                    Int16Regs:$r),
4707               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4708               []>;
4709 def SUST_B_2D_ARRAY_B32_CLAMP
4710   : NVPTXInst<(outs),
4711               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4712                    Int32Regs:$r),
4713               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4714               []>;
4715 def SUST_B_2D_ARRAY_B64_CLAMP
4716   : NVPTXInst<(outs),
4717               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4718                    Int64Regs:$r),
4719               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4720               []>;
// v2 stores: two data registers ($r, $g).
4721 def SUST_B_2D_ARRAY_V2B8_CLAMP
4722   : NVPTXInst<(outs),
4723               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4724                    Int16Regs:$r, Int16Regs:$g),
4725               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4726               "\\{$r, $g\\};",
4727               []>;
4728 def SUST_B_2D_ARRAY_V2B16_CLAMP
4729   : NVPTXInst<(outs),
4730               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4731                    Int16Regs:$r, Int16Regs:$g),
4732              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4733              "\\{$r, $g\\};",
4734               []>;
4735 def SUST_B_2D_ARRAY_V2B32_CLAMP
4736   : NVPTXInst<(outs),
4737               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4738                    Int32Regs:$r, Int32Regs:$g),
4739              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4740              "\\{$r, $g\\};",
4741               []>;
4742 def SUST_B_2D_ARRAY_V2B64_CLAMP
4743   : NVPTXInst<(outs),
4744               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4745                    Int64Regs:$r, Int64Regs:$g),
4746              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4747              "\\{$r, $g\\};",
4748               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined.
4749 def SUST_B_2D_ARRAY_V4B8_CLAMP
4750   : NVPTXInst<(outs),
4751               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4752                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4753       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4754       "\\{$r, $g, $b, $a\\};",
4755               []>;
4756 def SUST_B_2D_ARRAY_V4B16_CLAMP
4757   : NVPTXInst<(outs),
4758               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4759                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4760      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4761      "\\{$r, $g, $b, $a\\};",
4762               []>;
4763 def SUST_B_2D_ARRAY_V4B32_CLAMP
4764   : NVPTXInst<(outs),
4765               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4766                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4767      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4768      "\\{$r, $g, $b, $a\\};",
4769               []>;
4770
4771
// sust.b.3d.*.clamp -- unformatted surface stores, 3d geometry, .clamp suffix.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s          Int64Regs  first element inside the [ ] address (presumably
//                          the surface handle -- confirm against the lowering
//                          code)
//   $x, $y, $z  Int32Regs  coordinates
//   $r, $g, $b, $a         data registers, printed as a brace-enclosed vector
// The coordinate vector is printed with four elements, {$x, $y, $z, $z}:
// $z appears twice even though only three coordinate operands exist.
// NOTE(review): this looks like deliberate padding to a 4-element vector for
// the 3d geometry rather than a typo (every 3d def does it) -- confirm
// against the PTX ISA description of sust.
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4772 def SUST_B_3D_B8_CLAMP
4773   : NVPTXInst<(outs),
4774               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4775                    Int16Regs:$r),
4776               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4777               []>;
4778 def SUST_B_3D_B16_CLAMP
4779   : NVPTXInst<(outs),
4780               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4781                    Int16Regs:$r),
4782               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4783               []>;
4784 def SUST_B_3D_B32_CLAMP
4785   : NVPTXInst<(outs),
4786               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4787                    Int32Regs:$r),
4788               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4789               []>;
4790 def SUST_B_3D_B64_CLAMP
4791   : NVPTXInst<(outs),
4792               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4793                    Int64Regs:$r),
4794               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4795               []>;
// v2 stores: two data registers ($r, $g).
4796 def SUST_B_3D_V2B8_CLAMP
4797   : NVPTXInst<(outs),
4798               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4799                    Int16Regs:$r, Int16Regs:$g),
4800               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4801               "\\{$r, $g\\};",
4802               []>;
4803 def SUST_B_3D_V2B16_CLAMP
4804   : NVPTXInst<(outs),
4805               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4806                    Int16Regs:$r, Int16Regs:$g),
4807               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4808               "\\{$r, $g\\};",
4809               []>;
4810 def SUST_B_3D_V2B32_CLAMP
4811   : NVPTXInst<(outs),
4812               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4813                    Int32Regs:$r, Int32Regs:$g),
4814               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4815               "\\{$r, $g\\};",
4816               []>;
4817 def SUST_B_3D_V2B64_CLAMP
4818   : NVPTXInst<(outs),
4819               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4820                    Int64Regs:$r, Int64Regs:$g),
4821               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4822               "\\{$r, $g\\};",
4823               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined.
4824 def SUST_B_3D_V4B8_CLAMP
4825   : NVPTXInst<(outs),
4826               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4827                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4828          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4829          "\\{$r, $g, $b, $a\\};",
4830               []>;
4831 def SUST_B_3D_V4B16_CLAMP
4832   : NVPTXInst<(outs),
4833               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4834                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4835         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4836         "\\{$r, $g, $b, $a\\};",
4837               []>;
4838 def SUST_B_3D_V4B32_CLAMP
4839   : NVPTXInst<(outs),
4840               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4841                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4842         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4843         "\\{$r, $g, $b, $a\\};",
4844               []>;
4845
4846
4847 // .trap variant
// sust.b.1d.*.trap -- unformatted surface stores, 1d geometry, .trap suffix.
// Operand lists and register classes mirror the SUST_B_1D_*_CLAMP defs; only
// the def names and the suffix in the asm string differ.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s  Int64Regs  first element inside the [ ] address (presumably the
//                  surface handle -- confirm against the lowering code)
//   $x  Int32Regs  the single coordinate, printed as {$x}
//   $r, $g, $b, $a data registers, printed as a brace-enclosed vector
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4848 def SUST_B_1D_B8_TRAP
4849   : NVPTXInst<(outs),
4850               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4851               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
4852               []>;
4853 def SUST_B_1D_B16_TRAP
4854   : NVPTXInst<(outs),
4855               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4856               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
4857               []>;
4858 def SUST_B_1D_B32_TRAP
4859   : NVPTXInst<(outs),
4860               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4861               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
4862               []>;
4863 def SUST_B_1D_B64_TRAP
4864   : NVPTXInst<(outs),
4865               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4866               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
4867               []>;
// v2 stores: two data registers ($r, $g).
4868 def SUST_B_1D_V2B8_TRAP
4869   : NVPTXInst<(outs),
4870               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4871               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4872               []>;
4873 def SUST_B_1D_V2B16_TRAP
4874   : NVPTXInst<(outs),
4875               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4876               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4877               []>;
4878 def SUST_B_1D_V2B32_TRAP
4879   : NVPTXInst<(outs),
4880               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4881               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4882               []>;
4883 def SUST_B_1D_V2B64_TRAP
4884   : NVPTXInst<(outs),
4885               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4886               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4887               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined in this family (there is no V4B64 def).
4888 def SUST_B_1D_V4B8_TRAP
4889   : NVPTXInst<(outs),
4890               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4891                    Int16Regs:$b, Int16Regs:$a),
4892               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4893               []>;
4894 def SUST_B_1D_V4B16_TRAP
4895   : NVPTXInst<(outs),
4896               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4897                    Int16Regs:$b, Int16Regs:$a),
4898               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4899               []>;
4900 def SUST_B_1D_V4B32_TRAP
4901   : NVPTXInst<(outs),
4902               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4903                    Int32Regs:$b, Int32Regs:$a),
4904               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4905               []>;
4906
4907
// sust.b.a1d.*.trap -- unformatted surface stores, 1d-array geometry
// (printed as "a1d"), .trap suffix.
// Operand lists and register classes mirror the SUST_B_1D_ARRAY_*_CLAMP
// defs; only the def names and the suffix in the asm string differ.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s    Int64Regs  first element inside the [ ] address (presumably the
//                    surface handle -- confirm against the lowering code)
//   $idx  Int32Regs  printed first in the coordinate vector (presumably the
//                    array layer index -- confirm)
//   $x    Int32Regs  printed second; the vector is {$idx, $x}
//   $r, $g, $b, $a   data registers, printed as a brace-enclosed vector
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4908 def SUST_B_1D_ARRAY_B8_TRAP
4909   : NVPTXInst<(outs),
4910               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4911               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4912               []>;
4913 def SUST_B_1D_ARRAY_B16_TRAP
4914   : NVPTXInst<(outs),
4915               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4916               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4917               []>;
4918 def SUST_B_1D_ARRAY_B32_TRAP
4919   : NVPTXInst<(outs),
4920               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4921               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4922               []>;
4923 def SUST_B_1D_ARRAY_B64_TRAP
4924   : NVPTXInst<(outs),
4925               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4926               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4927               []>;
// v2 stores: two data registers ($r, $g).
4928 def SUST_B_1D_ARRAY_V2B8_TRAP
4929   : NVPTXInst<(outs),
4930               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4931                    Int16Regs:$g),
4932               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4933               []>;
4934 def SUST_B_1D_ARRAY_V2B16_TRAP
4935   : NVPTXInst<(outs),
4936               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4937                    Int16Regs:$g),
4938               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4939               []>;
4940 def SUST_B_1D_ARRAY_V2B32_TRAP
4941   : NVPTXInst<(outs),
4942               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4943                    Int32Regs:$g),
4944               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4945               []>;
4946 def SUST_B_1D_ARRAY_V2B64_TRAP
4947   : NVPTXInst<(outs),
4948               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4949                    Int64Regs:$g),
4950               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4951               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined. The asm string is split across two adjacent string literals.
4952 def SUST_B_1D_ARRAY_V4B8_TRAP
4953   : NVPTXInst<(outs),
4954               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4955                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4956               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
4957               "\\{$r, $g, $b, $a\\};",
4958               []>;
4959 def SUST_B_1D_ARRAY_V4B16_TRAP
4960   : NVPTXInst<(outs),
4961               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4962                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4963              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
4964              "\\{$r, $g, $b, $a\\};",
4965               []>;
4966 def SUST_B_1D_ARRAY_V4B32_TRAP
4967   : NVPTXInst<(outs),
4968               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4969                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4970              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
4971              "\\{$r, $g, $b, $a\\};",
4972               []>;
4973
4974
// sust.b.2d.*.trap -- unformatted surface stores, 2d geometry, .trap suffix.
// Operand lists and register classes mirror the SUST_B_2D_*_CLAMP defs; only
// the def names and the suffix in the asm string differ.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s      Int64Regs  first element inside the [ ] address (presumably the
//                      surface handle -- confirm against the lowering code)
//   $x, $y  Int32Regs  coordinates, printed as {$x, $y}
//   $r, $g, $b, $a     data registers, printed as a brace-enclosed vector
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
4975 def SUST_B_2D_B8_TRAP
4976   : NVPTXInst<(outs),
4977               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4978               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4979               []>;
4980 def SUST_B_2D_B16_TRAP
4981   : NVPTXInst<(outs),
4982               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4983               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4984               []>;
4985 def SUST_B_2D_B32_TRAP
4986   : NVPTXInst<(outs),
4987               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4988               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4989               []>;
4990 def SUST_B_2D_B64_TRAP
4991   : NVPTXInst<(outs),
4992               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4993               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
4994               []>;
// v2 stores: two data registers ($r, $g).
4995 def SUST_B_2D_V2B8_TRAP
4996   : NVPTXInst<(outs),
4997               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4998                    Int16Regs:$g),
4999               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5000               []>;
5001 def SUST_B_2D_V2B16_TRAP
5002   : NVPTXInst<(outs),
5003               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5004                    Int16Regs:$g),
5005               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5006               []>;
5007 def SUST_B_2D_V2B32_TRAP
5008   : NVPTXInst<(outs),
5009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5010                    Int32Regs:$g),
5011               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5012               []>;
5013 def SUST_B_2D_V2B64_TRAP
5014   : NVPTXInst<(outs),
5015               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5016                    Int64Regs:$g),
5017               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5018               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined. The asm string is split across two adjacent string literals.
5019 def SUST_B_2D_V4B8_TRAP
5020   : NVPTXInst<(outs),
5021               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5022                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5023               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5024               "\\{$r, $g, $b, $a\\};",
5025               []>;
5026 def SUST_B_2D_V4B16_TRAP
5027   : NVPTXInst<(outs),
5028               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5029                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5030              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5031              "\\{$r, $g, $b, $a\\};",
5032               []>;
5033 def SUST_B_2D_V4B32_TRAP
5034   : NVPTXInst<(outs),
5035               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5036                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5037              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5038              "\\{$r, $g, $b, $a\\};",
5039               []>;
5040
5041
// sust.b.a2d.*.trap -- unformatted surface stores, 2d-array geometry
// (printed as "a2d"), .trap suffix.
// Operand lists and register classes mirror the SUST_B_2D_ARRAY_*_CLAMP
// defs; only the def names and the suffix in the asm string differ.
// These defs sit inside the enclosing `let IsSust = 1 in {` scope.
// Operands (no outputs):
//   $s      Int64Regs  first element inside the [ ] address (presumably the
//                      surface handle -- confirm against the lowering code)
//   $idx    Int32Regs  printed first in the coordinate vector (presumably the
//                      array layer index -- confirm)
//   $x, $y  Int32Regs  coordinates
//   $r, $g, $b, $a     data registers, printed as a brace-enclosed vector
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y}:
// $y appears twice even though only three coordinate operands exist.
// NOTE(review): this looks like deliberate padding to a 4-element vector for
// the a2d geometry rather than a typo (every a2d def does it) -- confirm
// against the PTX ISA description of sust.
// Data register class by element size: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. All pattern lists are empty.
// Scalar stores: one data register ($r).
5042 def SUST_B_2D_ARRAY_B8_TRAP
5043   : NVPTXInst<(outs),
5044               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5045                    Int16Regs:$r),
5046               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5047               []>;
5048 def SUST_B_2D_ARRAY_B16_TRAP
5049   : NVPTXInst<(outs),
5050               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5051                    Int16Regs:$r),
5052               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5053               []>;
5054 def SUST_B_2D_ARRAY_B32_TRAP
5055   : NVPTXInst<(outs),
5056               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5057                    Int32Regs:$r),
5058               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5059               []>;
5060 def SUST_B_2D_ARRAY_B64_TRAP
5061   : NVPTXInst<(outs),
5062               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5063                    Int64Regs:$r),
5064               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5065               []>;
// v2 stores: two data registers ($r, $g).
5066 def SUST_B_2D_ARRAY_V2B8_TRAP
5067   : NVPTXInst<(outs),
5068               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5069                    Int16Regs:$r, Int16Regs:$g),
5070               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5071               "\\{$r, $g\\};",
5072               []>;
5073 def SUST_B_2D_ARRAY_V2B16_TRAP
5074   : NVPTXInst<(outs),
5075               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5076                    Int16Regs:$r, Int16Regs:$g),
5077              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5078              "\\{$r, $g\\};",
5079               []>;
5080 def SUST_B_2D_ARRAY_V2B32_TRAP
5081   : NVPTXInst<(outs),
5082               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5083                    Int32Regs:$r, Int32Regs:$g),
5084              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5085              "\\{$r, $g\\};",
5086               []>;
5087 def SUST_B_2D_ARRAY_V2B64_TRAP
5088   : NVPTXInst<(outs),
5089               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5090                    Int64Regs:$r, Int64Regs:$g),
5091              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5092              "\\{$r, $g\\};",
5093               []>;
// v4 stores: four data registers ($r, $g, $b, $a); only b8/b16/b32 forms are
// defined.
5094 def SUST_B_2D_ARRAY_V4B8_TRAP
5095   : NVPTXInst<(outs),
5096               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5097                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5098       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5099       "\\{$r, $g, $b, $a\\};",
5100               []>;
5101 def SUST_B_2D_ARRAY_V4B16_TRAP
5102   : NVPTXInst<(outs),
5103               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5104                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5105      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5106      "\\{$r, $g, $b, $a\\};",
5107               []>;
5108 def SUST_B_2D_ARRAY_V4B32_TRAP
5109   : NVPTXInst<(outs),
5110               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5111                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5112      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5113      "\\{$r, $g, $b, $a\\};",
5114               []>;
5115
5116
// sust.b.3d.b8.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    !strconcat("sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.b16.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    !strconcat("sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.b32.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    !strconcat("sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.b64.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    !strconcat("sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.v2.b8.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v2.b16.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v2.b32.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v2.b64.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v4.b8.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.3d.v4.b16.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.3d.v4.b32.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5190
5191
// sust.b surface stores, .zero variant
// sust.b.1d.b8.zero: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.b.1d.b8.zero \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.1d.b16.zero: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.b.1d.b16.zero \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.1d.b32.zero: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    !strconcat("sust.b.1d.b32.zero \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.1d.b64.zero: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    !strconcat("sust.b.1d.b64.zero \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.1d.v2.b8.zero: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.1d.v2.b16.zero: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.1d.v2.b32.zero: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.1d.v2.b64.zero: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.1d.v4.b8.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.1d.v4.b16.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.1d.v4.b32.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5251
5252
// sust.b.a1d.b8.zero: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a1d.b16.zero: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a1d.b32.zero: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    !strconcat("sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a1d.b64.zero: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    !strconcat("sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a1d.v2.b8.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a1d.v2.b16.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a1d.v2.b32.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a1d.v2.b64.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a1d.v4.b8.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.a1d.v4.b16.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.a1d.v4.b32.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_B_1D_ARRAY_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5318
5319
// sust.b.2d.b8.zero: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.2d.b16.zero: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.2d.b32.zero: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    !strconcat("sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.2d.b64.zero: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    !strconcat("sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.2d.v2.b8.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.2d.v2.b16.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.2d.v2.b32.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.2d.v2.b64.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.2d.v4.b8.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.2d.v4.b16.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.2d.v4.b32.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_B_2D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5385
5386
// sust.b.a2d.b8.zero: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a2d.b16.zero: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a2d.b32.zero: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    !strconcat("sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a2d.b64.zero: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    !strconcat("sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.a2d.v2.b8.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a2d.v2.b16.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a2d.v2.b32.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a2d.v2.b64.zero: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.a2d.v4.b8.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.a2d.v4.b16.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.a2d.v4.b32.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_2D_ARRAY_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5460
5461
// sust.b.3d.b8.zero: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    !strconcat("sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.b16.zero: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    !strconcat("sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.b32.zero: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    !strconcat("sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.b64.zero: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    !strconcat("sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.b.3d.v2.b8.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v2.b16.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v2.b32.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v2.b64.zero: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.b.3d.v4.b8.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.3d.v4.b16.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.b.3d.v4.b32.zero: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_B_3D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5535
5536
5537
// Formatted surface stores (sust.p), .trap variant
5539
// sust.p.1d.b8.trap: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.p.1d.b8.trap \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.1d.b16.trap: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.p.1d.b16.trap \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.1d.b32.trap: surface store of {$r} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    !strconcat("sust.p.1d.b32.trap \t[$s, \\{$x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.1d.v2.b8.trap: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.1d.v2.b16.trap: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.1d.v2.b32.trap: surface store of {$r, $g} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.1d.v4.b8.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.1d.v4.b16.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.1d.v4.b32.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinate {$x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5588
5589
// sust.p.a1d.b8.trap: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.a1d.b16.trap: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    !strconcat("sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.a1d.b32.trap: surface store of {$r} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    !strconcat("sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.a1d.v2.b8.trap: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.a1d.v2.b16.trap: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.a1d.v2.b32.trap: surface store of {$r, $g} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.a1d.v4.b8.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.a1d.v4.b16.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.a1d.v4.b32.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x}. Empty pattern list: no selection pattern here.
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5644
5645
// sust.p.2d.b8.trap: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.2d.b16.trap: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.2d.b32.trap: surface store of {$r} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    !strconcat("sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.2d.v2.b8.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.2d.v2.b16.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.2d.v2.b32.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.2d.v4.b8.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.2d.v4.b16.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.2d.v4.b32.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y}. Empty pattern list: no selection pattern here.
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5700
5701
// sust.p.a2d.b8.trap: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.a2d.b16.trap: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    !strconcat("sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.a2d.b32.trap: surface store of {$r} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    !strconcat("sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.a2d.v2.b8.trap: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.a2d.v2.b16.trap: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.a2d.v2.b32.trap: surface store of {$r, $g} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.a2d.v4.b8.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.a2d.v4.b16.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.a2d.v4.b32.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$idx, $x, $y}. $y is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
5762
5763
// sust.p.3d.b8.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    !strconcat("sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.3d.b16.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    !strconcat("sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.3d.b32.trap: surface store of {$r} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    !strconcat("sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r\\};"),
    []>;
// sust.p.3d.v2.b8.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.3d.v2.b16.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.3d.v2.b32.trap: surface store of {$r, $g} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
// sust.p.3d.v4.b8.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// sust.p.3d.v4.b16.trap: surface store of {$r, $g, $b, $a} to $s at
// coordinates {$x, $y, $z}. $z is emitted twice, so the printed coordinate
// vector has four elements. Empty pattern list: no selection pattern here.
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
// SUST_P_3D_V4B32_TRAP: emits `sust.p.3d.v4.b32.trap`, storing four data
// registers ($r, $g, $b, $a, all Int32Regs) to the location
// [$s, {$x, $y, $z, $z}].
// The asm string deliberately repeats $z as the fourth coordinate element.
// NOTE(review): presumably PTX takes a four-element coordinate vector for 3d
// surfaces and ignores the last element -- confirm against the PTX ISA.
// The pattern list is empty ([]), so no selection pattern is attached here.
5817 def SUST_P_3D_V4B32_TRAP
5818   : NVPTXInst<(outs),
5819               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5820                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5821         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5822         "\\{$r, $g, $b, $a\\};",
5823               []>;
5824 }
5825
5826 // Surface store instruction patterns
5827 // It is unclear why these cannot simply be included in the instruction
5828 // definitions themselves; when that is attempted, TableGen reports type errors.
5829
5830 // .clamp variant
// Selection patterns: 1d surface store, .clamp variant.
// Each record maps an int_nvvm_sust_b_1d_<ty>_clamp intrinsic onto the
// SUST_B_1D_<TY>_CLAMP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then one, two or four data
// registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the instruction definitions.
5831 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5832            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5833           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5834
5835 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5836            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5837           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5838
5839 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5840            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5841           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5842
5843 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5844            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5845           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5846
5847 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5848            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5849           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5850            Int16Regs:$r, Int16Regs:$g)>;
5851
5852 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5853            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5854           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5855            Int16Regs:$r, Int16Regs:$g)>;
5856
5857 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5858            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5859           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5860            Int32Regs:$r, Int32Regs:$g)>;
5861
5862 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5863            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5864           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5865            Int64Regs:$r, Int64Regs:$g)>;
5866
5867 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5868            Int64Regs:$s, Int32Regs:$x,
5869            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5870           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5871            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5872
5873 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5874            Int64Regs:$s, Int32Regs:$x,
5875            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5876           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5877            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5878
5879 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5880            Int64Regs:$s, Int32Regs:$x,
5881            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5882           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5883            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5884
5885
5886
// Selection patterns: 1d array surface store, .clamp variant.
// Each record maps an int_nvvm_sust_b_1d_array_<ty>_clamp intrinsic onto the
// SUST_B_1D_ARRAY_<TY>_CLAMP instruction, forwarding the operands unchanged
// and in the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then
// one, two or four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $l is presumably the array layer index, placed before the
// coordinate -- confirm against the instruction definitions.
5887 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5888            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5889           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5890            Int16Regs:$r)>;
5891
5892 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5893            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5894           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5895            Int16Regs:$r)>;
5896
5897 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5898            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5899           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5900            Int32Regs:$r)>;
5901
5902 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5903            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5904           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5905            Int64Regs:$r)>;
5906
5907 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5908           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5909           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5910            Int16Regs:$r, Int16Regs:$g)>;
5911
5912 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5913           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5914           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5915            Int16Regs:$r, Int16Regs:$g)>;
5916
5917 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5918           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5919           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5920            Int32Regs:$r, Int32Regs:$g)>;
5921
5922 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5923           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5924           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5925            Int64Regs:$r, Int64Regs:$g)>;
5926
5927 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5928            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5929            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5930           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5931            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5932
5933 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5934            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5935            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5936           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5937            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5938
5939 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5940            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5941            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5942           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5943            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5944
5945
5946
// Selection patterns: 2d surface store, .clamp variant.
// Each record maps an int_nvvm_sust_b_2d_<ty>_clamp intrinsic onto the
// SUST_B_2D_<TY>_CLAMP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x and $y (Int32Regs), then one, two or
// four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the instruction definitions.
5947 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5948            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5949           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5950            Int16Regs:$r)>;
5951
5952 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5953            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5954           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5955            Int16Regs:$r)>;
5956
5957 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5958            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5959           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5960            Int32Regs:$r)>;
5961
5962 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5963            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5964           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5965            Int64Regs:$r)>;
5966
5967 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5968           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5969           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5970            Int16Regs:$r, Int16Regs:$g)>;
5971
5972 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5973           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5974           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5975            Int16Regs:$r, Int16Regs:$g)>;
5976
5977 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5978           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5979           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5980            Int32Regs:$r, Int32Regs:$g)>;
5981
5982 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5983           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5984           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5985            Int64Regs:$r, Int64Regs:$g)>;
5986
5987 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5988            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5989            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5990           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5991            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5992
5993 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5994            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5995            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5996           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5997            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5998
5999 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6000            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6001            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6002           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6003            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6004
6005
6006
// Selection patterns: 2d array surface store, .clamp variant.
// Each record maps an int_nvvm_sust_b_2d_array_<ty>_clamp intrinsic onto the
// SUST_B_2D_ARRAY_<TY>_CLAMP instruction, forwarding the operands unchanged
// and in the same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then one,
// two or four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $l is presumably the array layer index, placed before the
// coordinates -- confirm against the instruction definitions.
6007 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6008           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6009           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6010            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6011            Int16Regs:$r)>;
6012
6013 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6014           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6015           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6016            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6017            Int16Regs:$r)>;
6018
6019 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6020           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6021           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6022            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6023            Int32Regs:$r)>;
6024
6025 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6026           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6027           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6028            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6029            Int64Regs:$r)>;
6030
6031 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6032            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6033            Int16Regs:$r, Int16Regs:$g),
6034           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6035            Int32Regs:$x, Int32Regs:$y,
6036            Int16Regs:$r, Int16Regs:$g)>;
6037
6038 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6039            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6040            Int16Regs:$r, Int16Regs:$g),
6041           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6042            Int32Regs:$x, Int32Regs:$y,
6043            Int16Regs:$r, Int16Regs:$g)>;
6044
6045 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6046            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6047            Int32Regs:$g),
6048           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6049            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6050
6051 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6052            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6053            Int64Regs:$g),
6054           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6055            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6056
6057 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6058            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6059            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6060           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6061            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6062            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6063
6064 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6065            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6066            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6067           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6068            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6069            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6070
6071 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6072            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6073            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6074           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6075            Int32Regs:$x, Int32Regs:$y,
6076            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6077
6078
6079
// Selection patterns: 3d surface store, .clamp variant.
// Each record maps an int_nvvm_sust_b_3d_<ty>_clamp intrinsic onto the
// SUST_B_3D_<TY>_CLAMP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x, $y and $z (Int32Regs), then one, two or
// four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the instruction definitions.
6080 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6081            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6082            Int16Regs:$r),
6083           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6084            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6085            Int16Regs:$r)>;
6086
6087 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6088            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6089            Int16Regs:$r),
6090           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6091            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6092            Int16Regs:$r)>;
6093
6094 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6095            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6096            Int32Regs:$r),
6097           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6098            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6099            Int32Regs:$r)>;
6100
6101 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6102            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6103            Int64Regs:$r),
6104           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6105            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6106            Int64Regs:$r)>;
6107
6108 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6109            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6110            Int16Regs:$r, Int16Regs:$g),
6111           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6112            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6113            Int16Regs:$r, Int16Regs:$g)>;
6114
6115 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6116            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6117            Int16Regs:$r, Int16Regs:$g),
6118           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6119            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6120            Int16Regs:$r, Int16Regs:$g)>;
6121
6122 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6123            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6124            Int32Regs:$r, Int32Regs:$g),
6125           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6126            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6127            Int32Regs:$r, Int32Regs:$g)>;
6128
6129 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6130            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6131            Int64Regs:$r, Int64Regs:$g),
6132           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6133            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6134            Int64Regs:$r, Int64Regs:$g)>;
6135
6136 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6137            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6138            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6139           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6140            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6141            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6142
6143 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6144            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6145            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6146           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6147            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6148            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6149
6150 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6151            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6152            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6153           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6154            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6155            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6156
6157
6158 // .trap variant
// Selection patterns: 1d surface store, .trap variant.
// Each record maps an int_nvvm_sust_b_1d_<ty>_trap intrinsic onto the
// SUST_B_1D_<TY>_TRAP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then one, two or four data
// registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the instruction definitions.
6159 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6160            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6161           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6162
6163 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6164            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6165           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6166
6167 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6168            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6169           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6170
6171 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6172            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6173           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6174
6175 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6176            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6177           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6178            Int16Regs:$r, Int16Regs:$g)>;
6179
6180 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6181            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6182           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6183            Int16Regs:$r, Int16Regs:$g)>;
6184
6185 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6186            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6187           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6188            Int32Regs:$r, Int32Regs:$g)>;
6189
6190 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6191            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6192           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6193            Int64Regs:$r, Int64Regs:$g)>;
6194
6195 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6196            Int64Regs:$s, Int32Regs:$x,
6197            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6198           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6199            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6200
6201 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6202            Int64Regs:$s, Int32Regs:$x,
6203            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6204           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6205            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6206
6207 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6208            Int64Regs:$s, Int32Regs:$x,
6209            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6210           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6211            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6212
6213
6214
// Selection patterns: 1d array surface store, .trap variant.
// Each record maps an int_nvvm_sust_b_1d_array_<ty>_trap intrinsic onto the
// SUST_B_1D_ARRAY_<TY>_TRAP instruction, forwarding the operands unchanged
// and in the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then
// one, two or four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $l is presumably the array layer index, placed before the
// coordinate -- confirm against the instruction definitions.
6215 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6216            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6217           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6218            Int16Regs:$r)>;
6219
6220 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6221            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6222           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6223            Int16Regs:$r)>;
6224
6225 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6226            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6227           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6228            Int32Regs:$r)>;
6229
6230 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6231            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6232           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6233            Int64Regs:$r)>;
6234
6235 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6236           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6237           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6238            Int16Regs:$r, Int16Regs:$g)>;
6239
6240 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6241           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6242           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6243            Int16Regs:$r, Int16Regs:$g)>;
6244
6245 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6246           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6247           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6248            Int32Regs:$r, Int32Regs:$g)>;
6249
6250 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6251           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6252           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6253            Int64Regs:$r, Int64Regs:$g)>;
6254
6255 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6256            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6257            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6258           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6259            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6260
6261 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6262            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6263            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6264           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6265            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6266
6267 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6268            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6269            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6270           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6271            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6272
6273
6274
// Selection patterns: 2d surface store, .trap variant.
// Each record maps an int_nvvm_sust_b_2d_<ty>_trap intrinsic onto the
// SUST_B_2D_<TY>_TRAP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x and $y (Int32Regs), then one, two or
// four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the instruction definitions.
6275 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6276            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6277           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6278            Int16Regs:$r)>;
6279
6280 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6281            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6282           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6283            Int16Regs:$r)>;
6284
6285 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6286            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6287           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6288            Int32Regs:$r)>;
6289
6290 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6291            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6292           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6293            Int64Regs:$r)>;
6294
6295 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6296           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6297           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6298            Int16Regs:$r, Int16Regs:$g)>;
6299
6300 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6301           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6302           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6303            Int16Regs:$r, Int16Regs:$g)>;
6304
6305 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6306           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6307           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6308            Int32Regs:$r, Int32Regs:$g)>;
6309
6310 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6311           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6312           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6313            Int64Regs:$r, Int64Regs:$g)>;
6314
6315 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6316            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6317            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6318           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6319            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6320
6321 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6322            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6323            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6324           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6325            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6326
6327 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6328            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6329            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6330           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6331            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6332
6333
6334
// Selection patterns: 2d array surface store, .trap variant.
// Each record maps an int_nvvm_sust_b_2d_array_<ty>_trap intrinsic onto the
// SUST_B_2D_ARRAY_<TY>_TRAP instruction, forwarding the operands unchanged
// and in the same order: $s (Int64Regs), $l, $x and $y (Int32Regs), then one,
// two or four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $l is presumably the array layer index, placed before the
// coordinates -- confirm against the instruction definitions.
6335 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6336           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6337           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6338            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6339            Int16Regs:$r)>;
6340
6341 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6342           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6343           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6344            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6345            Int16Regs:$r)>;
6346
6347 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6348           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6349           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6350            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6351            Int32Regs:$r)>;
6352
6353 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6354           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6355           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6356            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6357            Int64Regs:$r)>;
6358
6359 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6360            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6361            Int16Regs:$r, Int16Regs:$g),
6362           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6363            Int32Regs:$x, Int32Regs:$y,
6364            Int16Regs:$r, Int16Regs:$g)>;
6365
6366 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6367            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6368            Int16Regs:$r, Int16Regs:$g),
6369           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6370            Int32Regs:$x, Int32Regs:$y,
6371            Int16Regs:$r, Int16Regs:$g)>;
6372
6373 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6374            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6375            Int32Regs:$g),
6376           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6377            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6378
6379 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6380            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6381            Int64Regs:$g),
6382           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6383            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6384
6385 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6386            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6387            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6388           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6389            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6390            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6391
6392 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6393            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6394            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6395           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6396            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6397            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6398
6399 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6400            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6401            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6402           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6403            Int32Regs:$x, Int32Regs:$y,
6404            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6405
6406
6407
// Selection patterns: 3d surface store, .trap variant.
// Each record maps an int_nvvm_sust_b_3d_<ty>_trap intrinsic onto the
// SUST_B_3D_<TY>_TRAP instruction, forwarding the operands unchanged and in
// the same order: $s (Int64Regs), $x, $y and $z (Int32Regs), then one, two or
// four data registers ($r, $g, $b, $a).
// The i8 and i16 forms both take their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs. There is no v4i64 record in this group.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the instruction definitions.
6408 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6409            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6410            Int16Regs:$r),
6411           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6412            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6413            Int16Regs:$r)>;
6414
6415 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6416            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6417            Int16Regs:$r),
6418           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6419            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6420            Int16Regs:$r)>;
6421
6422 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6423            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6424            Int32Regs:$r),
6425           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6426            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6427            Int32Regs:$r)>;
6428
6429 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6430            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6431            Int64Regs:$r),
6432           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6433            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6434            Int64Regs:$r)>;
6435
6436 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6437            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6438            Int16Regs:$r, Int16Regs:$g),
6439           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6440            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6441            Int16Regs:$r, Int16Regs:$g)>;
6442
6443 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6444            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6445            Int16Regs:$r, Int16Regs:$g),
6446           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6447            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6448            Int16Regs:$r, Int16Regs:$g)>;
6449
6450 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6451            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6452            Int32Regs:$r, Int32Regs:$g),
6453           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6454            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6455            Int32Regs:$r, Int32Regs:$g)>;
6456
6457 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6458            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6459            Int64Regs:$r, Int64Regs:$g),
6460           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6461            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6462            Int64Regs:$r, Int64Regs:$g)>;
6463
6464 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6465            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6466            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6467           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6468            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6469            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6470
6471 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6472            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6473            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6474           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6475            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6476            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6477
6478 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6479            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6480            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6481           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6482            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6483            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6484
6485
6486 // .zero variant
// Selection patterns for the 1D int_nvvm_sust_b_*_zero intrinsics. Each Pat
// maps one intrinsic onto the SUST_B_1D_*_ZERO instruction of matching
// element width, forwarding the operands ($s, $x, data) unchanged and in
// the same order.
// i8 and i16 data operands are both carried in Int16Regs.
// NOTE(review): "_zero" presumably selects the PTX sust ".zero"
// out-of-bounds mode -- confirm against the SUST_*_ZERO instruction strings.

// Scalar element forms (single data operand $r).
6487 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6488            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6489           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6490
6491 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6492            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6493           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6494
6495 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6496            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6497           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6498
6499 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6500            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6501           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6502
// Two-element vector forms (data operands $r, $g).
6503 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6504            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6505           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6506            Int16Regs:$r, Int16Regs:$g)>;
6507
6508 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6509            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6510           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6511            Int16Regs:$r, Int16Regs:$g)>;
6512
6513 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6514            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6515           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6516            Int32Regs:$r, Int32Regs:$g)>;
6517
6518 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6519            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6520           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6521            Int64Regs:$r, Int64Regs:$g)>;
6522
// Four-element vector forms (data operands $r, $g, $b, $a); the widest
// element covered here is i32.
6523 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6524            Int64Regs:$s, Int32Regs:$x,
6525            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6526           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6527            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6528
6529 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6530            Int64Regs:$s, Int32Regs:$x,
6531            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6532           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6533            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6534
6535 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6536            Int64Regs:$s, Int32Regs:$x,
6537            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6538           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6539            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6540
6541
6542
// Selection patterns for the 1D-array int_nvvm_sust_b_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_1D_ARRAY_*_ZERO instruction of
// matching element width, forwarding the operands ($s, $l, $x, data)
// unchanged and in the same order.
// i8 and i16 data operands are both carried in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6543 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6544            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6545           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6546            Int16Regs:$r)>;
6547
6548 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6549            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6550           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6551            Int16Regs:$r)>;
6552
6553 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6554            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6555           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6556            Int32Regs:$r)>;
6557
6558 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6559            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6560           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6561            Int64Regs:$r)>;
6562
// Two-element vector forms (data operands $r, $g).
6563 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6564           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6565           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6566            Int16Regs:$r, Int16Regs:$g)>;
6567
6568 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6569           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6570           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6571            Int16Regs:$r, Int16Regs:$g)>;
6572
6573 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6574           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6575           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6576            Int32Regs:$r, Int32Regs:$g)>;
6577
6578 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6579           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6580           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6581            Int64Regs:$r, Int64Regs:$g)>;
6582
// Four-element vector forms (data operands $r, $g, $b, $a); the widest
// element covered here is i32.
6583 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6584            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6585            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6586           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6587            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6588
6589 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6590            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6591            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6592           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6593            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6594
6595 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6596            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6597            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6598           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6599            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6600
6601
6602
// Selection patterns for the 2D int_nvvm_sust_b_*_zero intrinsics. Each Pat
// maps one intrinsic onto the SUST_B_2D_*_ZERO instruction of matching
// element width, forwarding the operands ($s, $x, $y, data) unchanged and
// in the same order.
// i8 and i16 data operands are both carried in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6603 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6604            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6605           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6606            Int16Regs:$r)>;
6607
6608 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6609            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6610           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6611            Int16Regs:$r)>;
6612
6613 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6614            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6615           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6616            Int32Regs:$r)>;
6617
6618 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6619            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6620           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6621            Int64Regs:$r)>;
6622
// Two-element vector forms (data operands $r, $g).
6623 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6624           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6625           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6626            Int16Regs:$r, Int16Regs:$g)>;
6627
6628 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6629           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6630           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6631            Int16Regs:$r, Int16Regs:$g)>;
6632
6633 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6634           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6635           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6636            Int32Regs:$r, Int32Regs:$g)>;
6637
6638 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6639           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6640           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6641            Int64Regs:$r, Int64Regs:$g)>;
6642
// Four-element vector forms (data operands $r, $g, $b, $a); the widest
// element covered here is i32.
6643 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6644            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6645            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6646           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6647            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6648
6649 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6650            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6651            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6652           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6653            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6654
6655 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6656            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6657            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6658           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6659            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6660
6661
6662
// Selection patterns for the 2D-array int_nvvm_sust_b_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_2D_ARRAY_*_ZERO instruction of
// matching element width, forwarding the operands ($s, $l, $x, $y, data)
// unchanged and in the same order.
// i8 and i16 data operands are both carried in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6663 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6664           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6665           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6666            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6667            Int16Regs:$r)>;
6668
6669 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6670           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6671           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6672            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6673            Int16Regs:$r)>;
6674
6675 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6676           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6677           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6678            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6679            Int32Regs:$r)>;
6680
6681 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6682           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6683           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6684            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6685            Int64Regs:$r)>;
6686
// Two-element vector forms (data operands $r, $g).
6687 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6688            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6689            Int16Regs:$r, Int16Regs:$g),
6690           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6691            Int32Regs:$x, Int32Regs:$y,
6692            Int16Regs:$r, Int16Regs:$g)>;
6693
6694 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6695            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6696            Int16Regs:$r, Int16Regs:$g),
6697           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6698            Int32Regs:$x, Int32Regs:$y,
6699            Int16Regs:$r, Int16Regs:$g)>;
6700
6701 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6702            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6703            Int32Regs:$g),
6704           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6705            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6706
6707 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6708            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6709            Int64Regs:$g),
6710           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6711            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6712
// Four-element vector forms (data operands $r, $g, $b, $a); the widest
// element covered here is i32.
6713 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6714            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6715            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6716           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6717            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6718            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6719
6720 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6721            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6722            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6723           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6724            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6725            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6726
6727 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6728            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6729            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6730           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6731            Int32Regs:$x, Int32Regs:$y,
6732            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6733
6734
6735
// Selection patterns for the 3D int_nvvm_sust_b_*_zero intrinsics. Each Pat
// maps one intrinsic onto the SUST_B_3D_*_ZERO instruction of matching
// element width, forwarding the operands ($s, $x, $y, $z, data) unchanged
// and in the same order.
// i8 and i16 data operands are both carried in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6736 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6737            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6738            Int16Regs:$r),
6739           (SUST_B_3D_B8_ZERO Int64Regs:$s,
6740            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6741            Int16Regs:$r)>;
6742
6743 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6744            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6745            Int16Regs:$r),
6746           (SUST_B_3D_B16_ZERO Int64Regs:$s,
6747            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6748            Int16Regs:$r)>;
6749
6750 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6751            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6752            Int32Regs:$r),
6753           (SUST_B_3D_B32_ZERO Int64Regs:$s,
6754            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6755            Int32Regs:$r)>;
6756
6757 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6758            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6759            Int64Regs:$r),
6760           (SUST_B_3D_B64_ZERO Int64Regs:$s,
6761            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6762            Int64Regs:$r)>;
6763
// Two-element vector forms (data operands $r, $g).
6764 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6765            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6766            Int16Regs:$r, Int16Regs:$g),
6767           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6768            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6769            Int16Regs:$r, Int16Regs:$g)>;
6770
6771 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6772            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6773            Int16Regs:$r, Int16Regs:$g),
6774           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6775            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6776            Int16Regs:$r, Int16Regs:$g)>;
6777
6778 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6779            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6780            Int32Regs:$r, Int32Regs:$g),
6781           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6782            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6783            Int32Regs:$r, Int32Regs:$g)>;
6784
6785 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6786            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6787            Int64Regs:$r, Int64Regs:$g),
6788           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6789            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6790            Int64Regs:$r, Int64Regs:$g)>;
6791
// Four-element vector forms (data operands $r, $g, $b, $a); the widest
// element covered here is i32.
6792 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6793            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6794            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6795           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6796            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6797            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6798
6799 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6800            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6801            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6802           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6803            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6804            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6805
6806 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6807            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6808            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6809           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6810            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6811            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6812
6813
6814
6815
// Selection patterns for the 1D int_nvvm_sust_p_*_trap intrinsics. Each Pat
// maps one intrinsic onto the SUST_P_1D_*_TRAP instruction of matching
// element width, forwarding the operands ($s, $x, data) unchanged and in
// the same order.
// Only 8-, 16- and 32-bit element widths are covered in this group; i8 and
// i16 data operands are both carried in Int16Regs.
// NOTE(review): "sust_p" presumably corresponds to the PTX "sust.p"
// (formatted store) form -- confirm against the SUST_P_* instruction strings.

// Scalar element forms (single data operand $r).
6816 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6817            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6818           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6819
6820 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6821            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6822           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6823
6824 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6825            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6826           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6827
// Two-element vector forms (data operands $r, $g).
6828 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6829            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6830           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6831            Int16Regs:$r, Int16Regs:$g)>;
6832
6833 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6834            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6835           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6836            Int16Regs:$r, Int16Regs:$g)>;
6837
6838 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6839            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6840           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6841            Int32Regs:$r, Int32Regs:$g)>;
6842
// Four-element vector forms (data operands $r, $g, $b, $a).
6843 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6844            Int64Regs:$s, Int32Regs:$x,
6845            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6846           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6847            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6848
6849 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6850            Int64Regs:$s, Int32Regs:$x,
6851            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6852           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6853            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6854
6855 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6856            Int64Regs:$s, Int32Regs:$x,
6857            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6858           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6859            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6860
6861
6862
// Selection patterns for the 1D-array int_nvvm_sust_p_*_trap intrinsics.
// Each Pat maps one intrinsic onto the SUST_P_1D_ARRAY_*_TRAP instruction of
// matching element width, forwarding the operands ($s, $l, $x, data)
// unchanged and in the same order.
// Only 8-, 16- and 32-bit element widths are covered in this group; i8 and
// i16 data operands are both carried in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6863 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6864            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6865           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6866            Int16Regs:$r)>;
6867
6868 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6869            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6870           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6871            Int16Regs:$r)>;
6872
6873 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6874            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6875           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6876            Int32Regs:$r)>;
6877
// Two-element vector forms (data operands $r, $g).
6878 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6879           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6880           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6881            Int16Regs:$r, Int16Regs:$g)>;
6882
6883 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6884           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6885           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6886            Int16Regs:$r, Int16Regs:$g)>;
6887
6888 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6889           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6890           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6891            Int32Regs:$r, Int32Regs:$g)>;
6892
// Four-element vector forms (data operands $r, $g, $b, $a).
6893 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6894            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6895            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6896           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6897            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6898
6899 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6900            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6901            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6902           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6903            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6904
6905 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6906            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6907            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6908           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6909            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6910
6911
6912
// Selection patterns for the 2D int_nvvm_sust_p_*_trap intrinsics. Each Pat
// maps one intrinsic onto the SUST_P_2D_*_TRAP instruction of matching
// element width, forwarding the operands ($s, $x, $y, data) unchanged and
// in the same order.
// Only 8-, 16- and 32-bit element widths are covered in this group; i8 and
// i16 data operands are both carried in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6913 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6914            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6915           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6916            Int16Regs:$r)>;
6917
6918 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6919            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6920           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6921            Int16Regs:$r)>;
6922
6923 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6924            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6925           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6926            Int32Regs:$r)>;
6927
// Two-element vector forms (data operands $r, $g).
6928 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6929           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6930           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6931            Int16Regs:$r, Int16Regs:$g)>;
6932
6933 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6934           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6935           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6936            Int16Regs:$r, Int16Regs:$g)>;
6937
6938 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6939           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6940           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6941            Int32Regs:$r, Int32Regs:$g)>;
6942
// Four-element vector forms (data operands $r, $g, $b, $a).
6943 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6944            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6945            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6946           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6947            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6948
6949 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6950            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6951            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6952           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6953            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6954
6955 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6956            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6957            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6958           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6959            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6960
6961
6962
// Selection patterns for the 2D-array int_nvvm_sust_p_*_trap intrinsics.
// Each Pat maps one intrinsic onto the SUST_P_2D_ARRAY_*_TRAP instruction of
// matching element width, forwarding the operands ($s, $l, $x, $y, data)
// unchanged and in the same order.
// Only 8-, 16- and 32-bit element widths are covered in this group; i8 and
// i16 data operands are both carried in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
6963 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6964           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6965           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
6966            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6967            Int16Regs:$r)>;
6968
6969 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6970           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6971           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
6972            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6973            Int16Regs:$r)>;
6974
6975 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6976           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6977           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
6978            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6979            Int32Regs:$r)>;
6980
// Two-element vector forms (data operands $r, $g).
6981 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6982            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6983            Int16Regs:$r, Int16Regs:$g),
6984           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6985            Int32Regs:$x, Int32Regs:$y,
6986            Int16Regs:$r, Int16Regs:$g)>;
6987
6988 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6989            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6990            Int16Regs:$r, Int16Regs:$g),
6991           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6992            Int32Regs:$x, Int32Regs:$y,
6993            Int16Regs:$r, Int16Regs:$g)>;
6994
6995 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6996            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6997            Int32Regs:$g),
6998           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6999            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7000
// Four-element vector forms (data operands $r, $g, $b, $a).
7001 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7002            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7003            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7004           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7005            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7006            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7007
7008 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7009            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7010            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7011           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7012            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7013            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7014
7015 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7016            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7017            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7018           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7019            Int32Regs:$x, Int32Regs:$y,
7020            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7021
7022
7023
// Selection patterns for the 3D int_nvvm_sust_p_*_trap intrinsics. Each Pat
// maps one intrinsic onto the SUST_P_3D_*_TRAP instruction of matching
// element width, forwarding the operands ($s, $x, $y, $z, data) unchanged
// and in the same order.
// Only 8-, 16- and 32-bit element widths are covered in this group; i8 and
// i16 data operands are both carried in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic/instruction definitions.

// Scalar element forms (single data operand $r).
7024 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7025            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7026            Int16Regs:$r),
7027           (SUST_P_3D_B8_TRAP Int64Regs:$s,
7028            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7029            Int16Regs:$r)>;
7030
7031 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7032            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7033            Int16Regs:$r),
7034           (SUST_P_3D_B16_TRAP Int64Regs:$s,
7035            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7036            Int16Regs:$r)>;
7037
7038 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7039            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7040            Int32Regs:$r),
7041           (SUST_P_3D_B32_TRAP Int64Regs:$s,
7042            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7043            Int32Regs:$r)>;
7044
// Two-element vector forms (data operands $r, $g).
7045 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7046            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7047            Int16Regs:$r, Int16Regs:$g),
7048           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7049            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7050            Int16Regs:$r, Int16Regs:$g)>;
7051
7052 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7053            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7054            Int16Regs:$r, Int16Regs:$g),
7055           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7056            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7057            Int16Regs:$r, Int16Regs:$g)>;
7058
7059 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7060            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7061            Int32Regs:$r, Int32Regs:$g),
7062           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7063            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7064            Int32Regs:$r, Int32Regs:$g)>;
7065
// Four-element vector forms (data operands $r, $g, $b, $a).
7066 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7067            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7068            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7069           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7070            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7071            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7072
7073 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7074            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7075            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7076           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7077            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7078            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7079
7080 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7081            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7082            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7083           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7084            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7085            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7086
7087 //-----------------------------------
7088 // Read Special Registers
7089 //-----------------------------------
7090
// Instruction class for reading a special register into a 64-bit register.
//   regname - register name without the leading '%'; it is spliced into the
//             assembly string as "mov.u64 \t$d, %<regname>;".
//   intop   - intrinsic that selects this instruction; it is matched with no
//             operands and its result is placed in $d.
// The instruction has a single Int64Regs output ($d) and no inputs.
7091 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7092   : NVPTXInst<(outs Int64Regs:$d), (ins),
7093               !strconcat("mov.u64 \t$d, %", regname, ";"),
7094               [(set Int64Regs:$d, (intop))]>;
7095
// Instruction class for reading a special register into a 32-bit register.
//   regname - register name without the leading '%'; it is spliced into the
//             assembly string as "mov.u32 \t$d, %<regname>;".
//   intop   - intrinsic that selects this instruction; it is matched with no
//             operands and its result is placed in $d.
// The instruction has a single Int32Regs output ($d) and no inputs.
7096 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7097   : NVPTXInst<(outs Int32Regs:$d), (ins),
7098               !strconcat("mov.u32 \t$d, %", regname, ";"),
7099               [(set Int32Regs:$d, (intop))]>;
7100
7101 // TODO: Add vector versions of the special-register read instructions.
7102
// 32-bit reads of the per-component %tid.{x,y,z,w} registers; each def binds
// the matching int_nvvm_read_ptx_sreg_tid_* intrinsic via PTX_READ_SREG_R32.
7103 def INT_PTX_SREG_TID_X :
7104     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7105 def INT_PTX_SREG_TID_Y :
7106     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7107 def INT_PTX_SREG_TID_Z :
7108     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7109 def INT_PTX_SREG_TID_W :
7110     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7111
// 32-bit reads of the per-component %ntid.{x,y,z,w} registers, bound to the
// matching int_nvvm_read_ptx_sreg_ntid_* intrinsics.
7112 def INT_PTX_SREG_NTID_X :
7113     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7114 def INT_PTX_SREG_NTID_Y :
7115     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7116 def INT_PTX_SREG_NTID_Z :
7117     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7118 def INT_PTX_SREG_NTID_W :
7119     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
7120
// 32-bit reads of %laneid, %warpid and %nwarpid, each bound to the
// like-named int_nvvm_read_ptx_sreg_* intrinsic via PTX_READ_SREG_R32.
7121 def INT_PTX_SREG_LANEID :
7122     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7123 def INT_PTX_SREG_WARPID :
7124     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7125 def INT_PTX_SREG_NWARPID :
7126     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
7127
7128 def INT_PTX_SREG_CTAID_X :
7129     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7130 def INT_PTX_SREG_CTAID_Y :
7131     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7132 def INT_PTX_SREG_CTAID_Z :
7133     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7134 def INT_PTX_SREG_CTAID_W :
7135     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7136
// %nctaid.{x,y,z,w}: one 32-bit reader per component, each matched to the
// corresponding int_nvvm_read_ptx_sreg_nctaid_* intrinsic.
7137 def INT_PTX_SREG_NCTAID_X
7138     : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7139 def INT_PTX_SREG_NCTAID_Y
7140     : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7141 def INT_PTX_SREG_NCTAID_Z
7142     : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7143 def INT_PTX_SREG_NCTAID_W
7144     : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
7145
// %smid, %nsmid and %gridid, each read with a 32-bit mov.
7146 def INT_PTX_SREG_SMID
7147     : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7148 def INT_PTX_SREG_NSMID
7149     : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7150 def INT_PTX_SREG_GRIDID
7151     : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
7152
// %lanemask_{eq,le,lt,ge,gt}, each read with a 32-bit mov and matched to the
// corresponding int_nvvm_read_ptx_sreg_lanemask_* intrinsic.
7153 def INT_PTX_SREG_LANEMASK_EQ
7154     : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7155 def INT_PTX_SREG_LANEMASK_LE
7156     : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7157 def INT_PTX_SREG_LANEMASK_LT
7158     : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7159 def INT_PTX_SREG_LANEMASK_GE
7160     : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7161 def INT_PTX_SREG_LANEMASK_GT
7162     : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7163
// %clock is read into a 32-bit register; %clock64 uses the 64-bit template.
7164 def INT_PTX_SREG_CLOCK
7165     : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7166 def INT_PTX_SREG_CLOCK64
7167     : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
7168
// %pm0 through %pm3, each read with a 32-bit mov and matched to the
// corresponding int_nvvm_read_ptx_sreg_pm* intrinsic.
7169 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7170 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7171 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7172 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7173
7174 // TODO: Ideally this would be a PTX_READ_SREG_R32 instance, but that class
7175 // always emits a '%' before the operand name and WARP_SZ is a bare constant.
7176 def INT_PTX_SREG_WARPSIZE
7177     : NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7178                 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;