]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Merge latest version of blacklist sources from NetBSD (@ 20170503)
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
// Matches an fpimm node whose value, converted to float, compares equal to
// 0.0f (the C++ comparison also accepts -0.0f).
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
14
// Matches an fpimm node whose value, converted to float, compares equal to
// 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
19
// Matches an fpimm node whose value, converted to double, compares equal to
// 0.0 (the C++ comparison also accepts -0.0).
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
24
// Matches an fpimm node whose value, converted to double, compares equal to
// 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
29
30
31
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
35 let isConvergent = 1 in {
// int_nvvm_barrier0: takes no operands and is printed as "bar.sync 0;".
def INT_BARRIER0
  : NVPTXInst<(outs), (ins), "bar.sync \t0;", [(int_nvvm_barrier0)]>;
// int_nvvm_barrier0_popc: inside a braced block, declares predicate %p1, sets
// it with "setp.ne.u32 %p1, $pred, 0", and emits bar.red.popc.u32 on barrier 0
// with the result written straight to $dst.
def INT_BARRIER0_POPC
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              !strconcat("{{ \n\t",
                         ".reg .pred \t%p1; \n\t",
                         "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                         "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
                         "}}"),
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: inside a braced block, declares predicates %p1/%p2,
// sets %p1 with "setp.ne.u32 %p1, $pred, 0", reduces it into %p2 with
// bar.red.and.pred on barrier 0, then materializes %p2 into $dst as 1 or 0
// via selp.u32.
def INT_BARRIER0_AND
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              !strconcat("{{ \n\t",
                         ".reg .pred \t%p1; \n\t",
                         ".reg .pred \t%p2; \n\t",
                         "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                         "bar.red.and.pred \t%p2, 0, %p1; \n\t",
                         "selp.u32 \t$dst, 1, 0, %p2; \n\t",
                         "}}"),
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: inside a braced block, declares predicates %p1/%p2,
// sets %p1 with "setp.ne.u32 %p1, $pred, 0", reduces it into %p2 with
// bar.red.or.pred on barrier 0, then materializes %p2 into $dst as 1 or 0
// via selp.u32.
def INT_BARRIER0_OR
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              !strconcat("{{ \n\t",
                         ".reg .pred \t%p1; \n\t",
                         ".reg .pred \t%p2; \n\t",
                         "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                         "bar.red.or.pred \t%p2, 0, %p1; \n\t",
                         "selp.u32 \t$dst, 1, 0, %p2; \n\t",
                         "}}"),
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_or Int32Regs:$pred))]>;
64
// int_nvvm_bar_sync: bar.sync with the barrier given as an immediate operand.
def INT_BAR_SYNC
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.sync\t$i;",
              [(int_nvvm_bar_sync imm:$i)]>;
67
// shfl.{up,down,bfly,idx}.b32
//
// Produces four instructions per intrinsic, one for every register/immediate
// combination of the last two operands:
//   reg  : $offset and $mask both in Int32Regs
//   imm1 : $offset immediate, $mask in Int32Regs
//   imm2 : $offset in Int32Regs, $mask immediate
//   imm3 : $offset and $mask both immediate
// ptxas is smart enough to inline constant registers, so strictly speaking the
// immediate forms are not needed; they are easy to provide and make the
// generated ptx more readable.
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
                      !strconcat("shfl.", mode,
                                 ".b32 $dst, $src, $offset, $mask;"),
                      [(set regclass:$dst,
                            (IntOp regclass:$src, Int32Regs:$offset,
                                   Int32Regs:$mask))]>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
                       (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
                       !strconcat("shfl.", mode,
                                  ".b32 $dst, $src, $offset, $mask;"),
                       [(set regclass:$dst,
                             (IntOp regclass:$src, imm:$offset,
                                    Int32Regs:$mask))]>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
                       (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
                       !strconcat("shfl.", mode,
                                  ".b32 $dst, $src, $offset, $mask;"),
                       [(set regclass:$dst,
                             (IntOp regclass:$src, Int32Regs:$offset,
                                    imm:$mask))]>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
                       (ins regclass:$src, i32imm:$offset, i32imm:$mask),
                       !strconcat("shfl.", mode,
                                  ".b32 $dst, $src, $offset, $mask;"),
                       [(set regclass:$dst,
                             (IntOp regclass:$src, imm:$offset,
                                    imm:$mask))]>;
}
98
// SHFL instantiations: each of the modes down/up/bfly/idx, once for Int32Regs
// and once for Float32Regs.
defm INT_SHFL_DOWN_I32
  : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
  : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32
  : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
  : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32
  : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
  : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32
  : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
  : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
107
108 } // isConvergent = 1
109
110
111 //-----------------------------------
112 // Explicit Memory Fence Functions
113 //-----------------------------------
// Operand-less instruction for a memory-fence intrinsic: StrOp is the full asm
// string to print and IntOP is the intrinsic the instruction is selected for.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;
117
// The three membar variants: .cta, .gl and .sys.
def INT_MEMBAR_CTA
  : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
  : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
121
122
123 //-----------------------------------
124 // Math Functions
125 //-----------------------------------
126
// Map min(1.0, max(0.0, x)) to sat(x).
//
// The opposite nesting, max(0.0, min(x, 1.0)), cannot be mapped to sat(x):
// when x is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
//
// f32: all four operand orders of the fmin/fmax pair are matched.

def : Pat<
  (int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
145
// f64: fold fmin_d(1.0, fmax_d(0.0, x)) into a saturating CVT_f64_f64, in all
// four operand orders of the fmin/fmax pair.
def : Pat<
  (int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
158
159
// One-source math intrinsic: $dst = IntOP($src0).
// OpcStr is the complete asm string rather than just a mnemonic, because
// cases like INT_PTX_RECIP need to be handled.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
167
// Two-source math intrinsic: $dst = IntOP($src0, $src1).
// OpcStr is the complete asm string rather than just a mnemonic, because
// cases like INT_PTX_NATIVE_POWR_F need to be handled.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
176
// Three-source math intrinsic: $dst = IntOP($src0, $src1, $src2).
// OpcStr is the complete asm string to print.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>;
185
186 //
187 // MISC
188 //
189
// clz / popc: the 64-bit source forms still produce an Int32Regs result.
def INT_NVVM_CLZ_I
  : F_MATH_1<"clz.b32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_clz_i>;
def INT_NVVM_CLZ_LL
  : F_MATH_1<"clz.b64 \t$dst, $src0;",
             Int32Regs, Int64Regs, int_nvvm_clz_ll>;

def INT_NVVM_POPC_I
  : F_MATH_1<"popc.b32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_popc_i>;
def INT_NVVM_POPC_LL
  : F_MATH_1<"popc.b64 \t$dst, $src0;",
             Int32Regs, Int64Regs, int_nvvm_popc_ll>;

// prmt.b32 with three 32-bit sources.
def INT_NVVM_PRMT
  : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
202
203 //
204 // Min Max
205 //
206
// Integer min: signed/unsigned, 32- and 64-bit.
def INT_NVVM_MIN_I
  : F_MATH_2<"min.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI
  : F_MATH_2<"min.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_ui>;

def INT_NVVM_MIN_LL
  : F_MATH_2<"min.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL
  : F_MATH_2<"min.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ull>;

// Integer max: signed/unsigned, 32- and 64-bit.
def INT_NVVM_MAX_I
  : F_MATH_2<"max.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI
  : F_MATH_2<"max.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_ui>;

def INT_NVVM_MAX_LL
  : F_MATH_2<"max.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL
  : F_MATH_2<"max.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ull>;

// f32 min/max, with and without .ftz.
def INT_NVVM_FMIN_F
  : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
  : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
  : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
  : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min/max.
def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
241
242 //
243 // Multiplication
244 //
245
// mul.hi: signed/unsigned, 32- and 64-bit.
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply for each rounding modifier (rn/rz/rm/rp), with and without
// .ftz.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply for each rounding modifier.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo, signed and unsigned.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
286
287 //
288 // Div
289 //
290
// f32 approximate divide, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide for each rounding modifier (rn/rz/rm/rp), with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide for each rounding modifier.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
322
323 //
324 // Brev
325 //
326
// brev on 32- and 64-bit registers; result width equals source width.
def INT_NVVM_BREV32
  : F_MATH_1<"brev.b32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_brev32>;
def INT_NVVM_BREV64
  : F_MATH_1<"brev.b64 \t$dst, $src0;",
             Int64Regs, Int64Regs, int_nvvm_brev64>;
331
332 //
333 // Sad
334 //
335
// sad with three 32-bit sources, signed and unsigned.
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
340
341 //
342 // Floor  Ceil
343 //
344
// floor is selected as a same-type CVT with the CvtRMI mode.
def : Pat<
  (int_nvvm_floor_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_floor_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_floor_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil is selected as a same-type CVT with the CvtRPI mode.
def : Pat<
  (int_nvvm_ceil_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_ceil_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<
  (int_nvvm_ceil_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
358
359 //
360 // Abs
361 //
362
// Integer abs, 32- and 64-bit.
def INT_NVVM_ABS_I
  : F_MATH_1<"abs.s32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_abs_i>;
def INT_NVVM_ABS_LL
  : F_MATH_1<"abs.s64 \t$dst, $src0;",
             Int64Regs, Int64Regs, int_nvvm_abs_ll>;

// f32 abs, with and without .ftz.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

// f64 abs.
def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;
375
376 //
377 // Round
378 //
379
// round is selected as a same-type CVT with the CvtRNI mode.
def : Pat<
  (int_nvvm_round_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_round_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_round_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
386
387 //
388 // Trunc
389 //
390
// trunc is selected as a same-type CVT with the CvtRZI mode.
def : Pat<
  (int_nvvm_trunc_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_trunc_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_trunc_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
397
398 //
399 // Saturate
400 //
401
// saturate is selected as a same-type CVT with the CvtSAT mode.
def : Pat<
  (int_nvvm_saturate_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<
  (int_nvvm_saturate_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_saturate_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
408
409 //
410 // Exp2  Log2
411 //
412
// ex2.approx: f32 (with and without .ftz) and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx: f32 (with and without .ftz) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
426
427 //
428 // Sin  Cos
429 //
430
// sin.approx on f32, with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos.approx on f32, with and without .ftz.
def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
440
441 //
442 // Fma
443 //
444
// f32 fma for each rounding modifier (rn/rz/rm/rp), with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
  : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
  : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
  : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
  : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_f>;

// f64 fma for each rounding modifier.
def INT_NVVM_FMA_RN_D
  : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
  : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
  : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
  : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rp_d>;
474
475 //
476 // Rcp
477 //
478
// f32 rcp for each rounding modifier (rn/rz/rm/rp), with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp for each rounding modifier.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// f64 approximate rcp (.ftz form only).
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
507
508 //
509 // Sqrt
510 //
511
// f32 sqrt for each rounding modifier (rn/rz/rm/rp) plus .approx, each with
// and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt for each rounding modifier.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
541
// nvvm_sqrt intrinsic
//
// The generic int_nvvm_sqrt_f is mapped onto one of the concrete sqrt
// instructions according to the predicates attached to each pattern:
//   doF32FTZ + do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
//   do_SQRTF32_RN            -> INT_NVVM_SQRT_RN_F
//   doF32FTZ                 -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)           -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): how the selector ranks these when several predicate sets hold
// at once is not visible in this file; the original declaration order is kept.
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
  Requires<[do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
551
552 //
553 // Rsqrt
554 //
555
// rsqrt.approx: f32 (with and without .ftz) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
563
564 //
565 // Add
566 //
567
// f32 add for each rounding modifier (rn/rz/rm/rp), with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add for each rounding modifier.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
593
594 //
595 // Convert
596 //
597
// d2f: f64 -> f32 via CVT_f32_f64, one pattern per rounding mode, with and
// without FTZ.
def : Pat<
  (int_nvvm_d2f_rn_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rn Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_d2f_rz_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_d2f_rm_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rm Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_d2f_rp_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rp Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
614
// d2i: f64 -> s32 via CVT_s32_f64, one pattern per integer rounding mode.
def : Pat<
  (int_nvvm_d2i_rn Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2i_rz Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2i_rm Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2i_rp Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// d2ui: f64 -> u32 via CVT_u32_f64, one pattern per integer rounding mode.
def : Pat<
  (int_nvvm_d2ui_rn Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ui_rz Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ui_rm Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ui_rp Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
632
// i2d: s32 -> f64 via CVT_f64_s32, one pattern per rounding mode.
def : Pat<
  (int_nvvm_i2d_rn Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2d_rz Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2d_rm Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2d_rp Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// ui2d: u32 -> f64 via CVT_f64_u32, one pattern per rounding mode.
def : Pat<
  (int_nvvm_ui2d_rn Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2d_rz Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2d_rm Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2d_rp Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
650
// f2i: f32 -> s32 via CVT_s32_f32, one pattern per integer rounding mode, with
// and without FTZ.
def : Pat<
  (int_nvvm_f2i_rn_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rn Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2i_rz_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2i_rm_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rm Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2i_rp_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rp Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f2ui: f32 -> u32 via CVT_u32_f32, one pattern per integer rounding mode,
// with and without FTZ.
def : Pat<
  (int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rn Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rm Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rp Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
684
// int_nvvm_i2f_{rn,rz,rm,rp}: Int32Regs operand selected to CVT_f32_s32 with
// the Cvt* mode operand named by the intrinsic suffix.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$src),
          (CVT_f32_s32 Int32Regs:$src, CvtRP)>;

// int_nvvm_ui2f_{rn,rz,rm,rp}: same shape, selecting CVT_f32_u32.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$src),
          (CVT_f32_u32 Int32Regs:$src, CvtRP)>;
702
// int_nvvm_lohi_i2d: emitted as one mov.b64 whose source is the brace-enclosed
// pair ($src0, $src1) and whose destination is $dst.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
705
// int_nvvm_d2i_lo: opens a brace scope, declares a .b32 scratch register
// %temp, and unpacks $src0 with mov.b64 into the pair {$dst, %temp}; only the
// first element of the pair ($dst) is kept.
def INT_NVVM_D2I_LO
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: opens a brace scope, declares a .b32 scratch register
// %temp, and unpacks $src0 with mov.b64 into the pair {%temp, $dst}; only the
// second element of the pair ($dst) is kept.
def INT_NVVM_D2I_HI
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
716
// int_nvvm_f2ll_*: Float32Regs operand selected to CVT_s64_f32.  The suffix
// rn/rz/rm/rp picks CvtRNI/CvtRZI/CvtRMI/CvtRPI; the *_ftz intrinsics use the
// corresponding Cvt*_FTZ mode operand.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$src),
          (CVT_s64_f32 Float32Regs:$src, CvtRPI)>;

// int_nvvm_f2ull_*: same suffix-to-mode mapping, selecting CVT_u64_f32.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$src),
          (CVT_u64_f32 Float32Regs:$src, CvtRPI)>;
750
// int_nvvm_d2ll_{rn,rz,rm,rp}: Float64Regs operand selected to CVT_s64_f64
// with CvtRNI/CvtRZI/CvtRMI/CvtRPI respectively.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$src),
          (CVT_s64_f64 Float64Regs:$src, CvtRPI)>;

// int_nvvm_d2ull_{rn,rz,rm,rp}: same shape, selecting CVT_u64_f64.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$src),
          (CVT_u64_f64 Float64Regs:$src, CvtRPI)>;
768
// int_nvvm_ll2f_{rn,rz,rm,rp}: Int64Regs operand selected to CVT_f32_s64 with
// the Cvt* mode operand named by the intrinsic suffix.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$src),
          (CVT_f32_s64 Int64Regs:$src, CvtRP)>;

// int_nvvm_ull2f_{rn,rz,rm,rp}: same shape, selecting CVT_f32_u64.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$src),
          (CVT_f32_u64 Int64Regs:$src, CvtRP)>;
786
// int_nvvm_ll2d_{rn,rz,rm,rp}: Int64Regs operand selected to CVT_f64_s64 with
// the Cvt* mode operand named by the intrinsic suffix.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$src),
          (CVT_f64_s64 Int64Regs:$src, CvtRP)>;

// int_nvvm_ull2d_{rn,rz,rm,rp}: same shape, selecting CVT_f64_u64.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$src),
          (CVT_f64_u64 Int64Regs:$src, CvtRP)>;
804
805
806 // FIXME: Ideally, we could use these patterns instead of the scope-creating
807 // patterns, but ptxas does not like these since .s16 is not compatible with
808 // .f16.  The solution is to use .bXX for all integer register types, but we
809 // are not there yet.
810 //def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
811 //          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
812 //def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
813 //          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
814 //
815 //def : Pat<(int_nvvm_h2f Int16Regs:$a),
816 //          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
817
// int_nvvm_f2h_rn_ftz: scope-creating form.  Inside a brace scope it declares
// a .b16 scratch register %temp, converts $src0 into it with
// cvt.rn.ftz.f16.f32, then copies %temp to $dst with mov.b16.
def INT_NVVM_F2H_RN_FTZ
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b16 %temp;\n\t",
                        "cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
                        "mov.b16 \t$dst, %temp;\n",
                        "}}"),
             Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
// int_nvvm_f2h_rn: scope-creating form.  Inside a brace scope it declares a
// .b16 scratch register %temp, converts $src0 into it with cvt.rn.f16.f32,
// then copies %temp to $dst with mov.b16.
def INT_NVVM_F2H_RN
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b16 %temp;\n\t",
                        "cvt.rn.f16.f32 \t%temp, $src0;\n\t",
                        "mov.b16 \t$dst, %temp;\n",
                        "}}"),
             Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
830
// int_nvvm_h2f: scope-creating form.  Inside a brace scope it declares a .b16
// scratch register %temp, copies $src0 into it with mov.b16, then converts
// %temp to $dst with cvt.f32.f16.
def INT_NVVM_H2F
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b16 %temp;\n\t",
                        "mov.b16 \t%temp, $src0;\n\t",
                        "cvt.f32.f16 \t$dst, %temp;\n\t",
                        "}}"),
             Float32Regs, Int16Regs, int_nvvm_h2f>;
837
// f16 <-> f32 through the generic f16_to_fp / fp_to_f16 nodes, with the half
// value carried in Int16Regs.  The fp_to_f16 pattern guarded by doF32FTZ is
// listed before the unguarded one; keep that order.
def : Pat<(f32 (f16_to_fp Int16Regs:$src)),
          (CVT_f32_f16 Int16Regs:$src, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float32Regs:$src)),
          (CVT_f16_f32 Float32Regs:$src, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_f16 Float32Regs:$src)),
          (CVT_f16_f32 Float32Regs:$src, CvtRN)>;

// f16 <-> f64 through the same generic nodes.
def : Pat<(f64 (f16_to_fp Int16Regs:$src)),
          (CVT_f64_f16 Int16Regs:$src, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float64Regs:$src)),
          (CVT_f16_f64 Float64Regs:$src, CvtRN)>;
849
850 //
851 // Bitcast
852 //
853
// 32-bit bitcasts: both directions are a plain mov.b32 from $src0 to $dst;
// only the register classes and the matched intrinsic differ.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;
858
// 64-bit bitcasts: both directions are a plain mov.b64 from $src0 to $dst;
// only the register classes and the matched intrinsic differ.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
863
864 //-----------------------------------
865 // Atomic Functions
866 //-----------------------------------
867
// PatFrag wrapper around `frag` whose predicate accepts the node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL) returns true.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
}]>;
// PatFrag wrapper around `frag` whose predicate accepts the node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED) returns true.
class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
}]>;
// PatFrag wrapper around `frag` whose predicate accepts the node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC) returns true.
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
}]>;
880
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass  - register class of the address operand $addr
//   regclass  - register class of the value operand $b and the result $dst
//   SpaceStr, OpcStr, TypeStr
//             - pasted after "atom" in that order to form the mnemonic
//   IntOp     - fragment matched as (IntOp addr, value)
//   IMMType   - operand type used for $b in the immediate form
//   IMM       - node matched for $b in the immediate form
//   Pred      - predicate required by both forms
// Produces `reg` (value in a register) and `imm` (value as an immediate).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<[Pred]>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
    Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_imp twice, forwarding every argument unchanged:
// `p32` uses Int32Regs for the address operand and `p64` uses Int64Regs.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
}
908
// Two-operand atomic that negates its value operand first.
// The emitted text is a brace scope that
//   1. declares a scratch register `temp` of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into temp,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with temp.
// TypeStr is therefore just the bit width ("32"/"64"), without a leading
// ".u"/".s".  Only a register form is defined; IMMType is accepted but not
// referenced in the body.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("{{ \n\t",
               ".reg \t.s", TypeStr, " temp; \n\t",
               "neg.s", TypeStr, " \ttemp, $b; \n\t",
               "atom", SpaceStr, OpcStr, ".u", TypeStr,
               " \t$dst, [$addr], temp; \n\t",
               "}}"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice, forwarding every argument unchanged:
// `p32` uses Int32Regs for the address operand and `p64` uses Int64Regs.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
  Predicate Pred> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
}
939
// Three-operand atomic (address + two values $b, $c) for one pointer width.
// The mnemonic is "atom" followed by SpaceStr, OpcStr and TypeStr.  Four
// forms cover every register/immediate combination of the two values:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are declared with IMMType and matched with `imm`.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr,
               " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst,
      (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
    Requires<[Pred]>;
  def imm1 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr,
               " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
    Requires<[Pred]>;
  def imm2 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr,
               " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
    Requires<[Pred]>;
  def imm3 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr,
               " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
    Requires<[Pred]>;
}
// Instantiates F_ATOMIC_3_imp twice, forwarding every argument unchanged:
// `p32` uses Int32Regs for the address operand and `p64` uses Int64Regs.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
}
989
990 // atom_add
991
// Address-space-qualified fragments for atomic add.  Suffix _g / _s / _gen
// selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
// 32-bit integer: wraps atomic_load_add_32.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_32 node:$ptr, node:$val)>;
// 64-bit integer: wraps atomic_load_add_64.
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_64 node:$ptr, node:$val)>;
// f32: wraps the int_nvvm_atomic_load_add_f32 intrinsic.
def atomic_load_add_f32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_add_f32 node:$ptr, node:$val)>;
def atomic_load_add_f32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_add_f32 node:$ptr, node:$val)>;
def atomic_load_add_f32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_add_f32 node:$ptr, node:$val)>;
1010
// atom.add instructions.  Each defm passes, in order: value register class,
// space string, type string, opcode string, matched fragment, immediate
// operand type, immediate node, and required predicate.  The *_USE_G variants
// match the _gen fragment but emit the ".global" space string, under their
// own useAtomRedG*forGen* predicate.

// 32-bit integer
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// 64-bit integer
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// f32 (immediate form uses f32imm / fpimm)
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
1035
1036 // atom_sub
1037
// Address-space-qualified fragments for atomic sub.  Suffix _g / _s / _gen
// selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_64 node:$ptr, node:$val)>;
1050
// Atomic sub, built on F_ATOMIC_2_NEG with the ".add" opcode string: the
// value operand is negated and then added.  TypeStr is the bare bit width
// ("32" / "64").  The *_USE_G variants match the _gen fragment but emit the
// ".global" space string, under their own useAtomRedG*forGen* predicate.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
1067
1068 // atom_swap
1069
// Address-space-qualified fragments for atomic swap.  Suffix _g / _s / _gen
// selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_64 node:$ptr, node:$val)>;
1082
// atom.exch instructions, typed .b32 / .b64.  The *_USE_G variants match the
// _gen fragment but emit the ".global" space string, under their own
// useAtomRedG*forGen* predicate.

// 32-bit
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// 64-bit
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1099
1100 // atom_max
1101
// Address-space-qualified fragments for atomic max / umax.  Suffix _g / _s /
// _gen selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
// atomic_load_max_{32,64}
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_64 node:$ptr, node:$val)>;
// atomic_load_umax_{32,64}
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_64 node:$ptr, node:$val)>;
1126
// atom.max instructions.  The max fragments use the .s32 / .s64 type strings
// and the umax fragments use .u32 / .u64.  The *_USE_G variants match the
// _gen fragment but emit the ".global" space string, under their own
// useAtomRedG*forGen* predicate.

// max, .s32
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// max, .s64
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// umax, .u32
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// umax, .u64
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1159
1160 // atom_min
1161
// Address-space-qualified fragments for atomic min / umin.  Suffix _g / _s /
// _gen selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
// atomic_load_min_{32,64}
def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_64 node:$ptr, node:$val)>;
// atomic_load_umin_{32,64}
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_64 node:$ptr, node:$val)>;
1186
// atom.min instructions.  The min fragments use the .s32 / .s64 type strings
// and the umin fragments use .u32 / .u64.  The *_USE_G variants match the
// _gen fragment but emit the ".global" space string, under their own
// useAtomRedG*forGen* predicate.

// min, .s32
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// min, .s64
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// umin, .u32
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// umin, .u64
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1219
1220 // atom_inc  atom_dec
1221
// Address-space-qualified fragments for 32-bit atomic inc / dec.  These wrap
// the int_nvvm_atomic_load_{inc,dec}_32 intrinsics.  Suffix _g / _s / _gen
// selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
1234
// atom.inc / atom.dec instructions, typed .u32 only.  The *_USE_G variants
// match the _gen fragment but emit the ".global" space string, under the
// useAtomRedG32forGen32 predicate.

// inc
defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// dec
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1251
1252 // atom_and
1253
// Address-space-qualified fragments for atomic and.  Suffix _g / _s / _gen
// selects ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_and_32 node:$ptr, node:$val)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_64 node:$ptr, node:$val)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_and_64 node:$ptr, node:$val)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_and_64 node:$ptr, node:$val)>;
1266
// atom.and instructions, typed .b32 / .b64.  The *_USE_G variants match the
// _gen fragment but emit the ".global" space string, under their own
// useAtomRedG*forGen* predicate.

// 32-bit
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
// 64-bit
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1283
1284 // atom_or
// Wraps atomic_load_or_{32,64} in the ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK
// fragments (declared outside this section; presumably address-space checks --
// confirm there), then instantiates F_ATOMIC_2 once per fragment and width
// with the ".b32"/".b64" type strings and the ".or" opcode string.
// The *_USE_G records reuse the generic fragment but pass ".global" as the
// space string.  Note the shared-space records are listed after the generic
// ones within each width here.
1285
1286 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1287   (atomic_load_or_32 node:$a, node:$b)>;
1288 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1289   (atomic_load_or_32 node:$a, node:$b)>;
1290 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1291   (atomic_load_or_32 node:$a, node:$b)>;
1292 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1293   (atomic_load_or_64 node:$a, node:$b)>;
1294 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1295   (atomic_load_or_64 node:$a, node:$b)>;
1296 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1297   (atomic_load_or_64 node:$a, node:$b)>;
1298
1299 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1300   atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
1301 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1302   atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
1303 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1304   ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1305 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1306   atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
1307 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1308   atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
1309 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1310   atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
1311 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1312   ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1313 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1314   atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
1315
1316 // atom_xor
// Wraps atomic_load_xor_{32,64} in the ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK
// fragments (declared outside this section; presumably address-space checks --
// confirm there), then instantiates F_ATOMIC_2 once per fragment and width
// with the ".b32"/".b64" type strings and the ".xor" opcode string.
// The *_USE_G records reuse the generic fragment but pass ".global" as the
// space string, under the useAtomRedG{32,64}forGen{32,64} predicates.
1317
1318 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1319   (atomic_load_xor_32 node:$a, node:$b)>;
1320 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1321   (atomic_load_xor_32 node:$a, node:$b)>;
1322 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1323   (atomic_load_xor_32 node:$a, node:$b)>;
1324 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1325   (atomic_load_xor_64 node:$a, node:$b)>;
1326 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1327   (atomic_load_xor_64 node:$a, node:$b)>;
1328 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1329   (atomic_load_xor_64 node:$a, node:$b)>;
1330
1331 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1332   atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
1333 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1334   atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
1335 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1336   atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
1337 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1338   ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
1339 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1340   atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
1341 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1342   atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
1343 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1344   atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
1345 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1346   ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
1347
1348 // atom_cas
// Wraps the three-operand atomic_cmp_swap_{32,64} nodes in the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragments (declared outside this section;
// presumably address-space checks -- confirm there), then instantiates
// F_ATOMIC_3 once per fragment and width with the ".cas" opcode string.
// Unlike the two-operand instantiations, no `imm` node argument is passed.
// The *_USE_G records reuse the generic fragment but pass ".global" as the
// space string.
1349
1350 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1351   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1352 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1353   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1354 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1355   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1356 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1357   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1358 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1359   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1360 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1361   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1362
1363 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1364   atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
1365 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1366   atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
1367 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1368   atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
1369 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1370   ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
1371 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1372   atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
1373 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1374   atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
1375 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1376   atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
1377 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1378   ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
1379
1380 // Support for scoped atomic operations.  Matches
1381 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1382 // and converts it into the appropriate instruction.
1383 // NOTE: not all possible combinations are implemented
1384 //  'space' is limited to generic as it's the only one needed to support CUDA.
1385 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the single-instruction building block shared by the 2- and
// 3-operand scoped atomics below:
//   AsmStr   - assembly string passed straight to NVPTXInst.
//   regclass - register class of the sole output, $result.
//   Preds    - predicate list attached through Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value is `set` into $result in the selection pattern.
1386 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1387                   dag ins, dag Operands>
1388       : NVPTXInst<(outs regclass:$result), ins,
1389                   AsmStr,
1390                   [(set regclass:$result, Operands)]>,
1391         Requires<Preds>;
1392
1393 // Define instruction variants for all addressing modes.
// Emits four anonymous ATOM23_impl records for a two-operand intrinsic:
// $src held in Int32Regs or Int64Regs, crossed with $b as a register or as an
// immediate.  The register forms are wrapped in AddedComplexity = 1; the
// immediate forms are left outside that `let`.
1394 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1395                        NVPTXRegClass regclass, Operand ImmType,
1396                        SDNode Imm, ValueType ImmTy,
1397                        list<Predicate> Preds> {
1398   let AddedComplexity = 1 in {
1399     def : ATOM23_impl<AsmStr, regclass, Preds,
1400                       (ins Int32Regs:$src, regclass:$b),
1401                       (Intr Int32Regs:$src, regclass:$b)>;
1402     def : ATOM23_impl<AsmStr, regclass, Preds,
1403                       (ins Int64Regs:$src, regclass:$b),
1404                       (Intr Int64Regs:$src, regclass:$b)>;
1405   }
1406   // tablegen can't infer argument types from Intrinsic (though it can
1407   // from Instruction) so we have to enforce specific type on
1408   // immediates via explicit cast to ImmTy.
1409   def : ATOM23_impl<AsmStr, regclass, Preds,
1410                     (ins Int32Regs:$src, ImmType:$b),
1411                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1412   def : ATOM23_impl<AsmStr, regclass, Preds,
1413                     (ins Int64Regs:$src, ImmType:$b),
1414                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1415 }
1416
// Three-operand counterpart of the two-operand variant generator: emits eight
// anonymous ATOM23_impl records covering $src in Int32Regs/Int64Regs crossed
// with every register/immediate combination of $b and $c.  AddedComplexity is
// 2 for reg/reg, 1 when exactly one of $b/$c is an immediate, and left outside
// any `let` for imm/imm.  Immediates are cast to ImmTy in the pattern.
1417 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1418                        NVPTXRegClass regclass, Operand ImmType,
1419                        SDNode Imm, ValueType ImmTy,
1420                        list<Predicate> Preds> {
1421   // Variants for register/immediate permutations of $b and $c
1422   let AddedComplexity = 2 in {
1423     def : ATOM23_impl<AsmStr, regclass, Preds,
1424                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1425                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1426     def : ATOM23_impl<AsmStr, regclass, Preds,
1427                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1428                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1429   }
1430   let AddedComplexity = 1 in {
1431     def : ATOM23_impl<AsmStr, regclass, Preds,
1432                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1433                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1434     def : ATOM23_impl<AsmStr, regclass, Preds,
1435                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1436                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1437     def : ATOM23_impl<AsmStr, regclass, Preds,
1438                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1439                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1440     def : ATOM23_impl<AsmStr, regclass, Preds,
1441                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1442                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1443   }
1444   def : ATOM23_impl<AsmStr, regclass, Preds,
1445                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1446                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1447   def : ATOM23_impl<AsmStr, regclass, Preds,
1448                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1449                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1450 }
1451
1452 // Constructs intrinsic name and instruction asm strings.
// Asm string:  "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//              .OpStr .TypeStr " \t$result, [$src], $b;"
// Intrinsic:   int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>],
//              looked up by name with !cast<Intrinsic>.
// NOTE(review): the asm string drops the scope suffix when ScopeStr is "gpu",
// whereas the intrinsic name drops it when ScopeStr is "".  The callers in this
// file section only pass "cta" and "sys", so neither special case is exercised
// there -- confirm the intended default before adding a new scope.
1453 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1454                        string ScopeStr, string SpaceStr,
1455                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1456                        ValueType ImmTy, list<Predicate> Preds> {
1457   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1458                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1459                             # "." # OpStr # "." # TypeStr
1460                             # " \t$result, [$src], $b;",
1461                      !cast<Intrinsic>(
1462                             "int_nvvm_atomic_" # OpStr
1463                             # "_" # SpaceStr # "_" # IntTypeStr
1464                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1465                      regclass, ImmType, Imm, ImmTy, Preds>;
1466 }
// Three-operand name/asm builder.
// Asm string:  "atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"]
//              .OpStr .TypeStr " \t$result, [$src], $b, $c;"
// Intrinsic:   int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>],
//              looked up by name with !cast<Intrinsic>.
// NOTE(review): the scope suffix is dropped from the asm string for "gpu" but
// from the intrinsic name for "" -- the two conditions differ; confirm intent.
1467 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1468                        string ScopeStr, string SpaceStr,
1469                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1470                        ValueType ImmTy, list<Predicate> Preds> {
1471   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1472                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1473                             # "." # OpStr # "." # TypeStr
1474                             # " \t$result, [$src], $b, $c;",
1475                      !cast<Intrinsic>(
1476                             "int_nvvm_atomic_" # OpStr
1477                             # "_" # SpaceStr # "_" # IntTypeStr
1478                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1479                      regclass, ImmType, Imm, ImmTy, Preds>;
1480 }
1481
1482 // Constructs variants for different address spaces.
1483 // For now we only need variants for generic space pointers.
// Both multiclasses forward every argument unchanged and fix SpaceStr to "gen";
// the 2-operand form expands ATOM2N_impl and the 3-operand form ATOM3N_impl.
1484 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1485                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1486                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1487    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1488                             regclass, ImmType, Imm, ImmTy, Preds>;
1489 }
1490 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1491                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1492                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1493    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1494                             regclass, ImmType, Imm, ImmTy, Preds>;
1495 }
1496
1497 // Constructs variants for different scopes of atomic op.
// Each multiclass instantiates its *A_impl counterpart twice, with ScopeStr
// "cta" and "sys", and appends hasAtomScope to the caller-supplied Preds.
1498 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1499                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1500                        ValueType ImmTy, list<Predicate> Preds> {
1501    // .gpu scope is default and is currently covered by existing
1502    // atomics w/o explicitly specified scope.
1503    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1504                            regclass, ImmType, Imm, ImmTy,
1505                            !listconcat(Preds,[hasAtomScope])>;
1506    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1507                            regclass, ImmType, Imm, ImmTy,
1508                            !listconcat(Preds,[hasAtomScope])>;
1509 }
1510 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1511            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1512            list<Predicate> Preds> {
1513    // No need to define ".gpu"-scoped atomics.  They do the same thing
1514    // as the regular, non-scoped atomics defined elsewhere.
1515    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1516                            regclass, ImmType, Imm, ImmTy,
1517                            !listconcat(Preds,[hasAtomScope])>;
1518    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1519                            regclass, ImmType, Imm, ImmTy,
1520                            !listconcat(Preds,[hasAtomScope])>;
1521 }
1522
1523 // atom.add
// Type variants for scoped add: s32, u32, u64 use the "i" intrinsic-name
// component with no extra predicates; f32 and f64 use "f" with fpimm
// immediates and are gated by hasAtomAddF32 / hasAtomAddF64 respectively.
1524 multiclass ATOM2_add_impl<string OpStr> {
1525    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1526    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1527    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1528    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1529                             [hasAtomAddF32]>;
1530    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1531                             [hasAtomAddF64]>;
1532 }
1533
1534 // atom.{and,or,xor}
// b32 carries no extra predicate; b64 is gated by hasAtomBitwise64.
1535 multiclass ATOM2_bitwise_impl<string OpStr> {
1536    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1537    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1538                             [hasAtomBitwise64]>;
1539 }
1540
1541 // atom.exch
// b32 and b64 variants, both passing an empty extra-predicate list.
1542 multiclass ATOM2_exch_impl<string OpStr> {
1543    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1544    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1545 }
1546
1547 // atom.{min,max}
// Signed and unsigned 32-bit variants carry no extra predicate; the 64-bit
// variants are gated by hasAtomMinMax64.
1548 multiclass ATOM2_minmax_impl<string OpStr> {
1549    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1550    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1551    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1552                             [hasAtomMinMax64]>;
1553    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1554                             [hasAtomMinMax64]>;
1555 }
1556
1557 // atom.{inc,dec}
// Only a u32 variant is defined.
1558 multiclass ATOM2_incdec_impl<string OpStr> {
1559    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1560 }
1561
1562 // atom.cas
// Three-operand scoped compare-and-swap in b32 and b64 widths, both passing
// an empty extra-predicate list.
1563 multiclass ATOM3_cas_impl<string OpStr> {
1564    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1565    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1566 }
1567
// Instantiate the scoped-atomic multiclasses once per operation.  The string
// argument is used both as the PTX opcode component and as the {op} part of
// the int_nvvm_atomic_* intrinsic name.
1568 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1569 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1570 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1571 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1572 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1573 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1574 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1575 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1576 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1577 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1578
1579 //-----------------------------------
1580 // Support for ldu on sm_20 or later
1581 //-----------------------------------
1582
1583 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1584 // read-only in a kernel.
1585
1586 // Scalar
1587
// Scalar ldu.global records, one per addressing form of $src.  TyStr supplies
// the type suffix together with the operand text; every record has an empty
// pattern list and requires hasLDU.
1588 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1589   def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1590                          "ldu.global." # TyStr, []>,
1591                Requires<[hasLDU]>;
1592   def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1593                          "ldu.global." # TyStr, []>,
1594                Requires<[hasLDU]>;
1595   def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1596                          "ldu.global." # TyStr, []>,
1597                Requires<[hasLDU]>;
1598   def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1599                          "ldu.global." # TyStr, []>,
1600                Requires<[hasLDU]>;
1601   def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1602                          "ldu.global." # TyStr, []>,
1603                Requires<[hasLDU]>;
1604 }
1605
// Scalar ldu.global instantiations.  The i8 variant loads with a u8 type
// suffix into Int16Regs; the p32/p64 (pointer) variants use the same asm text
// and register classes as i32/i64.
1606 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1607 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1608 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1609 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1610 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1611 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1612 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1613 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1614
1615 // vector
1616
1617 // Elementized vector ldu
// Two-element vector ldu.global: both results live in `regclass`; one
// pattern-less record per addressing form of $src.
1618 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1619   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1620                           (ins Int32Regs:$src),
1621                           "ldu.global." # TyStr, []>;
1622   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1623                           (ins Int64Regs:$src),
1624                           "ldu.global." # TyStr, []>;
1625   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1626                           (ins MEMri:$src),
1627                           "ldu.global." # TyStr, []>;
1628   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1629                           (ins MEMri64:$src),
1630                           "ldu.global." # TyStr, []>;
1631   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1632                           (ins imemAny:$src),
1633                           "ldu.global." # TyStr, []>;
1634 }
1635
// Four-element vector ldu.global: four results in `regclass`; one pattern-less
// record per addressing form of $src.
1636 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1637   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1638                                 regclass:$dst3, regclass:$dst4),
1639                           (ins Int32Regs:$src), "ldu.global." # TyStr, []>;
1640   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1641                                 regclass:$dst3, regclass:$dst4),
1642                           (ins Int64Regs:$src), "ldu.global." # TyStr, []>;
1643   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1644                                 regclass:$dst3, regclass:$dst4),
1645                           (ins MEMri:$src), "ldu.global." # TyStr, []>;
1646   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1647                                 regclass:$dst3, regclass:$dst4),
1648                           (ins MEMri64:$src), "ldu.global." # TyStr, []>;
1649   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1650                                 regclass:$dst3, regclass:$dst4),
1651                           (ins imemAny:$src), "ldu.global." # TyStr, []>;
1652 }
1653
// Vector ldu.global instantiations.  "{{" / "}}" in the asm text are the
// escaped braces around the destination list.  8-bit element variants use
// Int16Regs, the same register class as the 16-bit ones.  v4 forms are
// provided for 8/16/32-bit integers and f32 only.
1654 defm INT_PTX_LDU_G_v2i8_ELE
1655   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1656 defm INT_PTX_LDU_G_v2i16_ELE
1657   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1658 defm INT_PTX_LDU_G_v2i32_ELE
1659   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1660 defm INT_PTX_LDU_G_v2f32_ELE
1661   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1662 defm INT_PTX_LDU_G_v2i64_ELE
1663   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1664 defm INT_PTX_LDU_G_v2f64_ELE
1665   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1666 defm INT_PTX_LDU_G_v4i8_ELE
1667   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1668 defm INT_PTX_LDU_G_v4i16_ELE
1669   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1670     Int16Regs>;
1671 defm INT_PTX_LDU_G_v4i32_ELE
1672   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1673     Int32Regs>;
1674 defm INT_PTX_LDU_G_v4f32_ELE
1675   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1676     Float32Regs>;
1677
1678
1679 //-----------------------------------
1680 // Support for ldg on sm_35 or later 
1681 //-----------------------------------
1682
1683 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1684 // non-coherent texture cache, and therefore the values read must be read-only
1685 // during the lifetime of the kernel.
1686
// Scalar ld.global.nc records, one per addressing form of $src.  TyStr
// supplies the type suffix together with the operand text; every record has an
// empty pattern list and requires hasLDG.
1687 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1688   def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1689                          "ld.global.nc." # TyStr, []>,
1690                Requires<[hasLDG]>;
1691   def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1692                          "ld.global.nc." # TyStr, []>,
1693                Requires<[hasLDG]>;
1694   def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1695                          "ld.global.nc." # TyStr, []>,
1696                Requires<[hasLDG]>;
1697   def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1698                          "ld.global.nc." # TyStr, []>,
1699                Requires<[hasLDG]>;
1700   def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1701                          "ld.global.nc." # TyStr, []>,
1702                Requires<[hasLDG]>;
1703 }
1704
// Scalar ld.global.nc instantiations.  The i8 variant loads with a u8 type
// suffix into Int16Regs; the p32/p64 (pointer) variants use the same asm text
// and register classes as i32/i64.
1705 defm INT_PTX_LDG_GLOBAL_i8
1706   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1707 defm INT_PTX_LDG_GLOBAL_i16
1708   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1709 defm INT_PTX_LDG_GLOBAL_i32
1710   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1711 defm INT_PTX_LDG_GLOBAL_i64
1712   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1713 defm INT_PTX_LDG_GLOBAL_f32
1714   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1715 defm INT_PTX_LDG_GLOBAL_f64
1716   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1717 defm INT_PTX_LDG_GLOBAL_p32
1718   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1719 defm INT_PTX_LDG_GLOBAL_p64
1720   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1721
1722 // vector
1723
1724 // Elementized vector ldg 
// Two-element vector ld.global.nc: both results live in `regclass`; one
// pattern-less record per addressing form of $src.
1725 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1726   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1727                           (ins Int32Regs:$src),
1728                           "ld.global.nc." # TyStr, []>;
1729   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1730                           (ins Int64Regs:$src),
1731                           "ld.global.nc." # TyStr, []>;
1732   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1733                           (ins MEMri:$src),
1734                           "ld.global.nc." # TyStr, []>;
1735   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1736                           (ins MEMri64:$src),
1737                           "ld.global.nc." # TyStr, []>;
1738   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1739                           (ins imemAny:$src),
1740                           "ld.global.nc." # TyStr, []>;
1741 }
1742
// Four-element vector ld.global.nc: four results in `regclass`; one
// pattern-less record per addressing form of $src.
1743 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1744   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1745                                 regclass:$dst3, regclass:$dst4),
1746                           (ins Int32Regs:$src), "ld.global.nc." # TyStr, []>;
1747   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1748                                 regclass:$dst3, regclass:$dst4),
1749                           (ins Int64Regs:$src), "ld.global.nc." # TyStr, []>;
1750   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1751                                 regclass:$dst3, regclass:$dst4),
1752                           (ins MEMri:$src), "ld.global.nc." # TyStr, []>;
1753   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1754                                 regclass:$dst3, regclass:$dst4),
1755                           (ins MEMri64:$src), "ld.global.nc." # TyStr, []>;
1756   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1757                                 regclass:$dst3, regclass:$dst4),
1758                           (ins imemAny:$src), "ld.global.nc." # TyStr, []>;
1759 }
1760
1761 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ld.global.nc instantiations.  "{{" / "}}" in the asm text are the
// escaped braces around the destination list.  8-bit element variants use
// Int16Regs, the same register class as the 16-bit ones.  v4 forms are
// provided for 8/16/32-bit integers and f32 only.
1762 defm INT_PTX_LDG_G_v2i8_ELE
1763   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1764 defm INT_PTX_LDG_G_v2i16_ELE
1765   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1766 defm INT_PTX_LDG_G_v2i32_ELE
1767   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1768 defm INT_PTX_LDG_G_v2f32_ELE
1769   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1770 defm INT_PTX_LDG_G_v2i64_ELE
1771   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1772 defm INT_PTX_LDG_G_v2f64_ELE
1773   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1774 defm INT_PTX_LDG_G_v4i8_ELE
1775   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1776 defm INT_PTX_LDG_G_v4i16_ELE
1777   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1778 defm INT_PTX_LDG_G_v4i32_ELE
1779   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1780 defm INT_PTX_LDG_G_v4f32_ELE
1781   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1782
1783
// Non-generic -> generic address conversion for intrinsic Intrin.
//   _yes / _yes_64 : emit "cvta.<Str>.u32/.u64" and require hasGenericLdSt.
//   _no  / _no_64  : emit a plain mov.u32/.u64 and carry no Requires<>.
1784 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1785   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1786                        "cvta." # Str # ".u32 \t$result, $src;",
1787                        [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1788              Requires<[hasGenericLdSt]>;
1789   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1790                           "cvta." # Str # ".u64 \t$result, $src;",
1791                           [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1792                 Requires<[hasGenericLdSt]>;
1793
1794 // @TODO: Are these actually needed?  I believe global addresses will be copied
1795 // to register values anyway.
1796    /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1797           !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1798       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1799       Requires<[hasGenericLdSt]>;
1800    def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1801           !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1802       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1803       Requires<[hasGenericLdSt]>;*/
1804
1805   def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1806                       "mov.u32 \t$result, $src;",
1807                       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1808   def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1809                          "mov.u64 \t$result, $src;",
1810                          [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1811
1812 // @TODO: Are these actually needed?  I believe global addresses will be copied
1813 // to register values anyway.
1814    /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1815           "mov.u32 \t$result, $src;",
1816       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1817    def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1818           "mov.u64 \t$result, $src;",
1819       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
1820 }
1821
// Generic -> non-generic address conversion for intrinsic Intrin.
//   _yes / _yes_64 : emit "cvta.to.<Str>.u32/.u64" and require hasGenericLdSt.
//   _no  / _no_64  : emit a plain mov.u32/.u64 and carry no Requires<>.
1822 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1823   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1824                        "cvta.to." # Str # ".u32 \t$result, $src;",
1825                        [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1826              Requires<[hasGenericLdSt]>;
1827   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1828                           "cvta.to." # Str # ".u64 \t$result, $src;",
1829                           [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1830                 Requires<[hasGenericLdSt]>;
1831   def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1832                       "mov.u32 \t$result, $src;",
1833                       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1834   def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1835                          "mov.u64 \t$result, $src;",
1836                          [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1837 }
1838
// Address-space conversion records: cvta_<space> maps <space> -> generic via
// NG_TO_G, cvta_to_<space> maps generic -> <space> via G_TO_NG.  The asm
// suffix for the constant space is "const" while the intrinsic names spell it
// "constant".
1839 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1840 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1841 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1842 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1843
1844 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1845 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1846 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1847 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1848
1849
1850 // nvvm.ptr.gen.to.param
// Selected as a plain register-to-register move: mov.u32 for the Int32Regs
// form and mov.u64 for the Int64Regs form.  Neither record has a Requires<>.
1851 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1852   (ins Int32Regs:$src),
1853                         "mov.u32 \t$result, $src;",
1854                               [(set Int32Regs:$result,
1855                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1856 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1857   (ins Int64Regs:$src),
1858                         "mov.u64 \t$result, $src;",
1859                               [(set Int64Regs:$result,
1860                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1861
1862
1863 // nvvm.move intrinsics
// One instruction per int_nvvm_move_* intrinsic. Each selects to a single PTX
// mov whose type suffix matches the register class: .b16/.b32/.b64 for the
// integer variants, .f32/.f64 for the floating-point variants.
// int_nvvm_move_ptr is matched by two defs (ptr32/ptr64), distinguished only
// by the register class of the operand, and is emitted as mov.u32/mov.u64.
1864 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1865                              "mov.b16 \t$r, $s;",
1866                              [(set Int16Regs:$r,
1867                                (int_nvvm_move_i16 Int16Regs:$s))]>;
1868 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1869                              "mov.b32 \t$r, $s;",
1870                              [(set Int32Regs:$r,
1871                                (int_nvvm_move_i32 Int32Regs:$s))]>;
1872 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1873                              "mov.b64 \t$r, $s;",
1874                              [(set Int64Regs:$r,
1875                                (int_nvvm_move_i64 Int64Regs:$s))]>;
1876 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1877                              "mov.f32 \t$r, $s;",
1878                              [(set Float32Regs:$r,
1879                                (int_nvvm_move_float Float32Regs:$s))]>;
1880 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1881                              "mov.f64 \t$r, $s;",
1882                              [(set Float64Regs:$r,
1883                                (int_nvvm_move_double Float64Regs:$s))]>;
1884 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1885                              "mov.u32 \t$r, $s;",
1886                              [(set Int32Regs:$r,
1887                                (int_nvvm_move_ptr Int32Regs:$s))]>;
1888 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1889                              "mov.u64 \t$r, $s;",
1890                              [(set Int64Regs:$r,
1891                                (int_nvvm_move_ptr Int64Regs:$s))]>;
1892
1893 // @TODO: Are these actually needed, or will we always just see symbols
1894 // copied to registers first?
1895 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1896                              "mov.u32 \t$r, $s;",
1897                              [(set Int32Regs:$r,
1898                              (int_nvvm_move_ptr texternalsym:$s))]>;
1899 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1900                              "mov.u64 \t$r, $s;",
1901                              [(set Int64Regs:$r,
1902                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
1903
1904
1905 // MoveParam        %r1, param
1906 // ptr_local_to_gen %r2, %r1
1907 // ptr_gen_to_local %r3, %r2
1908 // ->
1909 // mov %r1, param
1910
1911 // @TODO: Revisit this.  There is a type
1912 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1913 // instructions are not currently defined. However, we can use the ptr
1914 // variants and the asm printer will do the right thing.
// The two patterns below fold the local->generic->local round trip applied to
// a MoveParam of an external symbol into a single nvvm_move_ptr{64,32},
// selected by the result type (i64 or i32).
1915 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1916                 (MoveParam texternalsym:$src)))),
1917                (nvvm_move_ptr64  texternalsym:$src)>;
1918 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1919                 (MoveParam texternalsym:$src)))),
1920                (nvvm_move_ptr32  texternalsym:$src)>;
1921
// Moves an imem operand into a 64-bit register with mov.u64. The pattern list
// is empty, so this instruction is never chosen by a TableGen pattern here.
// NOTE(review): presumably created by C++ selection code for texture/surface
// handles -- confirm against the users of texsurf_handles.
1922 def texsurf_handles
1923   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1924               "mov.u64 \t$result, $src;", []>;
1925
1926 //-----------------------------------
1927 // Compiler Error Warn
1928 // - Just ignore them in codegen
1929 //-----------------------------------
1930
// Each def matches int_nvvm_compiler_warn / int_nvvm_compiler_error for a
// 32-bit or 64-bit register operand. The asm string is only a "//" comment
// and does not reference $a, so the operand is consumed without being printed.
1931 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1932                 "// llvm.nvvm.compiler.warn()",
1933                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
1934 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1935                 "// llvm.nvvm.compiler.warn()",
1936                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
1937 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1938                 "// llvm.nvvm.compiler.error()",
1939                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
1940 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1941                 "// llvm.nvvm.compiler.error()",
1942                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
1943
1944
1945 // isspacep
1946
// isspacep.<space> for the const, global, local and shared spaces, each in a
// 32-bit and a 64-bit address flavour. The result $d is an Int1Regs predicate
// and $a is the address being tested. Only the two const variants carry a
// Requires<[hasPTX31]> predicate; the others are unconditional.
1947 def ISSPACEP_CONST_32
1948   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1949               "isspacep.const \t$d, $a;",
1950               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
1951     Requires<[hasPTX31]>;
1952 def ISSPACEP_CONST_64
1953   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1954               "isspacep.const \t$d, $a;",
1955               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
1956     Requires<[hasPTX31]>;
1957 def ISSPACEP_GLOBAL_32
1958   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1959               "isspacep.global \t$d, $a;",
1960               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
1961 def ISSPACEP_GLOBAL_64
1962   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1963               "isspacep.global \t$d, $a;",
1964               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
1965 def ISSPACEP_LOCAL_32
1966   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1967               "isspacep.local \t$d, $a;",
1968               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
1969 def ISSPACEP_LOCAL_64
1970   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1971               "isspacep.local \t$d, $a;",
1972               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
1973 def ISSPACEP_SHARED_32
1974   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1975               "isspacep.shared \t$d, $a;",
1976               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
1977 def ISSPACEP_SHARED_64
1978   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1979               "isspacep.shared \t$d, $a;",
1980               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1981
1982
1983 // Special register reads
// MOV_SPECIAL copies a SpecialRegs operand into a 32-bit register with
// mov.b32. It has no pattern of its own; the anonymous Pats that follow map
// each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) to MOV_SPECIAL
// applied to the matching ENVREG<N> register.
1984 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
1985                             (ins SpecialRegs:$r),
1986                             "mov.b32\t$d, $r;", []>;
1987
1988 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
1989 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
1990 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
1991 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
1992 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
1993 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
1994 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
1995 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
1996 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
1997 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
1998 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
1999 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2000 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2001 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2002 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2003 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2004 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2005 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2006 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2007 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2008 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2009 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2010 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2011 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2012 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2013 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2014 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2015 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2016 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2017 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2018 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2019 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2020
2021
2022 // rotate builtin support
2023
// int_nvvm_rotate_b32 lowering.
// With hasHWROT32 the intrinsic becomes a single shf.l.wrap.b32 whose two data
// inputs are both $src; the amount is either an immediate (_IMM) or a
// register (_REG).
2024 def ROTATE_B32_HW_IMM
2025   : NVPTXInst<(outs Int32Regs:$dst),
2026               (ins  Int32Regs:$src, i32imm:$amt),
2027               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2028               [(set Int32Regs:$dst,
2029                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2030               Requires<[hasHWROT32]> ;
2031
2032 def ROTATE_B32_HW_REG
2033   : NVPTXInst<(outs Int32Regs:$dst),
2034               (ins  Int32Regs:$src, Int32Regs:$amt),
2035               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2036               [(set Int32Regs:$dst,
2037                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2038               Requires<[hasHWROT32]> ;
2039
// With noHWROT32 the intrinsic is redirected to ROT32imm_sw / ROTL32reg_sw,
// which are defined outside this chunk. The immediate form passes both the
// amount and SUB_FRM_32 of the amount.
2040 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2041           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2042       Requires<[noHWROT32]> ;
2043
2044 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2045           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2046       Requires<[noHWROT32]> ;
2047
// Split a 64-bit register into one of its 32-bit halves by unpacking it with
// mov.b64 into a two-element vector. GET_LO_INT64 binds $dst to the first
// element and GET_HI_INT64 to the second; the other element goes to a scratch
// register (%dummy) declared inside the braces. Neither def has a pattern;
// both are referenced explicitly from Pat<> results.
2048 let hasSideEffects = 0 in {
2049   def GET_LO_INT64
2050     : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2051                 "{{\n\t"
2052                 ".reg .b32 %dummy;\n\t"
2053                 "mov.b64 \t{$dst,%dummy}, $src;\n\t"
2054                 "}}",
2055                 []>;
2056
2057   def GET_HI_INT64
2058     : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2059                 "{{\n\t"
2060                 ".reg .b32 %dummy;\n\t"
2061                 "mov.b64 \t{%dummy,$dst}, $src;\n\t"
2062                 "}}",
2063                 []>;
2064 }
2065
// Packs two 32-bit registers into one 64-bit register with a vector mov;
// $lo is the first vector element and $hi the second. No selection pattern.
2066 let hasSideEffects = 0 in {
2067   def PACK_TWO_INT32
2068     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2069                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2070 }
2071
// int_nvvm_swap_lo_hi_b64: re-pack the source with its halves exchanged (the
// high half goes in PACK_TWO_INT32's $lo slot, the low half in its $hi slot).
2072 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2073           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2074                           (GET_LO_INT64 Int64Regs:$src))> ;
2075
2076 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2077 // no side effects.
// Four pattern-less defs: left (shf.l) and right (shf.r) wrap-mode funnel
// shifts over the ($lo, $hi) pair, each with an immediate or a register
// amount. All are gated on hasHWROT32 and are only referenced explicitly from
// Pat<> results.
2078 let hasSideEffects = 0 in {
2079   def SHF_L_WRAP_B32_IMM
2080     : NVPTXInst<(outs Int32Regs:$dst),
2081                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2082                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2083       Requires<[hasHWROT32]>;
2084
2085   def SHF_L_WRAP_B32_REG
2086     : NVPTXInst<(outs Int32Regs:$dst),
2087                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2088                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2089       Requires<[hasHWROT32]>;
2090
2091   def SHF_R_WRAP_B32_IMM
2092     : NVPTXInst<(outs Int32Regs:$dst),
2093                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2094                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2095       Requires<[hasHWROT32]>;
2096
2097   def SHF_R_WRAP_B32_REG
2098     : NVPTXInst<(outs Int32Regs:$dst),
2099                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2100                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2101       Requires<[hasHWROT32]>;
2102 }
2103
2104 // HW version of rotate 64
// Each 64-bit rotate is built from two 32-bit funnel shifts whose results are
// re-packed with PACK_TWO_INT32 (first operand = new low half, second = new
// high half). For the left rotate the new low half is shf.l over (hi, lo) and
// the new high half is shf.l over (lo, hi); the right rotate uses shf.r with
// the operand pairs exchanged.
// NOTE(review): the PTX ISA describes shf.*.wrap as using the amount modulo
// 32, so for amt >= 32 these expansions appear to rotate by amt - 32 without
// swapping the halves -- confirm against the intrinsic's intended contract.
2105 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2106           (PACK_TWO_INT32
2107             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2108                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2109             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2110                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2111       Requires<[hasHWROT32]>;
2112
2113 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2114           (PACK_TWO_INT32
2115             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2116                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2117             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2118                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2119       Requires<[hasHWROT32]>;
2120
2121
2122 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2123           (PACK_TWO_INT32
2124             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2125                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2126             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2127                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2128       Requires<[hasHWROT32]>;
2129
2130 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2131           (PACK_TWO_INT32
2132             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2133                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2134             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2135                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2136       Requires<[hasHWROT32]>;
2137
2138 // SW version of rotate 64
// Used when noHWROT32 holds. The immediate forms go through ROT64imm_sw, which
// is defined outside this chunk; its operand order (left-shift amount, then
// right-shift amount) is inferred from the rotate-right pattern below. A
// 64-bit rotate needs the complementary amount 64 - amt, so both immediate
// patterns must use SUB_FRM_64. The left-rotate pattern previously used
// SUB_FRM_32, which pairs a shift by amt with a shift by 32 - amt and so does
// not form a 64-bit rotate.
2139 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2140           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2141       Requires<[noHWROT32]>;
// Register-amount left rotate: delegated to ROTL64reg_sw.
2142 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2143           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2144       Requires<[noHWROT32]>;
// Immediate right rotate: same instruction with the two amounts exchanged.
2145 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2146           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2147       Requires<[noHWROT32]>;
// Register-amount right rotate: delegated to ROTR64reg_sw.
2148 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2149           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2150       Requires<[noHWROT32]>;
2151
2152
2153 //-----------------------------------
2154 // Texture Intrinsics
2155 //-----------------------------------
2156
2157 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
2158 // also be defined in NVPTXReplaceImageHandles.cpp
2159
2160 // texmode_independent
2161 let IsTex = 1, IsTexModeUnified = 0 in {
2162 // Texture fetch instructions using handles
// 1D texture fetches ("tex.1d.v4.*").
// Naming: TEX_1D_<result type>_<coordinate type>[_LEVEL|_GRAD], mirroring the
// two type suffixes in the asm string. Every def writes a four-element result
// vector {$r, $g, $b, $a} and reads the Int64Regs operands $t and $s
// (presumably the texture and sampler handles -- confirm) plus one
// coordinate $x. _LEVEL adds an explicit $lod operand ("tex.level"); _GRAD
// adds $gradx and $grady ("tex.grad"). All pattern lists are empty, so none
// of these is chosen by a TableGen pattern in this block.
2163 def TEX_1D_F32_S32
2164   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2165                     Float32Regs:$b, Float32Regs:$a),
2166               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2167               "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2168               []>;
2169 def TEX_1D_F32_F32
2170   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2171                     Float32Regs:$b, Float32Regs:$a),
2172               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2173               "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2174               []>;
2175 def TEX_1D_F32_F32_LEVEL
2176   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2177                     Float32Regs:$b, Float32Regs:$a),
2178               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2179               "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2180               "[$t, $s, \\{$x\\}], $lod;",
2181               []>;
2182 def TEX_1D_F32_F32_GRAD
2183   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2184                     Float32Regs:$b, Float32Regs:$a),
2185               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2186                    Float32Regs:$gradx, Float32Regs:$grady),
2187               "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2188               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2189               []>;
2190 def TEX_1D_S32_S32
2191   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2192                     Int32Regs:$b, Int32Regs:$a),
2193               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2194               "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2195               []>;
2196 def TEX_1D_S32_F32
2197   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2198                     Int32Regs:$b, Int32Regs:$a),
2199               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2200               "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2201               []>;
2202 def TEX_1D_S32_F32_LEVEL
2203   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2204                     Int32Regs:$b, Int32Regs:$a),
2205               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2206                    Float32Regs:$lod),
2207               "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2208               "[$t, $s, \\{$x\\}], $lod;",
2209               []>;
2210 def TEX_1D_S32_F32_GRAD
2211   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2212                     Int32Regs:$b, Int32Regs:$a),
2213               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2214                    Float32Regs:$gradx, Float32Regs:$grady),
2215               "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2216               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2217               []>;
2218 def TEX_1D_U32_S32
2219   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2220                     Int32Regs:$b, Int32Regs:$a),
2221               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2222               "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2223               []>;
2224 def TEX_1D_U32_F32
2225   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2226                     Int32Regs:$b, Int32Regs:$a),
2227               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2228               "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2229               []>;
2230 def TEX_1D_U32_F32_LEVEL
2231   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2232                     Int32Regs:$b, Int32Regs:$a),
2233               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2234                    Float32Regs:$lod),
2235               "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2236               "[$t, $s, \\{$x\\}], $lod;",
2237               []>;
2238 def TEX_1D_U32_F32_GRAD
2239   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2240                     Int32Regs:$b, Int32Regs:$a),
2241               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2242                    Float32Regs:$gradx, Float32Regs:$grady),
2243               "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2244               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2245               []>;
2246
// 1D array texture fetches ("tex.a1d.v4.*").
// Same naming and operand scheme as the TEX_1D_* defs, with one extra
// Int32Regs operand $l that is printed as the first element of the coordinate
// vector {$l, $x} (presumably the array layer index -- confirm). $l stays an
// Int32Regs operand even when the coordinate $x is Float32Regs. _LEVEL adds
// $lod; _GRAD adds $gradx and $grady. All pattern lists are empty.
2247 def TEX_1D_ARRAY_F32_S32
2248   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2249                     Float32Regs:$b, Float32Regs:$a),
2250               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2251               "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2252               "[$t, $s, \\{$l, $x\\}];",
2253               []>;
2254 def TEX_1D_ARRAY_F32_F32
2255   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2256                     Float32Regs:$b, Float32Regs:$a),
2257               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2258               "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2259               "[$t, $s, \\{$l, $x\\}];",
2260               []>;
2261 def TEX_1D_ARRAY_F32_F32_LEVEL
2262   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2263                     Float32Regs:$b, Float32Regs:$a),
2264               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2265                    Float32Regs:$lod),
2266               "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2267               "[$t, $s, \\{$l, $x\\}], $lod;",
2268               []>;
2269 def TEX_1D_ARRAY_F32_F32_GRAD
2270   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2271                     Float32Regs:$b, Float32Regs:$a),
2272               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2273                    Float32Regs:$gradx, Float32Regs:$grady),
2274               "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2275               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2276               []>;
2277 def TEX_1D_ARRAY_S32_S32
2278   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2279                     Int32Regs:$b, Int32Regs:$a),
2280               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2281               "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2282               "[$t, $s, \\{$l, $x\\}];",
2283               []>;
2284 def TEX_1D_ARRAY_S32_F32
2285   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2286                     Int32Regs:$b, Int32Regs:$a),
2287               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2288               "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2289               "[$t, $s, \\{$l, $x\\}];",
2290               []>;
2291 def TEX_1D_ARRAY_S32_F32_LEVEL
2292   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2293                     Int32Regs:$b, Int32Regs:$a),
2294               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2295                    Float32Regs:$lod),
2296               "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2297               "[$t, $s, \\{$l, $x\\}], $lod;",
2298               []>;
2299 def TEX_1D_ARRAY_S32_F32_GRAD
2300   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2301                     Int32Regs:$b, Int32Regs:$a),
2302               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2303                    Float32Regs:$gradx, Float32Regs:$grady),
2304               "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2305               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2306               []>;
2307 def TEX_1D_ARRAY_U32_S32
2308   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2309                     Int32Regs:$b, Int32Regs:$a),
2310               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2311               "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2312               "[$t, $s, \\{$l, $x\\}];",
2313               []>;
2314 def TEX_1D_ARRAY_U32_F32
2315   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2316                     Int32Regs:$b, Int32Regs:$a),
2317               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2318               "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2319               "[$t, $s, \\{$l, $x\\}];",
2320               []>;
2321 def TEX_1D_ARRAY_U32_F32_LEVEL
2322   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2323                     Int32Regs:$b, Int32Regs:$a),
2324               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2325                    Float32Regs:$lod),
2326               "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2327               "[$t, $s, \\{$l, $x\\}], $lod;",
2328               []>;
2329 def TEX_1D_ARRAY_U32_F32_GRAD
2330   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2331                     Int32Regs:$b, Int32Regs:$a),
2332               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2333                    Float32Regs:$gradx, Float32Regs:$grady),
2334               "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2335               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2336               []>;
2337
// 2D texture fetches ("tex.2d.v4.*").
// Naming: TEX_2D_<result type>_<coordinate type>[_LEVEL|_GRAD]. Every def
// writes {$r, $g, $b, $a} and reads the Int64Regs operands $t and $s
// (presumably the texture and sampler handles -- confirm) plus the coordinate
// pair {$x, $y}. _LEVEL adds $lod; _GRAD adds two two-element gradient
// vectors, {$gradx0, $gradx1} and {$grady0, $grady1}. All pattern lists are
// empty.
2338 def TEX_2D_F32_S32
2339   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2340                     Float32Regs:$b, Float32Regs:$a),
2341               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2342               "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2343               "[$t, $s, \\{$x, $y\\}];",
2344               []>;
2345 def TEX_2D_F32_F32
2346   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2347                     Float32Regs:$b, Float32Regs:$a),
2348               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2349               "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2350               "[$t, $s, \\{$x, $y\\}];",
2351               []>;
2352 def TEX_2D_F32_F32_LEVEL
2353   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2354                     Float32Regs:$b, Float32Regs:$a),
2355               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2356                    Float32Regs:$lod),
2357               "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2358               "[$t, $s, \\{$x, $y\\}], $lod;",
2359               []>;
2360 def TEX_2D_F32_F32_GRAD
2361   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2362                     Float32Regs:$b, Float32Regs:$a),
2363               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2364                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2365                    Float32Regs:$grady0, Float32Regs:$grady1),
2366               "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2367               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2368               "\\{$grady0, $grady1\\};",
2369               []>;
2370 def TEX_2D_S32_S32
2371   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2372                     Int32Regs:$b, Int32Regs:$a),
2373               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2374               "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2375               "[$t, $s, \\{$x, $y\\}];",
2376               []>;
2377 def TEX_2D_S32_F32
2378   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2379                     Int32Regs:$b, Int32Regs:$a),
2380               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2381               "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2382               "[$t, $s, \\{$x, $y\\}];",
2383               []>;
2384 def TEX_2D_S32_F32_LEVEL
2385   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2386                     Int32Regs:$b, Int32Regs:$a),
2387               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2388                    Float32Regs:$lod),
2389               "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2390               "[$t, $s, \\{$x, $y\\}], $lod;",
2391               []>;
2392 def TEX_2D_S32_F32_GRAD
2393   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2394                     Int32Regs:$b, Int32Regs:$a),
2395               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2396                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2397                    Float32Regs:$grady0, Float32Regs:$grady1),
2398               "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2399               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2400               "\\{$grady0, $grady1\\};",
2401               []>;
2402 def TEX_2D_U32_S32
2403   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2404                     Int32Regs:$b, Int32Regs:$a),
2405               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2406               "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2407               "[$t, $s, \\{$x, $y\\}];",
2408               []>;
2409 def TEX_2D_U32_F32
2410   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2411                     Int32Regs:$b, Int32Regs:$a),
2412               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2413               "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2414               "[$t, $s, \\{$x, $y\\}];",
2415               []>;
2416 def TEX_2D_U32_F32_LEVEL
2417   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2418                     Int32Regs:$b, Int32Regs:$a),
2419               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2420                    Float32Regs:$lod),
2421               "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2422               "[$t, $s, \\{$x, $y\\}], $lod;",
2423               []>;
2424 def TEX_2D_U32_F32_GRAD
2425   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2426                     Int32Regs:$b, Int32Regs:$a),
2427               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2428                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2429                    Float32Regs:$grady0, Float32Regs:$grady1),
2430               "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2431               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2432               "\\{$grady0, $grady1\\};",
2433               []>;
2434
2435 def TEX_2D_ARRAY_F32_S32
2436   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2437                     Float32Regs:$b, Float32Regs:$a),
2438               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2439                    Int32Regs:$y),
2440               "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2441               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2442               []>;
2443 def TEX_2D_ARRAY_F32_F32
2444   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2445                     Float32Regs:$b, Float32Regs:$a),
2446               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2447                    Float32Regs:$y),
2448               "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2449               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2450               []>;
2451 def TEX_2D_ARRAY_F32_F32_LEVEL
2452   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2453                     Float32Regs:$b, Float32Regs:$a),
2454               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2455                    Float32Regs:$y, Float32Regs:$lod),
2456               "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2457               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2458               []>;
2459 def TEX_2D_ARRAY_F32_F32_GRAD
2460   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2461                     Float32Regs:$b, Float32Regs:$a),
2462               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2463                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2464                    Float32Regs:$grady0, Float32Regs:$grady1),
2465               "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2466               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2467               "\\{$grady0, $grady1\\};",
2468               []>;
2469 def TEX_2D_ARRAY_S32_S32
2470   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2471                     Int32Regs:$b, Int32Regs:$a),
2472               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2473                    Int32Regs:$y),
2474               "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2475               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2476               []>;
// Emits "tex.a2d.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs), $l (Int32Regs) and $x/$y (Float32Regs). $y appears twice
// so the coordinate vector has four entries. The pattern list is empty.
2477 def TEX_2D_ARRAY_S32_F32
2478   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2479                     Int32Regs:$b, Int32Regs:$a),
2480               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2481                    Float32Regs:$y),
2482               "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2483               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2484               []>;
// Emits "tex.level.a2d.v4.s32.f32": four Int32Regs results from $t/$s
// (Int64Regs), $l (Int32Regs), $x/$y (Float32Regs) and a trailing $lod
// (Float32Regs). $y appears twice in the coordinate vector. The pattern list
// is empty.
2485 def TEX_2D_ARRAY_S32_F32_LEVEL
2486   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2487                     Int32Regs:$b, Int32Regs:$a),
2488               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2489                    Float32Regs:$y, Float32Regs:$lod),
2490               "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2491               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2492               []>;
// Emits "tex.grad.a2d.v4.s32.f32": four Int32Regs results from $t/$s
// (Int64Regs), $l (Int32Regs) and $x/$y (Float32Regs), followed by two
// 2-element Float32Regs vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
// $y appears twice in the coordinate vector. The pattern list is empty.
2493 def TEX_2D_ARRAY_S32_F32_GRAD
2494   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2495                     Int32Regs:$b, Int32Regs:$a),
2496               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2497                    Float32Regs:$y,
2498                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2499                    Float32Regs:$grady0, Float32Regs:$grady1),
2500               "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2501               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2502               "\\{$grady0, $grady1\\};",
2503               []>;
// Emits "tex.a2d.v4.u32.s32": four results held in Int32Regs ($r, $g, $b, $a)
// from $t/$s (Int64Regs) and $l, $x, $y (all Int32Regs). $y appears twice so
// the coordinate vector has four entries. The pattern list is empty.
2504 def TEX_2D_ARRAY_U32_S32
2505   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2506                     Int32Regs:$b, Int32Regs:$a),
2507               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2508                    Int32Regs:$y),
2509               "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2510               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2511               []>;
// Emits "tex.a2d.v4.u32.f32": four results held in Int32Regs ($r, $g, $b, $a)
// from $t/$s (Int64Regs), $l (Int32Regs) and $x/$y (Float32Regs). $y appears
// twice so the coordinate vector has four entries. The pattern list is empty.
2512 def TEX_2D_ARRAY_U32_F32
2513   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2514                     Int32Regs:$b, Int32Regs:$a),
2515               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2516                    Float32Regs:$y),
2517               "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2518               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2519               []>;
// Emits "tex.level.a2d.v4.u32.f32": four results held in Int32Regs from $t/$s
// (Int64Regs), $l (Int32Regs), $x/$y (Float32Regs) and a trailing $lod
// (Float32Regs). $y appears twice in the coordinate vector. The pattern list
// is empty.
2520 def TEX_2D_ARRAY_U32_F32_LEVEL
2521   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2522                     Int32Regs:$b, Int32Regs:$a),
2523               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2524                    Float32Regs:$y, Float32Regs:$lod),
2525               "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2526               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2527               []>;
// Emits "tex.grad.a2d.v4.u32.f32": four results held in Int32Regs from $t/$s
// (Int64Regs), $l (Int32Regs) and $x/$y (Float32Regs), followed by two
// 2-element Float32Regs vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
// $y appears twice in the coordinate vector. The pattern list is empty.
2528 def TEX_2D_ARRAY_U32_F32_GRAD
2529   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2530                     Int32Regs:$b, Int32Regs:$a),
2531               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2532                    Float32Regs:$y,
2533                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2534                    Float32Regs:$grady0, Float32Regs:$grady1),
2535               "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2536               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2537               "\\{$grady0, $grady1\\};",
2538               []>;
2539
// Emits "tex.3d.v4.f32.s32": four Float32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs) and $x, $y, $z (Int32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2540 def TEX_3D_F32_S32
2541   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2542                     Float32Regs:$b, Float32Regs:$a),
2543               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2544                    Int32Regs:$z),
2545               "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2546               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2547               []>;
// Emits "tex.3d.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs) and $x, $y, $z (Float32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2548 def TEX_3D_F32_F32
2549   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2550                     Float32Regs:$b, Float32Regs:$a),
2551               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2552                    Float32Regs:$z),
2553               "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2554               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2555               []>;
// Emits "tex.level.3d.v4.f32.f32": four Float32Regs results from $t/$s
// (Int64Regs), $x, $y, $z (Float32Regs) and a trailing $lod (Float32Regs).
// $z appears twice in the coordinate vector. The pattern list is empty.
2556 def TEX_3D_F32_F32_LEVEL
2557   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2558                     Float32Regs:$b, Float32Regs:$a),
2559               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2560                    Float32Regs:$z, Float32Regs:$lod),
2561               "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2562               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2563               []>;
// Emits "tex.grad.3d.v4.f32.f32": four Float32Regs results from $t/$s
// (Int64Regs) and $x, $y, $z (Float32Regs), followed by two gradient vectors
// built from $gradx0..2 and $grady0..2 (Float32Regs). The coordinate vector
// and both gradient vectors are written with four entries by repeating their
// last operand ($z, $gradx2, $grady2). The pattern list is empty.
2564 def TEX_3D_F32_F32_GRAD
2565   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2566                     Float32Regs:$b, Float32Regs:$a),
2567               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2568                    Float32Regs:$z,
2569                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2570                    Float32Regs:$gradx2, Float32Regs:$grady0,
2571                    Float32Regs:$grady1, Float32Regs:$grady2),
2572               "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2573               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2574               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2575               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2576               []>;
// Emits "tex.3d.v4.s32.s32": four Int32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs) and $x, $y, $z (Int32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2577 def TEX_3D_S32_S32
2578   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2579                     Int32Regs:$b, Int32Regs:$a),
2580               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2581                    Int32Regs:$z),
2582               "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2583               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2584               []>;
// Emits "tex.3d.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs) and $x, $y, $z (Float32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2585 def TEX_3D_S32_F32
2586   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2587                     Int32Regs:$b, Int32Regs:$a),
2588               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2589                    Float32Regs:$z),
2590               "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2591               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2592               []>;
// Emits "tex.level.3d.v4.s32.f32": four Int32Regs results from $t/$s
// (Int64Regs), $x, $y, $z (Float32Regs) and a trailing $lod (Float32Regs).
// $z appears twice in the coordinate vector. The pattern list is empty.
2593 def TEX_3D_S32_F32_LEVEL
2594   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2595                     Int32Regs:$b, Int32Regs:$a),
2596               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2597                    Float32Regs:$z, Float32Regs:$lod),
2598               "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2599               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2600               []>;
// Emits "tex.grad.3d.v4.s32.f32": four Int32Regs results from $t/$s
// (Int64Regs) and $x, $y, $z (Float32Regs), followed by two gradient vectors
// built from $gradx0..2 and $grady0..2 (Float32Regs). The coordinate vector
// and both gradient vectors are written with four entries by repeating their
// last operand ($z, $gradx2, $grady2). The pattern list is empty.
2601 def TEX_3D_S32_F32_GRAD
2602   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2603                     Int32Regs:$b, Int32Regs:$a),
2604               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2605                    Float32Regs:$z,
2606                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2607                    Float32Regs:$gradx2, Float32Regs:$grady0,
2608                    Float32Regs:$grady1, Float32Regs:$grady2),
2609               "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2610               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2611               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2612               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2613               []>;
// Emits "tex.3d.v4.u32.s32": four results held in Int32Regs ($r, $g, $b, $a)
// from $t/$s (Int64Regs) and $x, $y, $z (Int32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2614 def TEX_3D_U32_S32
2615   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2616                     Int32Regs:$b, Int32Regs:$a),
2617               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2618                    Int32Regs:$z),
2619               "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2620               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2621               []>;
// Emits "tex.3d.v4.u32.f32": four results held in Int32Regs ($r, $g, $b, $a)
// from $t/$s (Int64Regs) and $x, $y, $z (Float32Regs). $z appears twice so
// the coordinate vector has four entries. The pattern list is empty.
2622 def TEX_3D_U32_F32
2623   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2624                     Int32Regs:$b, Int32Regs:$a),
2625               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2626                    Float32Regs:$z),
2627               "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2628               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2629               []>;
// Emits "tex.level.3d.v4.u32.f32": four results held in Int32Regs from $t/$s
// (Int64Regs), $x, $y, $z (Float32Regs) and a trailing $lod (Float32Regs).
// $z appears twice in the coordinate vector. The pattern list is empty.
2630 def TEX_3D_U32_F32_LEVEL
2631   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2632                     Int32Regs:$b, Int32Regs:$a),
2633               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2634                    Float32Regs:$z, Float32Regs:$lod),
2635               "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2636               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2637               []>;
// Emits "tex.grad.3d.v4.u32.f32": four results held in Int32Regs from $t/$s
// (Int64Regs) and $x, $y, $z (Float32Regs), followed by two gradient vectors
// built from $gradx0..2 and $grady0..2 (Float32Regs). The coordinate vector
// and both gradient vectors are written with four entries by repeating their
// last operand ($z, $gradx2, $grady2). The pattern list is empty.
2638 def TEX_3D_U32_F32_GRAD
2639   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2640                     Int32Regs:$b, Int32Regs:$a),
2641               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2642                    Float32Regs:$z,
2643                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2644                    Float32Regs:$gradx2, Float32Regs:$grady0,
2645                    Float32Regs:$grady1, Float32Regs:$grady2),
2646               "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2647               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2648               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2649               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2650               []>;
2651
// Emits "tex.cube.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs) and $x, $y, $z (Float32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2652 def TEX_CUBE_F32_F32
2653   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2654                     Float32Regs:$b, Float32Regs:$a),
2655               (ins Int64Regs:$t, Int64Regs:$s,
2656                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2657               "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2658               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2659               []>;
// Emits "tex.level.cube.v4.f32.f32": four Float32Regs results from $t/$s
// (Int64Regs), $x, $y, $z (Float32Regs) and a trailing $lod (Float32Regs).
// $z appears twice in the coordinate vector. The pattern list is empty.
2660 def TEX_CUBE_F32_F32_LEVEL
2661   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2662                     Float32Regs:$b, Float32Regs:$a),
2663               (ins Int64Regs:$t, Int64Regs:$s,
2664                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2665                    Float32Regs:$lod),
2666               "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2667               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2668               []>;
// Emits "tex.cube.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs) and $x, $y, $z (Float32Regs). $z appears twice so the
// coordinate vector has four entries. The pattern list is empty.
2669 def TEX_CUBE_S32_F32
2670   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2671                     Int32Regs:$b, Int32Regs:$a),
2672               (ins Int64Regs:$t, Int64Regs:$s,
2673                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2674               "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2675               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2676               []>;
// Emits "tex.level.cube.v4.s32.f32": four Int32Regs results from $t/$s
// (Int64Regs), $x, $y, $z (Float32Regs) and a trailing $lod (Float32Regs).
// $z appears twice in the coordinate vector. The pattern list is empty.
2677 def TEX_CUBE_S32_F32_LEVEL
2678   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2679                     Int32Regs:$b, Int32Regs:$a),
2680               (ins Int64Regs:$t, Int64Regs:$s,
2681                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2682                    Float32Regs:$lod),
2683               "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2684               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2685               []>;
// Emits "tex.cube.v4.u32.f32": four results held in Int32Regs ($r, $g, $b, $a)
// from $t/$s (Int64Regs) and $x, $y, $z (Float32Regs). $z appears twice so
// the coordinate vector has four entries. The pattern list is empty.
2686 def TEX_CUBE_U32_F32
2687   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2688                     Int32Regs:$b, Int32Regs:$a),
2689               (ins Int64Regs:$t, Int64Regs:$s,
2690                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2691               "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2692               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2693               []>;
// Emits "tex.level.cube.v4.u32.f32": four results held in Int32Regs from
// $t/$s (Int64Regs), $x, $y, $z (Float32Regs) and a trailing $lod
// (Float32Regs). $z appears twice in the coordinate vector. The pattern list
// is empty.
2694 def TEX_CUBE_U32_F32_LEVEL
2695   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2696                     Int32Regs:$b, Int32Regs:$a),
2697               (ins Int64Regs:$t, Int64Regs:$s,
2698                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2699                    Float32Regs:$lod),
2700               "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2701               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2702               []>;
2703
// Emits "tex.acube.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a)
// from $t/$s (Int64Regs), $l (Int32Regs) and $x, $y, $z (Float32Regs). The
// coordinate vector is {$l, $x, $y, $z} with no repeated entry. The pattern
// list is empty.
2704 def TEX_CUBE_ARRAY_F32_F32
2705   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2706                     Float32Regs:$b, Float32Regs:$a),
2707               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2708                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2709               "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2710               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2711               []>;
// Emits "tex.level.acube.v4.f32.f32": four Float32Regs results from $t/$s
// (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs) and a trailing $lod
// (Float32Regs). The coordinate vector is {$l, $x, $y, $z}. The pattern list
// is empty.
2712 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2713   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2714                     Float32Regs:$b, Float32Regs:$a),
2715               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2716                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2717                    Float32Regs:$lod),
2718               "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2719               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2720               []>;
// Emits "tex.acube.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from
// $t/$s (Int64Regs), $l (Int32Regs) and $x, $y, $z (Float32Regs). The
// coordinate vector is {$l, $x, $y, $z}. The pattern list is empty.
2721 def TEX_CUBE_ARRAY_S32_F32
2722   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2723                     Int32Regs:$b, Int32Regs:$a),
2724               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2725                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2726               "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2727               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2728               []>;
// Emits "tex.level.acube.v4.s32.f32": four Int32Regs results from $t/$s
// (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs) and a trailing $lod
// (Float32Regs). The coordinate vector is {$l, $x, $y, $z}. The pattern list
// is empty.
2729 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2730   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2731                     Int32Regs:$b, Int32Regs:$a),
2732               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2733                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2734                    Float32Regs:$lod),
2735               "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2736               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2737               []>;
// Emits "tex.acube.v4.u32.f32": four results held in Int32Regs ($r, $g, $b,
// $a) from $t/$s (Int64Regs), $l (Int32Regs) and $x, $y, $z (Float32Regs).
// The coordinate vector is {$l, $x, $y, $z}. The pattern list is empty.
2738 def TEX_CUBE_ARRAY_U32_F32
2739   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2740                     Int32Regs:$b, Int32Regs:$a),
2741               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2742                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2743               "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2744               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2745               []>;
// Emits "tex.level.acube.v4.u32.f32": four results held in Int32Regs from
// $t/$s (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs) and a trailing
// $lod (Float32Regs). The coordinate vector is {$l, $x, $y, $z}. The pattern
// list is empty.
2746 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2747   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2748                     Int32Regs:$b, Int32Regs:$a),
2749               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2750                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2751                    Float32Regs:$lod),
2752               "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2753               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2754               []>;
2755
// Emits "tld4.r.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2756 def TLD4_R_2D_F32_F32
2757   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2758                     Float32Regs:$v2, Float32Regs:$v3),
2759               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2760               "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2761               "[$t, $s, \\{$x, $y\\}];",
2762               []>;
// Emits "tld4.g.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2763 def TLD4_G_2D_F32_F32
2764   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2765                     Float32Regs:$v2, Float32Regs:$v3),
2766               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2767               "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2768               "[$t, $s, \\{$x, $y\\}];",
2769               []>;
// Emits "tld4.b.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2770 def TLD4_B_2D_F32_F32
2771   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2772                     Float32Regs:$v2, Float32Regs:$v3),
2773               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2774               "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2775               "[$t, $s, \\{$x, $y\\}];",
2776               []>;
// Emits "tld4.a.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
// $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2777 def TLD4_A_2D_F32_F32
2778   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2779                     Float32Regs:$v2, Float32Regs:$v3),
2780               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2781               "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2782               "[$t, $s, \\{$x, $y\\}];",
2783               []>;
// Emits "tld4.r.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from $t/$s
// (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2784 def TLD4_R_2D_S32_F32
2785   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2786                     Int32Regs:$v2, Int32Regs:$v3),
2787               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2788               "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2789               "[$t, $s, \\{$x, $y\\}];",
2790               []>;
// Emits "tld4.g.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from $t/$s
// (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2791 def TLD4_G_2D_S32_F32
2792   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2793                     Int32Regs:$v2, Int32Regs:$v3),
2794               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2795               "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2796               "[$t, $s, \\{$x, $y\\}];",
2797               []>;
// Emits "tld4.b.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from $t/$s
// (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2798 def TLD4_B_2D_S32_F32
2799   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2800                     Int32Regs:$v2, Int32Regs:$v3),
2801               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2802               "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2803               "[$t, $s, \\{$x, $y\\}];",
2804               []>;
// Emits "tld4.a.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from $t/$s
// (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2805 def TLD4_A_2D_S32_F32
2806   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2807                     Int32Regs:$v2, Int32Regs:$v3),
2808               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2809               "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2810               "[$t, $s, \\{$x, $y\\}];",
2811               []>;
// Emits "tld4.r.2d.v4.u32.f32": four results held in Int32Regs ($v0..$v3)
// from $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2812 def TLD4_R_2D_U32_F32
2813   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2814                     Int32Regs:$v2, Int32Regs:$v3),
2815               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2816               "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2817               "[$t, $s, \\{$x, $y\\}];",
2818               []>;
// Emits "tld4.g.2d.v4.u32.f32": four results held in Int32Regs ($v0..$v3)
// from $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2819 def TLD4_G_2D_U32_F32
2820   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2821                     Int32Regs:$v2, Int32Regs:$v3),
2822               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2823               "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2824               "[$t, $s, \\{$x, $y\\}];",
2825               []>;
// Emits "tld4.b.2d.v4.u32.f32": four results held in Int32Regs ($v0..$v3)
// from $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2826 def TLD4_B_2D_U32_F32
2827   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2828                     Int32Regs:$v2, Int32Regs:$v3),
2829               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2830               "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2831               "[$t, $s, \\{$x, $y\\}];",
2832               []>;
// Emits "tld4.a.2d.v4.u32.f32": four results held in Int32Regs ($v0..$v3)
// from $t/$s (Int64Regs) and the 2-element coordinate {$x, $y} (Float32Regs).
// The pattern list is empty.
2833 def TLD4_A_2D_U32_F32
2834   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2835                     Int32Regs:$v2, Int32Regs:$v3),
2836               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2837               "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2838               "[$t, $s, \\{$x, $y\\}];",
2839               []>;
2840 }
2841
2842
2843 // texmode_unified: the instructions in this group take one Int64Regs operand ($t) and no $s operand
2844 let IsTex = 1, IsTexModeUnified = 1 in {
2845 // Texture fetch instructions using handles
// Emits "tex.1d.v4.f32.s32": four Float32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and $x (Int32Regs).
// The pattern list is empty.
2846 def TEX_UNIFIED_1D_F32_S32
2847   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2848                     Float32Regs:$b, Float32Regs:$a),
2849               (ins Int64Regs:$t, Int32Regs:$x),
2850               "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2851               []>;
// Emits "tex.1d.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and $x (Float32Regs).
// The pattern list is empty.
2852 def TEX_UNIFIED_1D_F32_F32
2853   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2854                     Float32Regs:$b, Float32Regs:$a),
2855               (ins Int64Regs:$t, Float32Regs:$x),
2856               "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2857               []>;
// Emits "tex.level.1d.v4.f32.f32": four Float32Regs results from a single $t
// operand (Int64Regs; no $s), $x (Float32Regs) and a trailing $lod
// (Float32Regs). The pattern list is empty.
2858 def TEX_UNIFIED_1D_F32_F32_LEVEL
2859   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2860                     Float32Regs:$b, Float32Regs:$a),
2861               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2862               "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2863               "[$t, \\{$x\\}], $lod;",
2864               []>;
// Emits "tex.grad.1d.v4.f32.f32": four Float32Regs results from a single $t
// operand (Int64Regs; no $s) and $x (Float32Regs), followed by the
// one-element vectors {$gradx} and {$grady} (Float32Regs).
// The pattern list is empty.
2865 def TEX_UNIFIED_1D_F32_F32_GRAD
2866   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2867                     Float32Regs:$b, Float32Regs:$a),
2868               (ins Int64Regs:$t, Float32Regs:$x,
2869                    Float32Regs:$gradx, Float32Regs:$grady),
2870               "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2871               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2872               []>;
// Emits "tex.1d.v4.s32.s32": four Int32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and $x (Int32Regs).
// The pattern list is empty.
2873 def TEX_UNIFIED_1D_S32_S32
2874   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2875                     Int32Regs:$b, Int32Regs:$a),
2876               (ins Int64Regs:$t, Int32Regs:$x),
2877               "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2878               []>;
// Emits "tex.1d.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and $x (Float32Regs).
// The pattern list is empty.
2879 def TEX_UNIFIED_1D_S32_F32
2880   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2881                     Int32Regs:$b, Int32Regs:$a),
2882               (ins Int64Regs:$t, Float32Regs:$x),
2883               "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2884               []>;
// Emits "tex.level.1d.v4.s32.f32": four Int32Regs results from a single $t
// operand (Int64Regs; no $s), $x (Float32Regs) and a trailing $lod
// (Float32Regs). The pattern list is empty.
2885 def TEX_UNIFIED_1D_S32_F32_LEVEL
2886   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2887                     Int32Regs:$b, Int32Regs:$a),
2888               (ins Int64Regs:$t, Float32Regs:$x,
2889                    Float32Regs:$lod),
2890               "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2891               "[$t, \\{$x\\}], $lod;",
2892               []>;
// Emits "tex.grad.1d.v4.s32.f32": four Int32Regs results from a single $t
// operand (Int64Regs; no $s) and $x (Float32Regs), followed by the
// one-element vectors {$gradx} and {$grady} (Float32Regs).
// The pattern list is empty.
2893 def TEX_UNIFIED_1D_S32_F32_GRAD
2894   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2895                     Int32Regs:$b, Int32Regs:$a),
2896               (ins Int64Regs:$t, Float32Regs:$x,
2897                    Float32Regs:$gradx, Float32Regs:$grady),
2898               "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2899               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2900               []>;
// Emits "tex.1d.v4.u32.s32": four results held in Int32Regs ($r, $g, $b, $a)
// from a single $t operand (Int64Regs; no $s) and $x (Int32Regs).
// The pattern list is empty.
2901 def TEX_UNIFIED_1D_U32_S32
2902   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2903                     Int32Regs:$b, Int32Regs:$a),
2904               (ins Int64Regs:$t, Int32Regs:$x),
2905               "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2906               []>;
// Emits "tex.1d.v4.u32.f32": four results held in Int32Regs ($r, $g, $b, $a)
// from a single $t operand (Int64Regs; no $s) and $x (Float32Regs).
// The pattern list is empty.
2907 def TEX_UNIFIED_1D_U32_F32
2908   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2909                     Int32Regs:$b, Int32Regs:$a),
2910               (ins Int64Regs:$t, Float32Regs:$x),
2911               "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2912               []>;
// Emits "tex.level.1d.v4.u32.f32": four results held in Int32Regs from a
// single $t operand (Int64Regs; no $s), $x (Float32Regs) and a trailing $lod
// (Float32Regs). The pattern list is empty.
2913 def TEX_UNIFIED_1D_U32_F32_LEVEL
2914   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2915                     Int32Regs:$b, Int32Regs:$a),
2916               (ins Int64Regs:$t, Float32Regs:$x,
2917                    Float32Regs:$lod),
2918               "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2919               "[$t, \\{$x\\}], $lod;",
2920               []>;
// Emits "tex.grad.1d.v4.u32.f32": four results held in Int32Regs from a
// single $t operand (Int64Regs; no $s) and $x (Float32Regs), followed by the
// one-element vectors {$gradx} and {$grady} (Float32Regs).
// The pattern list is empty.
2921 def TEX_UNIFIED_1D_U32_F32_GRAD
2922   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2923                     Int32Regs:$b, Int32Regs:$a),
2924               (ins Int64Regs:$t, Float32Regs:$x,
2925                    Float32Regs:$gradx, Float32Regs:$grady),
2926               "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2927               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2928               []>;
2929
// Emits "tex.a1d.v4.f32.s32": four Float32Regs results ($r, $g, $b, $a) from
// a single $t operand (Int64Regs; no $s) and the 2-element vector {$l, $x}
// (both Int32Regs). The pattern list is empty.
2930 def TEX_UNIFIED_1D_ARRAY_F32_S32
2931   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2932                     Float32Regs:$b, Float32Regs:$a),
2933               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2934               "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2935               "[$t, \\{$l, $x\\}];",
2936               []>;
// Emits "tex.a1d.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a) from
// a single $t operand (Int64Regs; no $s) and the 2-element vector {$l, $x}
// ($l Int32Regs, $x Float32Regs). The pattern list is empty.
2937 def TEX_UNIFIED_1D_ARRAY_F32_F32
2938   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2939                     Float32Regs:$b, Float32Regs:$a),
2940               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2941               "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2942               "[$t, \\{$l, $x\\}];",
2943               []>;
// Emits "tex.level.a1d.v4.f32.f32": four Float32Regs results from a single $t
// operand (Int64Regs; no $s), the vector {$l, $x} ($l Int32Regs, $x
// Float32Regs) and a trailing $lod (Float32Regs). The pattern list is empty.
2944 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
2945   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2946                     Float32Regs:$b, Float32Regs:$a),
2947               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2948                    Float32Regs:$lod),
2949               "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2950               "[$t, \\{$l, $x\\}], $lod;",
2951               []>;
// Emits "tex.grad.a1d.v4.f32.f32": four Float32Regs results from a single $t
// operand (Int64Regs; no $s) and the vector {$l, $x} ($l Int32Regs, $x
// Float32Regs), followed by the one-element vectors {$gradx} and {$grady}
// (Float32Regs). The pattern list is empty.
2952 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
2953   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2954                     Float32Regs:$b, Float32Regs:$a),
2955               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2956                    Float32Regs:$gradx, Float32Regs:$grady),
2957               "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2958               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2959               []>;
// Emits "tex.a1d.v4.s32.s32": four Int32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and the 2-element vector {$l, $x}
// (both Int32Regs). The pattern list is empty.
2960 def TEX_UNIFIED_1D_ARRAY_S32_S32
2961   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2962                     Int32Regs:$b, Int32Regs:$a),
2963               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2964               "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2965               "[$t, \\{$l, $x\\}];",
2966               []>;
// Emits "tex.a1d.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and the 2-element vector {$l, $x}
// ($l Int32Regs, $x Float32Regs). The pattern list is empty.
2967 def TEX_UNIFIED_1D_ARRAY_S32_F32
2968   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2969                     Int32Regs:$b, Int32Regs:$a),
2970               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2971               "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2972               "[$t, \\{$l, $x\\}];",
2973               []>;
// Emits "tex.level.a1d.v4.s32.f32": four Int32Regs results from a single $t
// operand (Int64Regs; no $s), the vector {$l, $x} ($l Int32Regs, $x
// Float32Regs) and a trailing $lod (Float32Regs). The pattern list is empty.
2974 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
2975   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2976                     Int32Regs:$b, Int32Regs:$a),
2977               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2978                    Float32Regs:$lod),
2979               "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2980               "[$t, \\{$l, $x\\}], $lod;",
2981               []>;
// Emits "tex.grad.a1d.v4.s32.f32": four Int32Regs results from a single $t
// operand (Int64Regs; no $s) and the vector {$l, $x} ($l Int32Regs, $x
// Float32Regs), followed by the one-element vectors {$gradx} and {$grady}
// (Float32Regs). The pattern list is empty.
2982 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
2983   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2984                     Int32Regs:$b, Int32Regs:$a),
2985               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2986                    Float32Regs:$gradx, Float32Regs:$grady),
2987               "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2988               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2989               []>;
// Emits "tex.a1d.v4.u32.s32": four results held in Int32Regs ($r, $g, $b, $a)
// from a single $t operand (Int64Regs; no $s) and the 2-element vector
// {$l, $x} (both Int32Regs). The pattern list is empty.
2990 def TEX_UNIFIED_1D_ARRAY_U32_S32
2991   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2992                     Int32Regs:$b, Int32Regs:$a),
2993               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2994               "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2995               "[$t, \\{$l, $x\\}];",
2996               []>;
// Emits "tex.a1d.v4.u32.f32": four results held in Int32Regs ($r, $g, $b, $a)
// from a single $t operand (Int64Regs; no $s) and the 2-element vector
// {$l, $x} ($l Int32Regs, $x Float32Regs). The pattern list is empty.
2997 def TEX_UNIFIED_1D_ARRAY_U32_F32
2998   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2999                     Int32Regs:$b, Int32Regs:$a),
3000               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3001               "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3002               "[$t, \\{$l, $x\\}];",
3003               []>;
// Emits "tex.level.a1d.v4.u32.f32": four results held in Int32Regs from a
// single $t operand (Int64Regs; no $s), the vector {$l, $x} ($l Int32Regs,
// $x Float32Regs) and a trailing $lod (Float32Regs). The pattern list is
// empty.
3004 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3005   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3006                     Int32Regs:$b, Int32Regs:$a),
3007               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3008                    Float32Regs:$lod),
3009               "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3010               "[$t, \\{$l, $x\\}], $lod;",
3011               []>;
// Emits "tex.grad.a1d.v4.u32.f32": four results held in Int32Regs from a
// single $t operand (Int64Regs; no $s) and the vector {$l, $x} ($l Int32Regs,
// $x Float32Regs), followed by the one-element vectors {$gradx} and {$grady}
// (Float32Regs). The pattern list is empty.
3012 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3013   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3014                     Int32Regs:$b, Int32Regs:$a),
3015               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3016                    Float32Regs:$gradx, Float32Regs:$grady),
3017               "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3018               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3019               []>;
3020
// Emits "tex.2d.v4.f32.s32": four Float32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and the 2-element coordinate {$x, $y}
// (Int32Regs). The pattern list is empty.
3021 def TEX_UNIFIED_2D_F32_S32
3022   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3023                     Float32Regs:$b, Float32Regs:$a),
3024               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3025               "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3026               "[$t, \\{$x, $y\\}];",
3027               []>;
// Emits "tex.2d.v4.f32.f32": four Float32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and the 2-element coordinate {$x, $y}
// (Float32Regs). The pattern list is empty.
3028 def TEX_UNIFIED_2D_F32_F32
3029   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3030                     Float32Regs:$b, Float32Regs:$a),
3031               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3032               "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3033               "[$t, \\{$x, $y\\}];",
3034               []>;
// Emits "tex.level.2d.v4.f32.f32": four Float32Regs results from a single $t
// operand (Int64Regs; no $s), the coordinate {$x, $y} (Float32Regs) and a
// trailing $lod (Float32Regs). The pattern list is empty.
3035 def TEX_UNIFIED_2D_F32_F32_LEVEL
3036   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3037                     Float32Regs:$b, Float32Regs:$a),
3038               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3039                    Float32Regs:$lod),
3040               "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3041               "[$t, \\{$x, $y\\}], $lod;",
3042               []>;
// Emits "tex.grad.2d.v4.f32.f32": four Float32Regs results from a single $t
// operand (Int64Regs; no $s) and the coordinate {$x, $y} (Float32Regs),
// followed by the 2-element Float32Regs vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}. The pattern list is empty.
3043 def TEX_UNIFIED_2D_F32_F32_GRAD
3044   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3045                     Float32Regs:$b, Float32Regs:$a),
3046               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3047                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3048                    Float32Regs:$grady0, Float32Regs:$grady1),
3049               "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3050               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3051               "\\{$grady0, $grady1\\};",
3052               []>;
// Emits "tex.2d.v4.s32.s32": four Int32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and the 2-element coordinate {$x, $y}
// (Int32Regs). The pattern list is empty.
3053 def TEX_UNIFIED_2D_S32_S32
3054   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3055                     Int32Regs:$b, Int32Regs:$a),
3056               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3057               "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3058               "[$t, \\{$x, $y\\}];",
3059               []>;
// Emits "tex.2d.v4.s32.f32": four Int32Regs results ($r, $g, $b, $a) from a
// single $t operand (Int64Regs; no $s) and the 2-element coordinate {$x, $y}
// (Float32Regs). The pattern list is empty.
3060 def TEX_UNIFIED_2D_S32_F32
3061   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3062                     Int32Regs:$b, Int32Regs:$a),
3063               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3064               "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3065               "[$t, \\{$x, $y\\}];",
3066               []>;
// Emits "tex.level.2d.v4.s32.f32": four Int32Regs results from a single $t
// operand (Int64Regs; no $s), the coordinate {$x, $y} (Float32Regs) and a
// trailing $lod (Float32Regs). The pattern list is empty.
3067 def TEX_UNIFIED_2D_S32_F32_LEVEL
3068   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3069                     Int32Regs:$b, Int32Regs:$a),
3070               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3071                    Float32Regs:$lod),
3072               "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3073               "[$t, \\{$x, $y\\}], $lod;",
3074               []>;
// Emits "tex.grad.2d.v4.s32.f32": four Int32Regs results from a single $t
// operand (Int64Regs; no $s) and the coordinate {$x, $y} (Float32Regs),
// followed by the 2-element Float32Regs vectors {$gradx0, $gradx1} and
// {$grady0, $grady1}. The pattern list is empty.
3075 def TEX_UNIFIED_2D_S32_F32_GRAD
3076   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3077                     Int32Regs:$b, Int32Regs:$a),
3078               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3079                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3080                    Float32Regs:$grady0, Float32Regs:$grady1),
3081               "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3082               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3083               "\\{$grady0, $grady1\\};",
3084               []>;
// Emits "tex.2d.v4.u32.s32": four results held in Int32Regs ($r, $g, $b, $a)
// from a single $t operand (Int64Regs; no $s) and the 2-element coordinate
// {$x, $y} (Int32Regs). The pattern list is empty.
3085 def TEX_UNIFIED_2D_U32_S32
3086   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3087                     Int32Regs:$b, Int32Regs:$a),
3088               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3089               "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3090               "[$t, \\{$x, $y\\}];",
3091               []>;
// Emits "tex.2d.v4.u32.f32": four results held in Int32Regs ($r, $g, $b, $a)
// from a single $t operand (Int64Regs; no $s) and the 2-element coordinate
// {$x, $y} (Float32Regs). The pattern list is empty.
3092 def TEX_UNIFIED_2D_U32_F32
3093   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3094                     Int32Regs:$b, Int32Regs:$a),
3095               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3096               "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3097               "[$t, \\{$x, $y\\}];",
3098               []>;
// Emits "tex.level.2d.v4.u32.f32": four results held in Int32Regs from a
// single $t operand (Int64Regs; no $s), the coordinate {$x, $y} (Float32Regs)
// and a trailing $lod (Float32Regs). The pattern list is empty.
3099 def TEX_UNIFIED_2D_U32_F32_LEVEL
3100   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3101                     Int32Regs:$b, Int32Regs:$a),
3102               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3103                    Float32Regs:$lod),
3104               "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3105               "[$t, \\{$x, $y\\}], $lod;",
3106               []>;
// Emits "tex.grad.2d.v4.u32.f32": four results held in Int32Regs from a
// single $t operand (Int64Regs; no $s) and the coordinate {$x, $y}
// (Float32Regs), followed by the 2-element Float32Regs vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}. The pattern list is empty.
3107 def TEX_UNIFIED_2D_U32_F32_GRAD
3108   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3109                     Int32Regs:$b, Int32Regs:$a),
3110               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3111                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3112                    Float32Regs:$grady0, Float32Regs:$grady1),
3113               "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3114               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3115               "\\{$grady0, $grady1\\};",
3116               []>;
3117
// Prints `tex.a2d.v4.f32.s32`: four Float32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and integer (x, y).
// The coordinate vector is printed as {$l, $x, $y, $y}: $y is repeated to
// fill the fourth slot (presumably ignored padding -- confirm in PTX ISA).
3118 def TEX_UNIFIED_2D_ARRAY_F32_S32
3119   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3120                     Float32Regs:$b, Float32Regs:$a),
3121               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3122                    Int32Regs:$y),
3123               "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3124               "[$t, \\{$l, $x, $y, $y\\}];",
3125               []>;
// Prints `tex.a2d.v4.f32.f32`: four Float32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and float (x, y).
// $y is printed twice to fill the 4-element coordinate vector
// (presumably ignored padding -- confirm in PTX ISA).
3126 def TEX_UNIFIED_2D_ARRAY_F32_F32
3127   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3128                     Float32Regs:$b, Float32Regs:$a),
3129               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3130                    Float32Regs:$y),
3131               "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3132               "[$t, \\{$l, $x, $y, $y\\}];",
3133               []>;
// Prints `tex.level.a2d.v4.f32.f32`: the F32_F32 2D-array form with an
// extra trailing float $lod operand.
// $y is printed twice to fill the 4-element coordinate vector.
3134 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3135   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3136                     Float32Regs:$b, Float32Regs:$a),
3137               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3138                    Float32Regs:$y, Float32Regs:$lod),
3139               "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3140               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3141               []>;
// Prints `tex.grad.a2d.v4.f32.f32`: the F32_F32 2D-array form followed by
// two 2-element float gradient vectors (gradx, grady).
// $y is printed twice to fill the 4-element coordinate vector.
3142 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3143   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3144                     Float32Regs:$b, Float32Regs:$a),
3145               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3146                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3147                    Float32Regs:$grady0, Float32Regs:$grady1),
3148               "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3149               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3150               "\\{$grady0, $grady1\\};",
3151               []>;
// Prints `tex.a2d.v4.s32.s32`: four Int32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and integer (x, y).
// $y is printed twice to fill the 4-element coordinate vector.
3152 def TEX_UNIFIED_2D_ARRAY_S32_S32
3153   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3154                     Int32Regs:$b, Int32Regs:$a),
3155               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3156                    Int32Regs:$y),
3157               "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3158               "[$t, \\{$l, $x, $y, $y\\}];",
3159               []>;
// Prints `tex.a2d.v4.s32.f32`: four Int32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and float (x, y).
// $y is printed twice to fill the 4-element coordinate vector.
3160 def TEX_UNIFIED_2D_ARRAY_S32_F32
3161   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3162                     Int32Regs:$b, Int32Regs:$a),
3163               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3164                    Float32Regs:$y),
3165               "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3166               "[$t, \\{$l, $x, $y, $y\\}];",
3167               []>;
// Prints `tex.level.a2d.v4.s32.f32`: the S32_F32 2D-array form with an
// extra trailing float $lod operand.
// $y is printed twice to fill the 4-element coordinate vector.
3168 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3169   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3170                     Int32Regs:$b, Int32Regs:$a),
3171               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3172                    Float32Regs:$y, Float32Regs:$lod),
3173               "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3174               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3175               []>;
// Prints `tex.grad.a2d.v4.s32.f32`: the S32_F32 2D-array form followed by
// two 2-element float gradient vectors (gradx, grady).
// $y is printed twice to fill the 4-element coordinate vector.
3176 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3177   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3178                     Int32Regs:$b, Int32Regs:$a),
3179               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3180                    Float32Regs:$y,
3181                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3182                    Float32Regs:$grady0, Float32Regs:$grady1),
3183               "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3184               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3185               "\\{$grady0, $grady1\\};",
3186               []>;
// Prints `tex.a2d.v4.u32.s32`: four Int32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and integer (x, y).
// $y is printed twice to fill the 4-element coordinate vector.
3187 def TEX_UNIFIED_2D_ARRAY_U32_S32
3188   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3189                     Int32Regs:$b, Int32Regs:$a),
3190               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3191                    Int32Regs:$y),
3192               "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3193               "[$t, \\{$l, $x, $y, $y\\}];",
3194               []>;
// Prints `tex.a2d.v4.u32.f32`: four Int32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and float (x, y).
// $y is printed twice to fill the 4-element coordinate vector.
3195 def TEX_UNIFIED_2D_ARRAY_U32_F32
3196   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3197                     Int32Regs:$b, Int32Regs:$a),
3198               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3199                    Float32Regs:$y),
3200               "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3201               "[$t, \\{$l, $x, $y, $y\\}];",
3202               []>;
// Prints `tex.level.a2d.v4.u32.f32`: the U32_F32 2D-array form with an
// extra trailing float $lod operand.
// $y is printed twice to fill the 4-element coordinate vector.
3203 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3204   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3205                     Int32Regs:$b, Int32Regs:$a),
3206               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3207                    Float32Regs:$y, Float32Regs:$lod),
3208               "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3209               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3210               []>;
// Prints `tex.grad.a2d.v4.u32.f32`: the U32_F32 2D-array form followed by
// two 2-element float gradient vectors (gradx, grady).
// $y is printed twice to fill the 4-element coordinate vector.
3211 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3212   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3213                     Int32Regs:$b, Int32Regs:$a),
3214               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3215                    Float32Regs:$y,
3216                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3217                    Float32Regs:$grady0, Float32Regs:$grady1),
3218               "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3219               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3220               "\\{$grady0, $grady1\\};",
3221               []>;
3222
// Prints `tex.3d.v4.f32.s32`: four Float32Regs results from the 64-bit
// handle $t and integer (x, y, z) coordinates.
// The coordinate vector is printed as {$x, $y, $z, $z}: $z is repeated to
// fill the fourth slot (presumably ignored padding -- confirm in PTX ISA).
3223 def TEX_UNIFIED_3D_F32_S32
3224   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3225                     Float32Regs:$b, Float32Regs:$a),
3226               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3227                    Int32Regs:$z),
3228               "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3229               "[$t, \\{$x, $y, $z, $z\\}];",
3230               []>;
// Prints `tex.3d.v4.f32.f32`: four Float32Regs results from the 64-bit
// handle $t and float (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3231 def TEX_UNIFIED_3D_F32_F32
3232   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3233                     Float32Regs:$b, Float32Regs:$a),
3234               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3235                    Float32Regs:$z),
3236               "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3237               "[$t, \\{$x, $y, $z, $z\\}];",
3238               []>;
// Prints `tex.level.3d.v4.f32.f32`: the F32_F32 3D form with an extra
// trailing float $lod operand.
// $z is printed twice to fill the 4-element coordinate vector.
3239 def TEX_UNIFIED_3D_F32_F32_LEVEL
3240   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3241                     Float32Regs:$b, Float32Regs:$a),
3242               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3243                    Float32Regs:$z, Float32Regs:$lod),
3244               "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3245               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3246               []>;
// Prints `tex.grad.3d.v4.f32.f32`: the F32_F32 3D form followed by two
// gradient vectors built from three float operands each.
// The coordinate and both gradient vectors are printed with four elements;
// the last operand ($z, $gradx2, $grady2) is repeated to fill the
// fourth slot.
3247 def TEX_UNIFIED_3D_F32_F32_GRAD
3248   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3249                     Float32Regs:$b, Float32Regs:$a),
3250               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3251                    Float32Regs:$z,
3252                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3253                    Float32Regs:$gradx2, Float32Regs:$grady0,
3254                    Float32Regs:$grady1, Float32Regs:$grady2),
3255               "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3256               "[$t, \\{$x, $y, $z, $z\\}], "
3257               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3258               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3259               []>;
// Prints `tex.3d.v4.s32.s32`: four Int32Regs results from the 64-bit
// handle $t and integer (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3260 def TEX_UNIFIED_3D_S32_S32
3261   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3262                     Int32Regs:$b, Int32Regs:$a),
3263               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3264                    Int32Regs:$z),
3265               "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3266               "[$t, \\{$x, $y, $z, $z\\}];",
3267               []>;
// Prints `tex.3d.v4.s32.f32`: four Int32Regs results from the 64-bit
// handle $t and float (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3268 def TEX_UNIFIED_3D_S32_F32
3269   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3270                     Int32Regs:$b, Int32Regs:$a),
3271               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3272                    Float32Regs:$z),
3273               "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3274               "[$t, \\{$x, $y, $z, $z\\}];",
3275               []>;
// Prints `tex.level.3d.v4.s32.f32`: the S32_F32 3D form with an extra
// trailing float $lod operand.
// $z is printed twice to fill the 4-element coordinate vector.
3276 def TEX_UNIFIED_3D_S32_F32_LEVEL
3277   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3278                     Int32Regs:$b, Int32Regs:$a),
3279               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3280                    Float32Regs:$z, Float32Regs:$lod),
3281               "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3282               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3283               []>;
// Prints `tex.grad.3d.v4.s32.f32`: the S32_F32 3D form followed by two
// gradient vectors built from three float operands each.
// The last operand of each vector ($z, $gradx2, $grady2) is repeated to
// fill the fourth slot.
3284 def TEX_UNIFIED_3D_S32_F32_GRAD
3285   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3286                     Int32Regs:$b, Int32Regs:$a),
3287               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3288                    Float32Regs:$z,
3289                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3290                    Float32Regs:$gradx2, Float32Regs:$grady0,
3291                    Float32Regs:$grady1, Float32Regs:$grady2),
3292               "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3293               "[$t, \\{$x, $y, $z, $z\\}], "
3294               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3295               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3296               []>;
// Prints `tex.3d.v4.u32.s32`: four Int32Regs results from the 64-bit
// handle $t and integer (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3297 def TEX_UNIFIED_3D_U32_S32
3298   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3299                     Int32Regs:$b, Int32Regs:$a),
3300               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3301                    Int32Regs:$z),
3302               "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3303               "[$t, \\{$x, $y, $z, $z\\}];",
3304               []>;
// Prints `tex.3d.v4.u32.f32`: four Int32Regs results from the 64-bit
// handle $t and float (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3305 def TEX_UNIFIED_3D_U32_F32
3306   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3307                     Int32Regs:$b, Int32Regs:$a),
3308               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3309                    Float32Regs:$z),
3310               "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3311               "[$t, \\{$x, $y, $z, $z\\}];",
3312               []>;
// Prints `tex.level.3d.v4.u32.f32`: the U32_F32 3D form with an extra
// trailing float $lod operand.
// $z is printed twice to fill the 4-element coordinate vector.
3313 def TEX_UNIFIED_3D_U32_F32_LEVEL
3314   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3315                     Int32Regs:$b, Int32Regs:$a),
3316               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3317                    Float32Regs:$z, Float32Regs:$lod),
3318               "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3319               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3320               []>;
// Prints `tex.grad.3d.v4.u32.f32`: the U32_F32 3D form followed by two
// gradient vectors built from three float operands each.
// The last operand of each vector ($z, $gradx2, $grady2) is repeated to
// fill the fourth slot.
3321 def TEX_UNIFIED_3D_U32_F32_GRAD
3322   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3323                     Int32Regs:$b, Int32Regs:$a),
3324               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3325                    Float32Regs:$z,
3326                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3327                    Float32Regs:$gradx2, Float32Regs:$grady0,
3328                    Float32Regs:$grady1, Float32Regs:$grady2),
3329               "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3330               "[$t, \\{$x, $y, $z, $z\\}], "
3331               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3332               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3333               []>;
3334
// Prints `tex.cube.v4.f32.f32`: four Float32Regs results from the 64-bit
// handle $t and float (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector
// (presumably ignored padding -- confirm in PTX ISA).
3335 def TEX_UNIFIED_CUBE_F32_F32
3336   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3337                     Float32Regs:$b, Float32Regs:$a),
3338               (ins Int64Regs:$t,
3339                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3340               "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3341               "[$t, \\{$x, $y, $z, $z\\}];",
3342               []>;
// Prints `tex.level.cube.v4.f32.f32`: the F32_F32 cube form with an extra
// trailing float $lod operand.
// $z is printed twice to fill the 4-element coordinate vector.
3343 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3344   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3345                     Float32Regs:$b, Float32Regs:$a),
3346               (ins Int64Regs:$t,
3347                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3348                    Float32Regs:$lod),
3349               "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3350               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3351               []>;
// Prints `tex.cube.v4.s32.f32`: four Int32Regs results from the 64-bit
// handle $t and float (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3352 def TEX_UNIFIED_CUBE_S32_F32
3353   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3354                     Int32Regs:$b, Int32Regs:$a),
3355               (ins Int64Regs:$t,
3356                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3357               "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3358               "[$t, \\{$x, $y, $z, $z\\}];",
3359               []>;
// Prints `tex.level.cube.v4.s32.f32`: the S32_F32 cube form with an extra
// trailing float $lod operand.
// $z is printed twice to fill the 4-element coordinate vector.
3360 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3361   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3362                     Int32Regs:$b, Int32Regs:$a),
3363               (ins Int64Regs:$t,
3364                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3365                    Float32Regs:$lod),
3366               "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3367               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3368               []>;
// Prints `tex.cube.v4.u32.f32`: four Int32Regs results from the 64-bit
// handle $t and float (x, y, z) coordinates.
// $z is printed twice to fill the 4-element coordinate vector.
3369 def TEX_UNIFIED_CUBE_U32_F32
3370   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3371                     Int32Regs:$b, Int32Regs:$a),
3372               (ins Int64Regs:$t,
3373                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3374               "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3375               "[$t, \\{$x, $y, $z, $z\\}];",
3376               []>;
// Prints `tex.level.cube.v4.u32.f32`: the U32_F32 cube form with an extra
// trailing float $lod operand.
// $z is printed twice to fill the 4-element coordinate vector.
3377 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3378   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3379                     Int32Regs:$b, Int32Regs:$a),
3380               (ins Int64Regs:$t,
3381                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3382                    Float32Regs:$lod),
3383               "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3384               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3385               []>;
3386
// Prints `tex.acube.v4.f32.f32`: four Float32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and float (x, y, z).
// All four slots of the coordinate vector {$l, $x, $y, $z} are distinct
// operands, so nothing is repeated here.
3387 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3388   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3389                     Float32Regs:$b, Float32Regs:$a),
3390               (ins Int64Regs:$t, Int32Regs:$l,
3391                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3392               "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3393               "[$t, \\{$l, $x, $y, $z\\}];",
3394               []>;
// Prints `tex.level.acube.v4.f32.f32`: the F32_F32 cube-array form with an
// extra trailing float $lod operand. No selection pattern (`[]`).
3395 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3396   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3397                     Float32Regs:$b, Float32Regs:$a),
3398               (ins Int64Regs:$t, Int32Regs:$l,
3399                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3400                    Float32Regs:$lod),
3401               "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3402               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3403               []>;
// Prints `tex.acube.v4.s32.f32`: four Int32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and float (x, y, z).
// No selection pattern (`[]`).
3404 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3405   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3406                     Int32Regs:$b, Int32Regs:$a),
3407               (ins Int64Regs:$t, Int32Regs:$l,
3408                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3409               "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3410               "[$t, \\{$l, $x, $y, $z\\}];",
3411               []>;
// Prints `tex.level.acube.v4.s32.f32`: the S32_F32 cube-array form with an
// extra trailing float $lod operand. No selection pattern (`[]`).
3412 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3413   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3414                     Int32Regs:$b, Int32Regs:$a),
3415               (ins Int64Regs:$t, Int32Regs:$l,
3416                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3417                    Float32Regs:$lod),
3418               "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3419               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3420               []>;
// Prints `tex.acube.v4.u32.f32`: four Int32Regs results; inputs are the
// 64-bit handle $t, the Int32Regs operand $l and float (x, y, z).
// No selection pattern (`[]`).
3421 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3422   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3423                     Int32Regs:$b, Int32Regs:$a),
3424               (ins Int64Regs:$t, Int32Regs:$l,
3425                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3426               "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3427               "[$t, \\{$l, $x, $y, $z\\}];",
3428               []>;
// Prints `tex.level.acube.v4.u32.f32`: the U32_F32 cube-array form with an
// extra trailing float $lod operand. No selection pattern (`[]`).
3429 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3430   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3431                     Int32Regs:$b, Int32Regs:$a),
3432               (ins Int64Regs:$t, Int32Regs:$l,
3433                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3434                    Float32Regs:$lod),
3435               "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3436               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3437               []>;
3438
// Prints `tld4.r.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates.
// `.r` is the component selector in the mnemonic (semantics per PTX `tld4`;
// confirm against the PTX ISA). No selection pattern (`[]`).
3439 def TLD4_UNIFIED_R_2D_F32_F32
3440   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3441                     Float32Regs:$v2, Float32Regs:$v3),
3442               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3443               "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3444               "[$t, \\{$x, $y\\}];",
3445               []>;
// Prints `tld4.g.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.g` selector.
3446 def TLD4_UNIFIED_G_2D_F32_F32
3447   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3448                     Float32Regs:$v2, Float32Regs:$v3),
3449               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3450               "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3451               "[$t, \\{$x, $y\\}];",
3452               []>;
// Prints `tld4.b.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.b` selector.
3453 def TLD4_UNIFIED_B_2D_F32_F32
3454   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3455                     Float32Regs:$v2, Float32Regs:$v3),
3456               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3457               "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3458               "[$t, \\{$x, $y\\}];",
3459               []>;
// Prints `tld4.a.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.a` selector.
3460 def TLD4_UNIFIED_A_2D_F32_F32
3461   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3462                     Float32Regs:$v2, Float32Regs:$v3),
3463               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3464               "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3465               "[$t, \\{$x, $y\\}];",
3466               []>;
// Prints `tld4.r.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.r` selector.
3467 def TLD4_UNIFIED_R_2D_S32_F32
3468   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3469                     Int32Regs:$v2, Int32Regs:$v3),
3470               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3471               "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3472               "[$t, \\{$x, $y\\}];",
3473               []>;
// Prints `tld4.g.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.g` selector.
3474 def TLD4_UNIFIED_G_2D_S32_F32
3475   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3476                     Int32Regs:$v2, Int32Regs:$v3),
3477               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3478               "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3479               "[$t, \\{$x, $y\\}];",
3480               []>;
// Prints `tld4.b.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.b` selector.
3481 def TLD4_UNIFIED_B_2D_S32_F32
3482   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3483                     Int32Regs:$v2, Int32Regs:$v3),
3484               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3485               "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3486               "[$t, \\{$x, $y\\}];",
3487               []>;
// Prints `tld4.a.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.a` selector.
3488 def TLD4_UNIFIED_A_2D_S32_F32
3489   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3490                     Int32Regs:$v2, Int32Regs:$v3),
3491               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3492               "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3493               "[$t, \\{$x, $y\\}];",
3494               []>;
// Prints `tld4.r.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.r` selector.
3495 def TLD4_UNIFIED_R_2D_U32_F32
3496   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3497                     Int32Regs:$v2, Int32Regs:$v3),
3498               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3499               "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3500               "[$t, \\{$x, $y\\}];",
3501               []>;
// Prints `tld4.g.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.g` selector.
3502 def TLD4_UNIFIED_G_2D_U32_F32
3503   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3504                     Int32Regs:$v2, Int32Regs:$v3),
3505               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3506               "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3507               "[$t, \\{$x, $y\\}];",
3508               []>;
// Prints `tld4.b.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.b` selector.
3509 def TLD4_UNIFIED_B_2D_U32_F32
3510   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3511                     Int32Regs:$v2, Int32Regs:$v3),
3512               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3513               "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3514               "[$t, \\{$x, $y\\}];",
3515               []>;
// Prints `tld4.a.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) from
// the 64-bit handle $t and float (x, y) coordinates; `.a` selector.
3516 def TLD4_UNIFIED_A_2D_U32_F32
3517   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3518                     Int32Regs:$v2, Int32Regs:$v3),
3519               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3520               "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3521               "[$t, \\{$x, $y\\}];",
3522               []>;
3523 }
3524
3525
3526
3527 //=== Surface load instructions
3528 // .clamp variant
// Surface loads (`suld.b ... .clamp`) that produce a single result register.
// Every def in this group gets the record field IsSuld = 1 via the
// enclosing `let`. NOTE(review): the value appears to track the number of
// results (1 here, 2 and 3 in the following groups for two and four
// results) -- confirm against the field's declaration.
//
// Shared shape of each def:
//   * $s is the 64-bit handle operand; coordinates are Int32Regs.
//   * The 8-bit (`b8`) forms write an Int16Regs result, the same class as
//     the `b16` forms (presumably no narrower register class is available
//     -- confirm).
//   * The pattern list is `[]`: no selection pattern is attached here.
3529 let IsSuld = 1 in {
// 1D: coordinate vector is {$x}.
3530 def SULD_1D_I8_CLAMP
3531   : NVPTXInst<(outs Int16Regs:$r),
3532               (ins Int64Regs:$s, Int32Regs:$x),
3533               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3534               []>;
3535 def SULD_1D_I16_CLAMP
3536   : NVPTXInst<(outs Int16Regs:$r),
3537               (ins Int64Regs:$s, Int32Regs:$x),
3538               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3539               []>;
3540 def SULD_1D_I32_CLAMP
3541   : NVPTXInst<(outs Int32Regs:$r),
3542               (ins Int64Regs:$s, Int32Regs:$x),
3543               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3544               []>;
3545 def SULD_1D_I64_CLAMP
3546   : NVPTXInst<(outs Int64Regs:$r),
3547               (ins Int64Regs:$s, Int32Regs:$x),
3548               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3549               []>;
3550
// 1D array: coordinate vector is {$l, $x}, with $l printed first.
3551 def SULD_1D_ARRAY_I8_CLAMP
3552   : NVPTXInst<(outs Int16Regs:$r),
3553               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3554               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3555               []>;
3556 def SULD_1D_ARRAY_I16_CLAMP
3557   : NVPTXInst<(outs Int16Regs:$r),
3558               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3559               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3560               []>;
3561 def SULD_1D_ARRAY_I32_CLAMP
3562   : NVPTXInst<(outs Int32Regs:$r),
3563               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3564               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3565               []>;
3566 def SULD_1D_ARRAY_I64_CLAMP
3567   : NVPTXInst<(outs Int64Regs:$r),
3568               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3569               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3570               []>;
3571
// 2D: coordinate vector is {$x, $y}.
3572 def SULD_2D_I8_CLAMP
3573   : NVPTXInst<(outs Int16Regs:$r),
3574               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3575               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3576               []>;
3577 def SULD_2D_I16_CLAMP
3578   : NVPTXInst<(outs Int16Regs:$r),
3579               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3580               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3581               []>;
3582 def SULD_2D_I32_CLAMP
3583   : NVPTXInst<(outs Int32Regs:$r),
3584               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3585               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3586               []>;
3587 def SULD_2D_I64_CLAMP
3588   : NVPTXInst<(outs Int64Regs:$r),
3589               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3590               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3591               []>;
3592
// 2D array: printed as {$l, $x, $y, $y}; $y is repeated to fill the fourth
// slot of the vector (presumably ignored padding -- confirm in PTX ISA).
3593 def SULD_2D_ARRAY_I8_CLAMP
3594   : NVPTXInst<(outs Int16Regs:$r),
3595               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3596               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3597               []>;
3598 def SULD_2D_ARRAY_I16_CLAMP
3599   : NVPTXInst<(outs Int16Regs:$r),
3600               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3601               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3602               []>;
3603 def SULD_2D_ARRAY_I32_CLAMP
3604   : NVPTXInst<(outs Int32Regs:$r),
3605               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3606               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3607               []>;
3608 def SULD_2D_ARRAY_I64_CLAMP
3609   : NVPTXInst<(outs Int64Regs:$r),
3610               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3611               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3612               []>;
3613
// 3D: printed as {$x, $y, $z, $z}; $z is repeated to fill the fourth slot.
3614 def SULD_3D_I8_CLAMP
3615   : NVPTXInst<(outs Int16Regs:$r),
3616               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3617               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3618               []>;
3619 def SULD_3D_I16_CLAMP
3620   : NVPTXInst<(outs Int16Regs:$r),
3621               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3622               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3623               []>;
3624 def SULD_3D_I32_CLAMP
3625   : NVPTXInst<(outs Int32Regs:$r),
3626               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3627               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3628               []>;
3629 def SULD_3D_I64_CLAMP
3630   : NVPTXInst<(outs Int64Regs:$r),
3631               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3632               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3633               []>;
3634 }
3635
// Surface loads (`suld.b ... .v2 ... .clamp`) that produce two result
// registers ($r, $g). Every def in this group gets the record field
// IsSuld = 2 via the enclosing `let`. NOTE(review): the value appears to
// track the result count (this group has two results) -- confirm against
// the field's declaration.
//
// Shared shape of each def:
//   * $s is the 64-bit handle operand; coordinates are Int32Regs.
//   * The 8-bit (`b8`) forms write Int16Regs results, the same class as
//     the `b16` forms.
//   * The pattern list is `[]`: no selection pattern is attached here.
3636 let IsSuld = 2 in {
// 1D: coordinate vector is {$x}.
3637 def SULD_1D_V2I8_CLAMP
3638   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3639               (ins Int64Regs:$s, Int32Regs:$x),
3640               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3641               []>;
3642 def SULD_1D_V2I16_CLAMP
3643   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3644               (ins Int64Regs:$s, Int32Regs:$x),
3645               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3646               []>;
3647 def SULD_1D_V2I32_CLAMP
3648   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3649               (ins Int64Regs:$s, Int32Regs:$x),
3650               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3651               []>;
3652 def SULD_1D_V2I64_CLAMP
3653   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3654               (ins Int64Regs:$s, Int32Regs:$x),
3655               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3656               []>;
3657
// 1D array: coordinate vector is {$l, $x}, with $l printed first.
3658 def SULD_1D_ARRAY_V2I8_CLAMP
3659   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3660               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3661               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3662               []>;
3663 def SULD_1D_ARRAY_V2I16_CLAMP
3664   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3665               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3666               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3667               []>;
3668 def SULD_1D_ARRAY_V2I32_CLAMP
3669   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3670               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3671               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3672               []>;
3673 def SULD_1D_ARRAY_V2I64_CLAMP
3674   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3675               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3676               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3677               []>;
3678
// 2D: coordinate vector is {$x, $y}.
3679 def SULD_2D_V2I8_CLAMP
3680   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3681               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3682               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3683               []>;
3684 def SULD_2D_V2I16_CLAMP
3685   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3686               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3687               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3688               []>;
3689 def SULD_2D_V2I32_CLAMP
3690   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3691               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3692               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3693               []>;
3694 def SULD_2D_V2I64_CLAMP
3695   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3696               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3697               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3698               []>;
3699
// 2D array: printed as {$l, $x, $y, $y}; $y is repeated to fill the fourth
// slot of the vector (presumably ignored padding -- confirm in PTX ISA).
3700 def SULD_2D_ARRAY_V2I8_CLAMP
3701   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3702               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3703               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3704               "[$s, \\{$l, $x, $y, $y\\}];",
3705               []>;
3706 def SULD_2D_ARRAY_V2I16_CLAMP
3707   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3708               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3709               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3710               "[$s, \\{$l, $x, $y, $y\\}];",
3711               []>;
3712 def SULD_2D_ARRAY_V2I32_CLAMP
3713   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3714               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3715               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3716               "[$s, \\{$l, $x, $y, $y\\}];",
3717               []>;
3718 def SULD_2D_ARRAY_V2I64_CLAMP
3719   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3720               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3721               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3722               "[$s, \\{$l, $x, $y, $y\\}];",
3723               []>;
3724
// 3D: printed as {$x, $y, $z, $z}; $z is repeated to fill the fourth slot.
3725 def SULD_3D_V2I8_CLAMP
3726   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3727               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3728               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3729               []>;
3730 def SULD_3D_V2I16_CLAMP
3731   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3732               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3733               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3734               []>;
3735 def SULD_3D_V2I32_CLAMP
3736   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3737               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3738               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3739               []>;
3740 def SULD_3D_V2I64_CLAMP
3741   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3742               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3743               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3744               []>;
3745 }
3746
3747 let IsSuld = 3 in {
// Prints `suld.b.1d.v4.b8.clamp`: four Int16Regs results ($r, $g, $b, $a)
// from the 64-bit handle $s and the integer coordinate $x.
// The 8-bit form uses the same Int16Regs class as the b16 form.
3748 def SULD_1D_V4I8_CLAMP
3749   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3750               (ins Int64Regs:$s, Int32Regs:$x),
3751               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3752               []>;
// Prints `suld.b.1d.v4.b16.clamp`: four Int16Regs results ($r, $g, $b, $a)
// from the 64-bit handle $s and the integer coordinate $x.
3753 def SULD_1D_V4I16_CLAMP
3754   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3755               (ins Int64Regs:$s, Int32Regs:$x),
3756               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3757               []>;
// Prints `suld.b.1d.v4.b32.clamp`: four Int32Regs results ($r, $g, $b, $a)
// from the 64-bit handle $s and the integer coordinate $x.
3758 def SULD_1D_V4I32_CLAMP
3759   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3760               (ins Int64Regs:$s, Int32Regs:$x),
3761               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3762               []>;
3763
3764 def SULD_1D_ARRAY_V4I8_CLAMP
3765   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3766               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3767               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3768               "[$s, \\{$l, $x\\}];",
3769               []>;
3770 def SULD_1D_ARRAY_V4I16_CLAMP
3771   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3772               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3773               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3774               "[$s, \\{$l, $x\\}];",
3775               []>;
3776 def SULD_1D_ARRAY_V4I32_CLAMP
3777   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3778               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3779               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3780               "[$s, \\{$l, $x\\}];",
3781               []>;
3782
3783 def SULD_2D_V4I8_CLAMP
3784   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3785               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3786               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3787               []>;
3788 def SULD_2D_V4I16_CLAMP
3789   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3790               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3791               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3792               []>;
3793 def SULD_2D_V4I32_CLAMP
3794   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3795               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3796               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3797               []>;
3798
3799 def SULD_2D_ARRAY_V4I8_CLAMP
3800   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3801               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3802               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3803               "[$s, \\{$l, $x, $y, $y\\}];",
3804               []>;
3805 def SULD_2D_ARRAY_V4I16_CLAMP
3806   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3807               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3808               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3809               "[$s, \\{$l, $x, $y, $y\\}];",
3810               []>;
3811 def SULD_2D_ARRAY_V4I32_CLAMP
3812   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3813               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3814               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3815               "[$s, \\{$l, $x, $y, $y\\}];",
3816               []>;
3817
3818
3819 def SULD_3D_V4I8_CLAMP
3820   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3821               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3822               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3823               "[$s, \\{$x, $y, $z, $z\\}];",
3824               []>;
3825 def SULD_3D_V4I16_CLAMP
3826   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3827               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3828               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3829               "[$s, \\{$x, $y, $z, $z\\}];",
3830               []>;
3831 def SULD_3D_V4I32_CLAMP
3832   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3833               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3834               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3835               "[$s, \\{$x, $y, $z, $z\\}];",
3836               []>;
3837 }
3838
3839
3840 // .trap variant
// Single-result surface loads whose asm strings carry the ".trap" suffix
// (no ".vN" component suffix). The enclosing let gives every record in this
// block IsSuld = 1.
// NOTE(review): what reads IsSuld, and what the value 1 encodes, is not
// visible in this part of the file.
//
// Shape shared by all records below:
//   * output: one register $r -- Int16Regs for the b8 and b16 forms,
//     Int32Regs for b32, Int64Regs for b64;
//   * inputs: the surface operand $s (Int64Regs) followed by Int32Regs
//     coordinates; the *_ARRAY_* records take an extra leading $l
//     (presumably the array layer -- TODO confirm);
//   * pattern list is empty ([]), so no TableGen selection pattern matches
//     them; presumably they are picked by hand-written selection code.
//
// The 2D-array and 3D records print their last coordinate twice ($y, $y or
// $z, $z) so the bracketed coordinate list has four entries; presumably the
// instruction wants a 4-element vector there and ignores the last slot
// (confirm against the PTX ISA).
3841 let IsSuld = 1 in {
3842 def SULD_1D_I8_TRAP
3843   : NVPTXInst<(outs Int16Regs:$r),
3844               (ins Int64Regs:$s, Int32Regs:$x),
3845               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3846               []>;
3847 def SULD_1D_I16_TRAP
3848   : NVPTXInst<(outs Int16Regs:$r),
3849               (ins Int64Regs:$s, Int32Regs:$x),
3850               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3851               []>;
3852 def SULD_1D_I32_TRAP
3853   : NVPTXInst<(outs Int32Regs:$r),
3854               (ins Int64Regs:$s, Int32Regs:$x),
3855               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3856               []>;
3857 def SULD_1D_I64_TRAP
3858   : NVPTXInst<(outs Int64Regs:$r),
3859               (ins Int64Regs:$s, Int32Regs:$x),
3860               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3861               []>;
3862
3863 def SULD_1D_ARRAY_I8_TRAP
3864   : NVPTXInst<(outs Int16Regs:$r),
3865               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3866               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3867               []>;
3868 def SULD_1D_ARRAY_I16_TRAP
3869   : NVPTXInst<(outs Int16Regs:$r),
3870               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3871               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3872               []>;
3873 def SULD_1D_ARRAY_I32_TRAP
3874   : NVPTXInst<(outs Int32Regs:$r),
3875               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3876               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3877               []>;
3878 def SULD_1D_ARRAY_I64_TRAP
3879   : NVPTXInst<(outs Int64Regs:$r),
3880               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3881               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3882               []>;
3883
3884 def SULD_2D_I8_TRAP
3885   : NVPTXInst<(outs Int16Regs:$r),
3886               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3887               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3888               []>;
3889 def SULD_2D_I16_TRAP
3890   : NVPTXInst<(outs Int16Regs:$r),
3891               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3892               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3893               []>;
3894 def SULD_2D_I32_TRAP
3895   : NVPTXInst<(outs Int32Regs:$r),
3896               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3897               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3898               []>;
3899 def SULD_2D_I64_TRAP
3900   : NVPTXInst<(outs Int64Regs:$r),
3901               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3902               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3903               []>;
3904
3905 def SULD_2D_ARRAY_I8_TRAP
3906   : NVPTXInst<(outs Int16Regs:$r),
3907               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3908               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3909               []>;
3910 def SULD_2D_ARRAY_I16_TRAP
3911   : NVPTXInst<(outs Int16Regs:$r),
3912               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3913               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3914               []>;
3915 def SULD_2D_ARRAY_I32_TRAP
3916   : NVPTXInst<(outs Int32Regs:$r),
3917               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3918               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3919               []>;
3920 def SULD_2D_ARRAY_I64_TRAP
3921   : NVPTXInst<(outs Int64Regs:$r),
3922               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3923               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3924               []>;
3925
3926 def SULD_3D_I8_TRAP
3927   : NVPTXInst<(outs Int16Regs:$r),
3928               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3929               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3930               []>;
3931 def SULD_3D_I16_TRAP
3932   : NVPTXInst<(outs Int16Regs:$r),
3933               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3934               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3935               []>;
3936 def SULD_3D_I32_TRAP
3937   : NVPTXInst<(outs Int32Regs:$r),
3938               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3939               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3940               []>;
3941 def SULD_3D_I64_TRAP
3942   : NVPTXInst<(outs Int64Regs:$r),
3943               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3944               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3945               []>;
3946 }
3947
// Two-component surface loads whose asm strings carry the ".v2" and ".trap"
// suffixes. The enclosing let gives every record in this block IsSuld = 2.
// NOTE(review): what reads IsSuld, and what the value 2 encodes, is not
// visible in this part of the file.
//
// Shape shared by all records below:
//   * outputs: two registers $r, $g -- Int16Regs for the b8 and b16 forms,
//     Int32Regs for b32, Int64Regs for b64;
//   * inputs: the surface operand $s (Int64Regs) followed by Int32Regs
//     coordinates; the *_ARRAY_* records take an extra leading $l
//     (presumably the array layer -- TODO confirm);
//   * pattern list is empty ([]), so no TableGen selection pattern matches
//     them; presumably they are picked by hand-written selection code.
//
// The 2D-array and 3D records print their last coordinate twice ($y, $y or
// $z, $z) so the bracketed coordinate list has four entries; presumably the
// instruction wants a 4-element vector there and ignores the last slot
// (confirm against the PTX ISA).
3948 let IsSuld = 2 in {
3949 def SULD_1D_V2I8_TRAP
3950   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3951               (ins Int64Regs:$s, Int32Regs:$x),
3952               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3953               []>;
3954 def SULD_1D_V2I16_TRAP
3955   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3956               (ins Int64Regs:$s, Int32Regs:$x),
3957               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3958               []>;
3959 def SULD_1D_V2I32_TRAP
3960   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3961               (ins Int64Regs:$s, Int32Regs:$x),
3962               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3963               []>;
3964 def SULD_1D_V2I64_TRAP
3965   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3966               (ins Int64Regs:$s, Int32Regs:$x),
3967               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3968               []>;
3969
3970 def SULD_1D_ARRAY_V2I8_TRAP
3971   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3972               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3973               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3974               []>;
3975 def SULD_1D_ARRAY_V2I16_TRAP
3976   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3977               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3978               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3979               []>;
3980 def SULD_1D_ARRAY_V2I32_TRAP
3981   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3982               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3983               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3984               []>;
3985 def SULD_1D_ARRAY_V2I64_TRAP
3986   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3987               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3988               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3989               []>;
3990
3991 def SULD_2D_V2I8_TRAP
3992   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3993               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3994               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3995               []>;
3996 def SULD_2D_V2I16_TRAP
3997   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3998               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3999               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4000               []>;
4001 def SULD_2D_V2I32_TRAP
4002   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4003               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4004               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4005               []>;
4006 def SULD_2D_V2I64_TRAP
4007   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4008               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4009               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4010               []>;
4011
4012 def SULD_2D_ARRAY_V2I8_TRAP
4013   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4014               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4015               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4016               "[$s, \\{$l, $x, $y, $y\\}];",
4017               []>;
4018 def SULD_2D_ARRAY_V2I16_TRAP
4019   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4020               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4021               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4022               "[$s, \\{$l, $x, $y, $y\\}];",
4023               []>;
4024 def SULD_2D_ARRAY_V2I32_TRAP
4025   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4026               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4027               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4028               "[$s, \\{$l, $x, $y, $y\\}];",
4029               []>;
4030 def SULD_2D_ARRAY_V2I64_TRAP
4031   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4032               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4033               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4034               "[$s, \\{$l, $x, $y, $y\\}];",
4035               []>;
4036
4037 def SULD_3D_V2I8_TRAP
4038   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4039               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4040               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4041               []>;
4042 def SULD_3D_V2I16_TRAP
4043   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4044               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4045               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4046               []>;
4047 def SULD_3D_V2I32_TRAP
4048   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4049               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4050               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4051               []>;
4052 def SULD_3D_V2I64_TRAP
4053   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4054               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4055               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4056               []>;
4057 }
4058
// Four-component surface loads whose asm strings carry the ".v4" and ".trap"
// suffixes. The enclosing let gives every record in this block IsSuld = 3.
// NOTE(review): what reads IsSuld, and what the value 3 encodes, is not
// visible in this part of the file.
//
// Shape shared by all records below:
//   * outputs: four registers $r, $g, $b, $a -- Int16Regs for the b8 and b16
//     forms, Int32Regs for the b32 forms (this block has no b64 record);
//   * inputs: the surface operand $s (Int64Regs) followed by Int32Regs
//     coordinates; the *_ARRAY_* records take an extra leading $l
//     (presumably the array layer -- TODO confirm);
//   * pattern list is empty ([]), so no TableGen selection pattern matches
//     them; presumably they are picked by hand-written selection code.
//
// The 2D-array and 3D records print their last coordinate twice ($y, $y or
// $z, $z) so the bracketed coordinate list has four entries; presumably the
// instruction wants a 4-element vector there and ignores the last slot
// (confirm against the PTX ISA).
4059 let IsSuld = 3 in {
4060 def SULD_1D_V4I8_TRAP
4061   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4062               (ins Int64Regs:$s, Int32Regs:$x),
4063               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4064               []>;
4065 def SULD_1D_V4I16_TRAP
4066   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4067               (ins Int64Regs:$s, Int32Regs:$x),
4068               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4069               []>;
4070 def SULD_1D_V4I32_TRAP
4071   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4072               (ins Int64Regs:$s, Int32Regs:$x),
4073               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4074               []>;
4075
4076 def SULD_1D_ARRAY_V4I8_TRAP
4077   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4078               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4079               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4080               "[$s, \\{$l, $x\\}];",
4081               []>;
4082 def SULD_1D_ARRAY_V4I16_TRAP
4083   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4084               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4085               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4086               "[$s, \\{$l, $x\\}];",
4087               []>;
4088 def SULD_1D_ARRAY_V4I32_TRAP
4089   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4090               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4091               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4092               "[$s, \\{$l, $x\\}];",
4093               []>;
4094
4095 def SULD_2D_V4I8_TRAP
4096   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4097               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4098               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4099               []>;
4100 def SULD_2D_V4I16_TRAP
4101   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4102               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4103               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4104               []>;
4105 def SULD_2D_V4I32_TRAP
4106   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4107               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4108               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4109               []>;
4110
4111 def SULD_2D_ARRAY_V4I8_TRAP
4112   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4113               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4114               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4115               "[$s, \\{$l, $x, $y, $y\\}];",
4116               []>;
4117 def SULD_2D_ARRAY_V4I16_TRAP
4118   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4119               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4120               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4121               "[$s, \\{$l, $x, $y, $y\\}];",
4122               []>;
4123 def SULD_2D_ARRAY_V4I32_TRAP
4124   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4125               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4126               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4127               "[$s, \\{$l, $x, $y, $y\\}];",
4128               []>;
4129
4130
4131 def SULD_3D_V4I8_TRAP
4132   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4133               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4134               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4135               "[$s, \\{$x, $y, $z, $z\\}];",
4136               []>;
4137 def SULD_3D_V4I16_TRAP
4138   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4139               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4140               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4141               "[$s, \\{$x, $y, $z, $z\\}];",
4142               []>;
4143 def SULD_3D_V4I32_TRAP
4144   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4145               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4146               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4147               "[$s, \\{$x, $y, $z, $z\\}];",
4148               []>;
4149 }
4150
4151 // .zero variant
// Single-result surface loads whose asm strings carry the ".zero" suffix
// (no ".vN" component suffix). The enclosing let gives every record in this
// block IsSuld = 1.
// NOTE(review): what reads IsSuld, and what the value 1 encodes, is not
// visible in this part of the file.
//
// Shape shared by all records below:
//   * output: one register $r -- Int16Regs for the b8 and b16 forms,
//     Int32Regs for b32, Int64Regs for b64;
//   * inputs: the surface operand $s (Int64Regs) followed by Int32Regs
//     coordinates; the *_ARRAY_* records take an extra leading $l
//     (presumably the array layer -- TODO confirm);
//   * pattern list is empty ([]), so no TableGen selection pattern matches
//     them; presumably they are picked by hand-written selection code.
//
// The 2D-array and 3D records print their last coordinate twice ($y, $y or
// $z, $z) so the bracketed coordinate list has four entries; presumably the
// instruction wants a 4-element vector there and ignores the last slot
// (confirm against the PTX ISA).
4152 let IsSuld = 1 in {
4153 def SULD_1D_I8_ZERO
4154   : NVPTXInst<(outs Int16Regs:$r),
4155               (ins Int64Regs:$s, Int32Regs:$x),
4156               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4157               []>;
4158 def SULD_1D_I16_ZERO
4159   : NVPTXInst<(outs Int16Regs:$r),
4160               (ins Int64Regs:$s, Int32Regs:$x),
4161               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4162               []>;
4163 def SULD_1D_I32_ZERO
4164   : NVPTXInst<(outs Int32Regs:$r),
4165               (ins Int64Regs:$s, Int32Regs:$x),
4166               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4167               []>;
4168 def SULD_1D_I64_ZERO
4169   : NVPTXInst<(outs Int64Regs:$r),
4170               (ins Int64Regs:$s, Int32Regs:$x),
4171               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4172               []>;
4173
4174 def SULD_1D_ARRAY_I8_ZERO
4175   : NVPTXInst<(outs Int16Regs:$r),
4176               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4177               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4178               []>;
4179 def SULD_1D_ARRAY_I16_ZERO
4180   : NVPTXInst<(outs Int16Regs:$r),
4181               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4182               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4183               []>;
4184 def SULD_1D_ARRAY_I32_ZERO
4185   : NVPTXInst<(outs Int32Regs:$r),
4186               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4187               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4188               []>;
4189 def SULD_1D_ARRAY_I64_ZERO
4190   : NVPTXInst<(outs Int64Regs:$r),
4191               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4192               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4193               []>;
4194
4195 def SULD_2D_I8_ZERO
4196   : NVPTXInst<(outs Int16Regs:$r),
4197               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4198               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4199               []>;
4200 def SULD_2D_I16_ZERO
4201   : NVPTXInst<(outs Int16Regs:$r),
4202               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4203               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4204               []>;
4205 def SULD_2D_I32_ZERO
4206   : NVPTXInst<(outs Int32Regs:$r),
4207               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4208               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4209               []>;
4210 def SULD_2D_I64_ZERO
4211   : NVPTXInst<(outs Int64Regs:$r),
4212               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4213               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4214               []>;
4215
4216 def SULD_2D_ARRAY_I8_ZERO
4217   : NVPTXInst<(outs Int16Regs:$r),
4218               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4219               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4220               []>;
4221 def SULD_2D_ARRAY_I16_ZERO
4222   : NVPTXInst<(outs Int16Regs:$r),
4223               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4224               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4225               []>;
4226 def SULD_2D_ARRAY_I32_ZERO
4227   : NVPTXInst<(outs Int32Regs:$r),
4228               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4229               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4230               []>;
4231 def SULD_2D_ARRAY_I64_ZERO
4232   : NVPTXInst<(outs Int64Regs:$r),
4233               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4234               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4235               []>;
4236
4237 def SULD_3D_I8_ZERO
4238   : NVPTXInst<(outs Int16Regs:$r),
4239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4240               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4241               []>;
4242 def SULD_3D_I16_ZERO
4243   : NVPTXInst<(outs Int16Regs:$r),
4244               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4245               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4246               []>;
4247 def SULD_3D_I32_ZERO
4248   : NVPTXInst<(outs Int32Regs:$r),
4249               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4250               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4251               []>;
4252 def SULD_3D_I64_ZERO
4253   : NVPTXInst<(outs Int64Regs:$r),
4254               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4255               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4256               []>;
4257 }
4258
// Two-component surface loads whose asm strings carry the ".v2" and ".zero"
// suffixes. The enclosing let gives every record in this block IsSuld = 2.
// NOTE(review): what reads IsSuld, and what the value 2 encodes, is not
// visible in this part of the file.
//
// Shape shared by all records below:
//   * outputs: two registers $r, $g -- Int16Regs for the b8 and b16 forms,
//     Int32Regs for b32, Int64Regs for b64;
//   * inputs: the surface operand $s (Int64Regs) followed by Int32Regs
//     coordinates; the *_ARRAY_* records take an extra leading $l
//     (presumably the array layer -- TODO confirm);
//   * pattern list is empty ([]), so no TableGen selection pattern matches
//     them; presumably they are picked by hand-written selection code.
//
// The 2D-array and 3D records print their last coordinate twice ($y, $y or
// $z, $z) so the bracketed coordinate list has four entries; presumably the
// instruction wants a 4-element vector there and ignores the last slot
// (confirm against the PTX ISA).
4259 let IsSuld = 2 in {
4260 def SULD_1D_V2I8_ZERO
4261   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4262               (ins Int64Regs:$s, Int32Regs:$x),
4263               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4264               []>;
4265 def SULD_1D_V2I16_ZERO
4266   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4267               (ins Int64Regs:$s, Int32Regs:$x),
4268               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4269               []>;
4270 def SULD_1D_V2I32_ZERO
4271   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4272               (ins Int64Regs:$s, Int32Regs:$x),
4273               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4274               []>;
4275 def SULD_1D_V2I64_ZERO
4276   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4277               (ins Int64Regs:$s, Int32Regs:$x),
4278               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4279               []>;
4280
4281 def SULD_1D_ARRAY_V2I8_ZERO
4282   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4283               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4284               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4285               []>;
4286 def SULD_1D_ARRAY_V2I16_ZERO
4287   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4288               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4289               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4290               []>;
4291 def SULD_1D_ARRAY_V2I32_ZERO
4292   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4293               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4294               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4295               []>;
4296 def SULD_1D_ARRAY_V2I64_ZERO
4297   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4298               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4299               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4300               []>;
4301
4302 def SULD_2D_V2I8_ZERO
4303   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4304               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4305               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4306               []>;
4307 def SULD_2D_V2I16_ZERO
4308   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4309               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4310               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4311               []>;
4312 def SULD_2D_V2I32_ZERO
4313   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4314               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4315               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4316               []>;
4317 def SULD_2D_V2I64_ZERO
4318   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4319               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4320               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4321               []>;
4322
4323 def SULD_2D_ARRAY_V2I8_ZERO
4324   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4325               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4326               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4327               "[$s, \\{$l, $x, $y, $y\\}];",
4328               []>;
4329 def SULD_2D_ARRAY_V2I16_ZERO
4330   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4331               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4332               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4333               "[$s, \\{$l, $x, $y, $y\\}];",
4334               []>;
4335 def SULD_2D_ARRAY_V2I32_ZERO
4336   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4337               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4338               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4339               "[$s, \\{$l, $x, $y, $y\\}];",
4340               []>;
4341 def SULD_2D_ARRAY_V2I64_ZERO
4342   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4343               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4344               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4345               "[$s, \\{$l, $x, $y, $y\\}];",
4346               []>;
4347
4348 def SULD_3D_V2I8_ZERO
4349   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4350               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4351               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4352               []>;
4353 def SULD_3D_V2I16_ZERO
4354   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4355               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4356               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4357               []>;
4358 def SULD_3D_V2I32_ZERO
4359   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4360               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4361               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4362               []>;
4363 def SULD_3D_V2I64_ZERO
4364   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4365               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4366               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4367               []>;
4368 }
4369
4370 let IsSuld = 3 in {
4371 def SULD_1D_V4I8_ZERO
4372   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4373               (ins Int64Regs:$s, Int32Regs:$x),
4374               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4375               []>;
4376 def SULD_1D_V4I16_ZERO
4377   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4378               (ins Int64Regs:$s, Int32Regs:$x),
4379               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4380               []>;
4381 def SULD_1D_V4I32_ZERO
4382   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4383               (ins Int64Regs:$s, Int32Regs:$x),
4384               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4385               []>;
4386
4387 def SULD_1D_ARRAY_V4I8_ZERO
4388   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4389               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4390               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4391               "[$s, \\{$l, $x\\}];",
4392               []>;
4393 def SULD_1D_ARRAY_V4I16_ZERO
4394   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4395               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4396               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4397               "[$s, \\{$l, $x\\}];",
4398               []>;
4399 def SULD_1D_ARRAY_V4I32_ZERO
4400   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4401               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4402               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4403               "[$s, \\{$l, $x\\}];",
4404               []>;
4405
4406 def SULD_2D_V4I8_ZERO
4407   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4408               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4409               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4410               []>;
4411 def SULD_2D_V4I16_ZERO
4412   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4413               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4414               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4415               []>;
4416 def SULD_2D_V4I32_ZERO
4417   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4418               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4419               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4420               []>;
4421
// `suld.b.a2d.v4.*.zero`: four-element surface loads taking three coordinate
// operands ($l, $x, $y).  The emitted coordinate vector has four entries,
// {$l, $x, $y, $y}: $y is deliberately printed twice.
// NOTE(review): the repeated $y is presumably padding to the four-element
// coordinate form PTX requires for a2d (last element ignored) -- confirm
// against the PTX ISA before changing it.
// The .b8 form writes to Int16Regs, like the .b16 form.  Pattern lists are
// empty; no selection pattern is attached here.
4422 def SULD_2D_ARRAY_V4I8_ZERO
4423   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4424               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4425               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4426               "[$s, \\{$l, $x, $y, $y\\}];",
4427               []>;
4428 def SULD_2D_ARRAY_V4I16_ZERO
4429   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4430               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4431               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4432               "[$s, \\{$l, $x, $y, $y\\}];",
4433               []>;
4434 def SULD_2D_ARRAY_V4I32_ZERO
4435   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4436               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4437               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4438               "[$s, \\{$l, $x, $y, $y\\}];",
4439               []>;
4440
4441
// `suld.b.3d.v4.*.zero`: four-element surface loads taking three coordinate
// operands ($x, $y, $z).  The emitted coordinate vector has four entries,
// {$x, $y, $z, $z}: $z is deliberately printed twice.
// NOTE(review): the repeated $z is presumably padding to the four-element
// coordinate form PTX requires for 3d (last element ignored) -- confirm
// against the PTX ISA before changing it.
// The .b8 form writes to Int16Regs, like the .b16 form.  Pattern lists are
// empty; no selection pattern is attached here.
4442 def SULD_3D_V4I8_ZERO
4443   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4444               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4446               "[$s, \\{$x, $y, $z, $z\\}];",
4447               []>;
4448 def SULD_3D_V4I16_ZERO
4449   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4450               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4451               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4452               "[$s, \\{$x, $y, $z, $z\\}];",
4453               []>;
4454 def SULD_3D_V4I32_ZERO
4455   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4456               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4457               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4458               "[$s, \\{$x, $y, $z, $z\\}];",
4459               []>;
4460 }
4461
4462 //-----------------------------------
4463 // Texture Query Intrinsics
4464 //-----------------------------------
4465
// `txq.<attribute>.b32` instructions.  Every record has the same shape: one
// 32-bit result $d and one 64-bit operand $a that is printed as the bracketed
// address `[$a]`.  Only the queried attribute name in the mnemonic differs.
// The enclosing `let` sets IsSurfTexQuery = 1 on all eight records.
// The pattern lists are empty; the intrinsic-to-instruction mapping is done
// by the separate anonymous `Pat` records that follow this block.
4466 let IsSurfTexQuery = 1 in {
4467 def TXQ_CHANNEL_ORDER
4468   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4469               "txq.channel_order.b32 \t$d, [$a];",
4470               []>;
4471 def TXQ_CHANNEL_DATA_TYPE
4472   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4473               "txq.channel_data_type.b32 \t$d, [$a];",
4474               []>;
4475 def TXQ_WIDTH
4476   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4477               "txq.width.b32 \t$d, [$a];",
4478               []>;
4479 def TXQ_HEIGHT
4480   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4481               "txq.height.b32 \t$d, [$a];",
4482               []>;
4483 def TXQ_DEPTH
4484   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4485               "txq.depth.b32 \t$d, [$a];",
4486               []>;
4487 def TXQ_ARRAY_SIZE
4488   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4489               "txq.array_size.b32 \t$d, [$a];",
4490               []>;
4491 def TXQ_NUM_SAMPLES
4492   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4493               "txq.num_samples.b32 \t$d, [$a];",
4494               []>;
4495 def TXQ_NUM_MIPMAP_LEVELS
4496   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4497               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4498               []>;
4499 }
4500
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to a 64-bit register operand is rewritten to the TXQ_* instruction
// of the same name, passing the operand through unchanged.  One pattern per
// TXQ_* record defined above.
4501 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4502           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4503 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4504           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4505 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4506           (TXQ_WIDTH Int64Regs:$a)>;
4507 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4508           (TXQ_HEIGHT Int64Regs:$a)>;
4509 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4510           (TXQ_DEPTH Int64Regs:$a)>;
4511 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4512           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4513 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4514           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4515 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4516           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4517
4518
4519 //-----------------------------------
4520 // Surface Query Intrinsics
4521 //-----------------------------------
4522
// `suq.<attribute>.b32` instructions.  Same record shape as the TXQ_* group:
// one 32-bit result $d, one 64-bit operand $a printed as `[$a]`, and only the
// attribute name in the mnemonic varies.  The enclosing `let` sets
// IsSurfTexQuery = 1 on all six records.
// Unlike the TXQ_* group there are no num_samples / num_mipmap_levels
// variants here.  Pattern lists are empty; the mapping from intrinsics is
// done by the anonymous `Pat` records that follow this block.
4523 let IsSurfTexQuery = 1 in {
4524 def SUQ_CHANNEL_ORDER
4525   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4526               "suq.channel_order.b32 \t$d, [$a];",
4527               []>;
4528 def SUQ_CHANNEL_DATA_TYPE
4529   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4530               "suq.channel_data_type.b32 \t$d, [$a];",
4531               []>;
4532 def SUQ_WIDTH
4533   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4534               "suq.width.b32 \t$d, [$a];",
4535               []>;
4536 def SUQ_HEIGHT
4537   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4538               "suq.height.b32 \t$d, [$a];",
4539               []>;
4540 def SUQ_DEPTH
4541   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4542               "suq.depth.b32 \t$d, [$a];",
4543               []>;
4544 def SUQ_ARRAY_SIZE
4545   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4546               "suq.array_size.b32 \t$d, [$a];",
4547               []>;
4548 }
4549
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to a 64-bit register operand is rewritten to the SUQ_* instruction
// of the same name, passing the operand through unchanged.  One pattern per
// SUQ_* record defined above.
4550 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4551           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4552 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4553           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4554 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4555           (SUQ_WIDTH Int64Regs:$a)>;
4556 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4557           (SUQ_HEIGHT Int64Regs:$a)>;
4558 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4559           (SUQ_DEPTH Int64Regs:$a)>;
4560 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4561           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4562
4563
4564 //===- Handle Query -------------------------------------------------------===//
4565
4566 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// `istypep.{samplerref,surfref,texref}`: each takes a 64-bit operand $a and
// produces a 1-bit (Int1Regs) result $d.  Unlike the query instructions
// above, the operand is printed bare (`$a`, not `[$a]`) and the selection
// pattern is attached inline: the matching int_nvvm_istypep_* intrinsic is
// selected directly to the instruction.
4567 def ISTYPEP_SAMPLER
4568   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4569               "istypep.samplerref \t$d, $a;",
4570               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4571 def ISTYPEP_SURFACE
4572   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4573               "istypep.surfref \t$d, $a;",
4574               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4575 def ISTYPEP_TEXTURE
4576   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4577               "istypep.texref \t$d, $a;",
4578               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4579
4580 //===- Surface Stores -----------------------------------------------------===//
4581
4582 let IsSust = 1 in {
4583 // Unformatted
4584 // .clamp variant
// `sust.b.1d.*.clamp`: surface stores with a single coordinate.  The records
// have no outputs.  Inputs, in order: $s (64-bit handle operand), $x
// (32-bit coordinate), then the data registers $r[, $g[, $b, $a]] for the
// scalar, .v2 and .v4 forms respectively.
// Data register class by element size: b8 and b16 both use Int16Regs, b32
// uses Int32Regs, b64 uses Int64Regs.  There is no V4B64 record in this
// group.  Pattern lists are empty; no selection pattern is attached here.
// NOTE(review): presumably selected by custom C++ ISel code -- confirm.
4585 def SUST_B_1D_B8_CLAMP
4586   : NVPTXInst<(outs),
4587               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4588               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4589               []>;
4590 def SUST_B_1D_B16_CLAMP
4591   : NVPTXInst<(outs),
4592               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4593               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4594               []>;
4595 def SUST_B_1D_B32_CLAMP
4596   : NVPTXInst<(outs),
4597               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4598               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4599               []>;
4600 def SUST_B_1D_B64_CLAMP
4601   : NVPTXInst<(outs),
4602               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4603               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4604               []>;
4605 def SUST_B_1D_V2B8_CLAMP
4606   : NVPTXInst<(outs),
4607               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4608               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4609               []>;
4610 def SUST_B_1D_V2B16_CLAMP
4611   : NVPTXInst<(outs),
4612               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4613               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4614               []>;
4615 def SUST_B_1D_V2B32_CLAMP
4616   : NVPTXInst<(outs),
4617               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4618               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4619               []>;
4620 def SUST_B_1D_V2B64_CLAMP
4621   : NVPTXInst<(outs),
4622               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4623               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4624               []>;
4625 def SUST_B_1D_V4B8_CLAMP
4626   : NVPTXInst<(outs),
4627               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4628                    Int16Regs:$b, Int16Regs:$a),
4629               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4630               []>;
4631 def SUST_B_1D_V4B16_CLAMP
4632   : NVPTXInst<(outs),
4633               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4634                    Int16Regs:$b, Int16Regs:$a),
4635               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4636               []>;
4637 def SUST_B_1D_V4B32_CLAMP
4638   : NVPTXInst<(outs),
4639               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4640                    Int32Regs:$b, Int32Regs:$a),
4641               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4642               []>;
4643
4644
// `sust.b.a1d.*.clamp`: surface stores addressed with the two-element
// coordinate vector {$idx, $x}.  No outputs.  Inputs, in order: $s (64-bit
// handle operand), $idx and $x (32-bit), then the data registers
// $r[, $g[, $b, $a]] for the scalar, .v2 and .v4 forms.
// b8 and b16 data both use Int16Regs; b32 uses Int32Regs; b64 uses
// Int64Regs.  There is no V4B64 record.  Pattern lists are empty.
// NOTE(review): $idx is presumably the array layer index -- confirm.
4645 def SUST_B_1D_ARRAY_B8_CLAMP
4646   : NVPTXInst<(outs),
4647               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4648               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4649               []>;
4650 def SUST_B_1D_ARRAY_B16_CLAMP
4651   : NVPTXInst<(outs),
4652               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4653               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4654               []>;
4655 def SUST_B_1D_ARRAY_B32_CLAMP
4656   : NVPTXInst<(outs),
4657               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4658               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4659               []>;
4660 def SUST_B_1D_ARRAY_B64_CLAMP
4661   : NVPTXInst<(outs),
4662               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4663               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4664               []>;
4665 def SUST_B_1D_ARRAY_V2B8_CLAMP
4666   : NVPTXInst<(outs),
4667               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4668                    Int16Regs:$g),
4669               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4670               []>;
4671 def SUST_B_1D_ARRAY_V2B16_CLAMP
4672   : NVPTXInst<(outs),
4673               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4674                    Int16Regs:$g),
4675               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4676               []>;
4677 def SUST_B_1D_ARRAY_V2B32_CLAMP
4678   : NVPTXInst<(outs),
4679               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4680                    Int32Regs:$g),
4681               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4682               []>;
4683 def SUST_B_1D_ARRAY_V2B64_CLAMP
4684   : NVPTXInst<(outs),
4685               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4686                    Int64Regs:$g),
4687               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4688               []>;
4689 def SUST_B_1D_ARRAY_V4B8_CLAMP
4690   : NVPTXInst<(outs),
4691               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4692                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4693               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4694               "\\{$r, $g, $b, $a\\};",
4695               []>;
4696 def SUST_B_1D_ARRAY_V4B16_CLAMP
4697   : NVPTXInst<(outs),
4698               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4699                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4700              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4701              "\\{$r, $g, $b, $a\\};",
4702               []>;
4703 def SUST_B_1D_ARRAY_V4B32_CLAMP
4704   : NVPTXInst<(outs),
4705               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4706                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4707              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4708              "\\{$r, $g, $b, $a\\};",
4709               []>;
4710
4711
// `sust.b.2d.*.clamp`: surface stores addressed with the two-element
// coordinate vector {$x, $y}.  No outputs.  Inputs, in order: $s (64-bit
// handle operand), $x and $y (32-bit), then the data registers
// $r[, $g[, $b, $a]] for the scalar, .v2 and .v4 forms.
// b8 and b16 data both use Int16Regs; b32 uses Int32Regs; b64 uses
// Int64Regs.  There is no V4B64 record.  Pattern lists are empty; no
// selection pattern is attached here.
4712 def SUST_B_2D_B8_CLAMP
4713   : NVPTXInst<(outs),
4714               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4715               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4716               []>;
4717 def SUST_B_2D_B16_CLAMP
4718   : NVPTXInst<(outs),
4719               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4720               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4721               []>;
4722 def SUST_B_2D_B32_CLAMP
4723   : NVPTXInst<(outs),
4724               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4725               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4726               []>;
4727 def SUST_B_2D_B64_CLAMP
4728   : NVPTXInst<(outs),
4729               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4730               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4731               []>;
4732 def SUST_B_2D_V2B8_CLAMP
4733   : NVPTXInst<(outs),
4734               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4735                    Int16Regs:$g),
4736               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4737               []>;
4738 def SUST_B_2D_V2B16_CLAMP
4739   : NVPTXInst<(outs),
4740               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4741                    Int16Regs:$g),
4742               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4743               []>;
4744 def SUST_B_2D_V2B32_CLAMP
4745   : NVPTXInst<(outs),
4746               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4747                    Int32Regs:$g),
4748               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4749               []>;
4750 def SUST_B_2D_V2B64_CLAMP
4751   : NVPTXInst<(outs),
4752               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4753                    Int64Regs:$g),
4754               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4755               []>;
4756 def SUST_B_2D_V4B8_CLAMP
4757   : NVPTXInst<(outs),
4758               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4759                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4760               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4761               "\\{$r, $g, $b, $a\\};",
4762               []>;
4763 def SUST_B_2D_V4B16_CLAMP
4764   : NVPTXInst<(outs),
4765               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4766                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4767              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4768              "\\{$r, $g, $b, $a\\};",
4769               []>;
4770 def SUST_B_2D_V4B32_CLAMP
4771   : NVPTXInst<(outs),
4772               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4773                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4774              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4775              "\\{$r, $g, $b, $a\\};",
4776               []>;
4777
4778
// `sust.b.a2d.*.clamp`: surface stores taking three coordinate operands
// ($idx, $x, $y).  The emitted coordinate vector has four entries,
// {$idx, $x, $y, $y}: $y is deliberately printed twice in every record.
// NOTE(review): the repeated $y is presumably padding to the four-element
// coordinate form PTX requires for a2d (last element ignored) -- confirm
// against the PTX ISA before changing it.
// No outputs.  Inputs, in order: $s (64-bit handle operand), $idx, $x, $y
// (32-bit), then the data registers $r[, $g[, $b, $a]].  b8 and b16 data
// both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.  There is no
// V4B64 record.  Pattern lists are empty.
4779 def SUST_B_2D_ARRAY_B8_CLAMP
4780   : NVPTXInst<(outs),
4781               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4782                    Int16Regs:$r),
4783               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4784               []>;
4785 def SUST_B_2D_ARRAY_B16_CLAMP
4786   : NVPTXInst<(outs),
4787               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4788                    Int16Regs:$r),
4789               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4790               []>;
4791 def SUST_B_2D_ARRAY_B32_CLAMP
4792   : NVPTXInst<(outs),
4793               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4794                    Int32Regs:$r),
4795               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4796               []>;
4797 def SUST_B_2D_ARRAY_B64_CLAMP
4798   : NVPTXInst<(outs),
4799               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4800                    Int64Regs:$r),
4801               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4802               []>;
4803 def SUST_B_2D_ARRAY_V2B8_CLAMP
4804   : NVPTXInst<(outs),
4805               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4806                    Int16Regs:$r, Int16Regs:$g),
4807               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4808               "\\{$r, $g\\};",
4809               []>;
4810 def SUST_B_2D_ARRAY_V2B16_CLAMP
4811   : NVPTXInst<(outs),
4812               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4813                    Int16Regs:$r, Int16Regs:$g),
4814              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4815              "\\{$r, $g\\};",
4816               []>;
4817 def SUST_B_2D_ARRAY_V2B32_CLAMP
4818   : NVPTXInst<(outs),
4819               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4820                    Int32Regs:$r, Int32Regs:$g),
4821              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4822              "\\{$r, $g\\};",
4823               []>;
4824 def SUST_B_2D_ARRAY_V2B64_CLAMP
4825   : NVPTXInst<(outs),
4826               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4827                    Int64Regs:$r, Int64Regs:$g),
4828              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4829              "\\{$r, $g\\};",
4830               []>;
4831 def SUST_B_2D_ARRAY_V4B8_CLAMP
4832   : NVPTXInst<(outs),
4833               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4834                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4835       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4836       "\\{$r, $g, $b, $a\\};",
4837               []>;
4838 def SUST_B_2D_ARRAY_V4B16_CLAMP
4839   : NVPTXInst<(outs),
4840               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4841                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4842      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4843      "\\{$r, $g, $b, $a\\};",
4844               []>;
4845 def SUST_B_2D_ARRAY_V4B32_CLAMP
4846   : NVPTXInst<(outs),
4847               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4848                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4849      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4850      "\\{$r, $g, $b, $a\\};",
4851               []>;
4852
4853
// `sust.b.3d.*.clamp`: surface stores taking three coordinate operands
// ($x, $y, $z).  The emitted coordinate vector has four entries,
// {$x, $y, $z, $z}: $z is deliberately printed twice in every record.
// NOTE(review): the repeated $z is presumably padding to the four-element
// coordinate form PTX requires for 3d (last element ignored) -- confirm
// against the PTX ISA before changing it.
// No outputs.  Inputs, in order: $s (64-bit handle operand), $x, $y, $z
// (32-bit), then the data registers $r[, $g[, $b, $a]].  b8 and b16 data
// both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.  There is no
// V4B64 record.  Pattern lists are empty.
4854 def SUST_B_3D_B8_CLAMP
4855   : NVPTXInst<(outs),
4856               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4857                    Int16Regs:$r),
4858               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4859               []>;
4860 def SUST_B_3D_B16_CLAMP
4861   : NVPTXInst<(outs),
4862               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4863                    Int16Regs:$r),
4864               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4865               []>;
4866 def SUST_B_3D_B32_CLAMP
4867   : NVPTXInst<(outs),
4868               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4869                    Int32Regs:$r),
4870               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4871               []>;
4872 def SUST_B_3D_B64_CLAMP
4873   : NVPTXInst<(outs),
4874               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4875                    Int64Regs:$r),
4876               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4877               []>;
4878 def SUST_B_3D_V2B8_CLAMP
4879   : NVPTXInst<(outs),
4880               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4881                    Int16Regs:$r, Int16Regs:$g),
4882               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4883               "\\{$r, $g\\};",
4884               []>;
4885 def SUST_B_3D_V2B16_CLAMP
4886   : NVPTXInst<(outs),
4887               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4888                    Int16Regs:$r, Int16Regs:$g),
4889               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4890               "\\{$r, $g\\};",
4891               []>;
4892 def SUST_B_3D_V2B32_CLAMP
4893   : NVPTXInst<(outs),
4894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4895                    Int32Regs:$r, Int32Regs:$g),
4896               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4897               "\\{$r, $g\\};",
4898               []>;
4899 def SUST_B_3D_V2B64_CLAMP
4900   : NVPTXInst<(outs),
4901               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4902                    Int64Regs:$r, Int64Regs:$g),
4903               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4904               "\\{$r, $g\\};",
4905               []>;
4906 def SUST_B_3D_V4B8_CLAMP
4907   : NVPTXInst<(outs),
4908               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4909                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4910          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4911          "\\{$r, $g, $b, $a\\};",
4912               []>;
4913 def SUST_B_3D_V4B16_CLAMP
4914   : NVPTXInst<(outs),
4915               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4916                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4917         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4918         "\\{$r, $g, $b, $a\\};",
4919               []>;
4920 def SUST_B_3D_V4B32_CLAMP
4921   : NVPTXInst<(outs),
4922               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4923                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4924         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
4925         "\\{$r, $g, $b, $a\\};",
4926               []>;
4927
4928
4929 // .trap variant
// `sust.b.1d.*.trap`: same operand layout as the SUST_B_1D_*_CLAMP records,
// differing only in the `.trap` mode suffix of the mnemonic.  No outputs.
// Inputs, in order: $s (64-bit handle operand), $x (32-bit coordinate), then
// the data registers $r[, $g[, $b, $a]] for the scalar, .v2 and .v4 forms.
// b8 and b16 data both use Int16Regs; b32 uses Int32Regs; b64 uses
// Int64Regs.  There is no V4B64 record.  Pattern lists are empty; no
// selection pattern is attached here.
4930 def SUST_B_1D_B8_TRAP
4931   : NVPTXInst<(outs),
4932               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4933               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
4934               []>;
4935 def SUST_B_1D_B16_TRAP
4936   : NVPTXInst<(outs),
4937               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4938               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
4939               []>;
4940 def SUST_B_1D_B32_TRAP
4941   : NVPTXInst<(outs),
4942               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4943               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
4944               []>;
4945 def SUST_B_1D_B64_TRAP
4946   : NVPTXInst<(outs),
4947               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4948               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
4949               []>;
4950 def SUST_B_1D_V2B8_TRAP
4951   : NVPTXInst<(outs),
4952               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4953               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4954               []>;
4955 def SUST_B_1D_V2B16_TRAP
4956   : NVPTXInst<(outs),
4957               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4958               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4959               []>;
4960 def SUST_B_1D_V2B32_TRAP
4961   : NVPTXInst<(outs),
4962               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4963               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4964               []>;
4965 def SUST_B_1D_V2B64_TRAP
4966   : NVPTXInst<(outs),
4967               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4968               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
4969               []>;
4970 def SUST_B_1D_V4B8_TRAP
4971   : NVPTXInst<(outs),
4972               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4973                    Int16Regs:$b, Int16Regs:$a),
4974               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4975               []>;
4976 def SUST_B_1D_V4B16_TRAP
4977   : NVPTXInst<(outs),
4978               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4979                    Int16Regs:$b, Int16Regs:$a),
4980               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4981               []>;
4982 def SUST_B_1D_V4B32_TRAP
4983   : NVPTXInst<(outs),
4984               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4985                    Int32Regs:$b, Int32Regs:$a),
4986               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4987               []>;
4988
4989
// `sust.b.a1d.*.trap`: same operand layout as the SUST_B_1D_ARRAY_*_CLAMP
// records, differing only in the `.trap` mode suffix.  No outputs.  Inputs,
// in order: $s (64-bit handle operand), $idx and $x (32-bit, printed as the
// coordinate vector {$idx, $x}), then the data registers $r[, $g[, $b, $a]].
// b8 and b16 data both use Int16Regs; b32 uses Int32Regs; b64 uses
// Int64Regs.  There is no V4B64 record.  Pattern lists are empty.
// NOTE(review): $idx is presumably the array layer index -- confirm.
4990 def SUST_B_1D_ARRAY_B8_TRAP
4991   : NVPTXInst<(outs),
4992               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4993               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4994               []>;
4995 def SUST_B_1D_ARRAY_B16_TRAP
4996   : NVPTXInst<(outs),
4997               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4998               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4999               []>;
5000 def SUST_B_1D_ARRAY_B32_TRAP
5001   : NVPTXInst<(outs),
5002               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5003               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5004               []>;
5005 def SUST_B_1D_ARRAY_B64_TRAP
5006   : NVPTXInst<(outs),
5007               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5008               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5009               []>;
5010 def SUST_B_1D_ARRAY_V2B8_TRAP
5011   : NVPTXInst<(outs),
5012               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5013                    Int16Regs:$g),
5014               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5015               []>;
5016 def SUST_B_1D_ARRAY_V2B16_TRAP
5017   : NVPTXInst<(outs),
5018               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5019                    Int16Regs:$g),
5020               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5021               []>;
5022 def SUST_B_1D_ARRAY_V2B32_TRAP
5023   : NVPTXInst<(outs),
5024               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5025                    Int32Regs:$g),
5026               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5027               []>;
5028 def SUST_B_1D_ARRAY_V2B64_TRAP
5029   : NVPTXInst<(outs),
5030               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5031                    Int64Regs:$g),
5032               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5033               []>;
5034 def SUST_B_1D_ARRAY_V4B8_TRAP
5035   : NVPTXInst<(outs),
5036               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5037                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5038               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5039               "\\{$r, $g, $b, $a\\};",
5040               []>;
5041 def SUST_B_1D_ARRAY_V4B16_TRAP
5042   : NVPTXInst<(outs),
5043               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5044                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5045              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5046              "\\{$r, $g, $b, $a\\};",
5047               []>;
5048 def SUST_B_1D_ARRAY_V4B32_TRAP
5049   : NVPTXInst<(outs),
5050               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5051                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5052              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5053              "\\{$r, $g, $b, $a\\};",
5054               []>;
5055
5056
// `sust.b.2d.*.trap`: same operand layout as the SUST_B_2D_*_CLAMP records,
// differing only in the `.trap` mode suffix.  No outputs.  Inputs, in order:
// $s (64-bit handle operand), $x and $y (32-bit, printed as the coordinate
// vector {$x, $y}), then the data registers $r[, $g[, $b, $a]].
// b8 and b16 data both use Int16Regs; b32 uses Int32Regs; b64 uses
// Int64Regs.  There is no V4B64 record.  Pattern lists are empty; no
// selection pattern is attached here.
5057 def SUST_B_2D_B8_TRAP
5058   : NVPTXInst<(outs),
5059               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5060               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5061               []>;
5062 def SUST_B_2D_B16_TRAP
5063   : NVPTXInst<(outs),
5064               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5065               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5066               []>;
5067 def SUST_B_2D_B32_TRAP
5068   : NVPTXInst<(outs),
5069               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5070               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5071               []>;
5072 def SUST_B_2D_B64_TRAP
5073   : NVPTXInst<(outs),
5074               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5075               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5076               []>;
5077 def SUST_B_2D_V2B8_TRAP
5078   : NVPTXInst<(outs),
5079               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5080                    Int16Regs:$g),
5081               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5082               []>;
5083 def SUST_B_2D_V2B16_TRAP
5084   : NVPTXInst<(outs),
5085               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5086                    Int16Regs:$g),
5087               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5088               []>;
5089 def SUST_B_2D_V2B32_TRAP
5090   : NVPTXInst<(outs),
5091               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5092                    Int32Regs:$g),
5093               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5094               []>;
5095 def SUST_B_2D_V2B64_TRAP
5096   : NVPTXInst<(outs),
5097               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5098                    Int64Regs:$g),
5099               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5100               []>;
5101 def SUST_B_2D_V4B8_TRAP
5102   : NVPTXInst<(outs),
5103               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5104                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5105               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5106               "\\{$r, $g, $b, $a\\};",
5107               []>;
5108 def SUST_B_2D_V4B16_TRAP
5109   : NVPTXInst<(outs),
5110               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5111                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5112              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5113              "\\{$r, $g, $b, $a\\};",
5114               []>;
5115 def SUST_B_2D_V4B32_TRAP
5116   : NVPTXInst<(outs),
5117               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5118                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5119              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5120              "\\{$r, $g, $b, $a\\};",
5121               []>;
5122
5123
// `sust.b.a2d.*.trap` (scalar forms and the first three .v2 forms): same
// operand layout as the SUST_B_2D_ARRAY_*_CLAMP records, differing only in
// the `.trap` mode suffix.  The emitted coordinate vector has four entries,
// {$idx, $x, $y, $y}: $y is deliberately printed twice in every record.
// NOTE(review): the repeated $y is presumably padding to the four-element
// coordinate form PTX requires for a2d (last element ignored) -- confirm
// against the PTX ISA before changing it.
// No outputs.  Inputs, in order: $s (64-bit handle operand), $idx, $x, $y
// (32-bit), then the data registers $r[, $g].  b8 and b16 data both use
// Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.  Pattern lists are
// empty.
5124 def SUST_B_2D_ARRAY_B8_TRAP
5125   : NVPTXInst<(outs),
5126               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5127                    Int16Regs:$r),
5128               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5129               []>;
5130 def SUST_B_2D_ARRAY_B16_TRAP
5131   : NVPTXInst<(outs),
5132               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5133                    Int16Regs:$r),
5134               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5135               []>;
5136 def SUST_B_2D_ARRAY_B32_TRAP
5137   : NVPTXInst<(outs),
5138               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5139                    Int32Regs:$r),
5140               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5141               []>;
5142 def SUST_B_2D_ARRAY_B64_TRAP
5143   : NVPTXInst<(outs),
5144               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5145                    Int64Regs:$r),
5146               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5147               []>;
5148 def SUST_B_2D_ARRAY_V2B8_TRAP
5149   : NVPTXInst<(outs),
5150               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5151                    Int16Regs:$r, Int16Regs:$g),
5152               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5153               "\\{$r, $g\\};",
5154               []>;
5155 def SUST_B_2D_ARRAY_V2B16_TRAP
5156   : NVPTXInst<(outs),
5157               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5158                    Int16Regs:$r, Int16Regs:$g),
5159              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5160              "\\{$r, $g\\};",
5161               []>;
5162 def SUST_B_2D_ARRAY_V2B32_TRAP
5163   : NVPTXInst<(outs),
5164               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5165                    Int32Regs:$r, Int32Regs:$g),
5166              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5167              "\\{$r, $g\\};",
5168               []>;
5169 def SUST_B_2D_ARRAY_V2B64_TRAP
5170   : NVPTXInst<(outs),
5171               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5172                    Int64Regs:$r, Int64Regs:$g),
5173              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5174              "\\{$r, $g\\};",
5175               []>;
5176 def SUST_B_2D_ARRAY_V4B8_TRAP
5177   : NVPTXInst<(outs),
5178               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5179                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5180       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5181       "\\{$r, $g, $b, $a\\};",
5182               []>;
5183 def SUST_B_2D_ARRAY_V4B16_TRAP
5184   : NVPTXInst<(outs),
5185               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5186                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5187      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5188      "\\{$r, $g, $b, $a\\};",
5189               []>;
5190 def SUST_B_2D_ARRAY_V4B32_TRAP
5191   : NVPTXInst<(outs),
5192               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5193                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5194      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5195      "\\{$r, $g, $b, $a\\};",
5196               []>;
5197
5198
// sust.b.3d.*.trap records: one-, two- and four-channel stores, 8 to 64 bits
// per channel (the four-channel form stops at 32 bits).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// the 32-bit coordinates $x, $y and $z, then the channel values
// $r[, $g[, $b, $a]].
// The asm string prints a four-element coordinate vector and repeats $z as its
// last element. NOTE(review): the PTX sust description appears to require a
// four-element vector for 3d with the last element ignored -- confirm before
// treating the duplicate as a typo.
// b8 variants take their channel values in Int16Regs. No outputs, no pattern.
5199 def SUST_B_3D_B8_TRAP
5200   : NVPTXInst<(outs),
5201               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5202                    Int16Regs:$r),
5203               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5204               []>;
5205 def SUST_B_3D_B16_TRAP
5206   : NVPTXInst<(outs),
5207               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5208                    Int16Regs:$r),
5209               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5210               []>;
5211 def SUST_B_3D_B32_TRAP
5212   : NVPTXInst<(outs),
5213               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5214                    Int32Regs:$r),
5215               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5216               []>;
5217 def SUST_B_3D_B64_TRAP
5218   : NVPTXInst<(outs),
5219               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5220                    Int64Regs:$r),
5221               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5222               []>;
5223 def SUST_B_3D_V2B8_TRAP
5224   : NVPTXInst<(outs),
5225               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5226                    Int16Regs:$r, Int16Regs:$g),
5227               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5228               "\\{$r, $g\\};",
5229               []>;
5230 def SUST_B_3D_V2B16_TRAP
5231   : NVPTXInst<(outs),
5232               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5233                    Int16Regs:$r, Int16Regs:$g),
5234               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5235               "\\{$r, $g\\};",
5236               []>;
5237 def SUST_B_3D_V2B32_TRAP
5238   : NVPTXInst<(outs),
5239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5240                    Int32Regs:$r, Int32Regs:$g),
5241               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5242               "\\{$r, $g\\};",
5243               []>;
5244 def SUST_B_3D_V2B64_TRAP
5245   : NVPTXInst<(outs),
5246               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5247                    Int64Regs:$r, Int64Regs:$g),
5248               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5249               "\\{$r, $g\\};",
5250               []>;
5251 def SUST_B_3D_V4B8_TRAP
5252   : NVPTXInst<(outs),
5253               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5254                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5255          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5256          "\\{$r, $g, $b, $a\\};",
5257               []>;
5258 def SUST_B_3D_V4B16_TRAP
5259   : NVPTXInst<(outs),
5260               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5261                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5262         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5263         "\\{$r, $g, $b, $a\\};",
5264               []>;
5265 def SUST_B_3D_V4B32_TRAP
5266   : NVPTXInst<(outs),
5267               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5268                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5269         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5270         "\\{$r, $g, $b, $a\\};",
5271               []>;
5272
5273
5274 // .zero variants of the sust.b surface stores
// sust.b.1d.*.zero records: one-, two- and four-channel stores, 8 to 64 bits
// per channel (the four-channel form stops at 32 bits).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// the single 32-bit coordinate $x, then the channel values $r[, $g[, $b, $a]].
// b8 variants take their channel values in Int16Regs.
// Each record has an empty (outs) list and an empty pattern list ([]).
5275 def SUST_B_1D_B8_ZERO
5276   : NVPTXInst<(outs),
5277               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5278               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5279               []>;
5280 def SUST_B_1D_B16_ZERO
5281   : NVPTXInst<(outs),
5282               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5283               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5284               []>;
5285 def SUST_B_1D_B32_ZERO
5286   : NVPTXInst<(outs),
5287               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5288               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5289               []>;
5290 def SUST_B_1D_B64_ZERO
5291   : NVPTXInst<(outs),
5292               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5293               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5294               []>;
5295 def SUST_B_1D_V2B8_ZERO
5296   : NVPTXInst<(outs),
5297               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5298               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5299               []>;
5300 def SUST_B_1D_V2B16_ZERO
5301   : NVPTXInst<(outs),
5302               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5303               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5304               []>;
5305 def SUST_B_1D_V2B32_ZERO
5306   : NVPTXInst<(outs),
5307               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5308               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5309               []>;
5310 def SUST_B_1D_V2B64_ZERO
5311   : NVPTXInst<(outs),
5312               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5313               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5314               []>;
5315 def SUST_B_1D_V4B8_ZERO
5316   : NVPTXInst<(outs),
5317               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5318                    Int16Regs:$b, Int16Regs:$a),
5319               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5320               []>;
5321 def SUST_B_1D_V4B16_ZERO
5322   : NVPTXInst<(outs),
5323               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5324                    Int16Regs:$b, Int16Regs:$a),
5325               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5326               []>;
5327 def SUST_B_1D_V4B32_ZERO
5328   : NVPTXInst<(outs),
5329               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5330                    Int32Regs:$b, Int32Regs:$a),
5331               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5332               []>;
5333
5334
// sust.b.a1d.*.zero records: one-, two- and four-channel stores, 8 to 64 bits
// per channel (the four-channel form stops at 32 bits).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// $idx (presumably the array layer index -- confirm), the 32-bit coordinate
// $x, then the channel values $r[, $g[, $b, $a]].
// The coordinate vector is printed as {$idx, $x}.
// b8 variants take their channel values in Int16Regs. No outputs, no pattern.
5335 def SUST_B_1D_ARRAY_B8_ZERO
5336   : NVPTXInst<(outs),
5337               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5338               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5339               []>;
5340 def SUST_B_1D_ARRAY_B16_ZERO
5341   : NVPTXInst<(outs),
5342               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5343               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5344               []>;
5345 def SUST_B_1D_ARRAY_B32_ZERO
5346   : NVPTXInst<(outs),
5347               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5348               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5349               []>;
5350 def SUST_B_1D_ARRAY_B64_ZERO
5351   : NVPTXInst<(outs),
5352               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5353               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5354               []>;
5355 def SUST_B_1D_ARRAY_V2B8_ZERO
5356   : NVPTXInst<(outs),
5357               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5358                    Int16Regs:$g),
5359               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5360               []>;
5361 def SUST_B_1D_ARRAY_V2B16_ZERO
5362   : NVPTXInst<(outs),
5363               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5364                    Int16Regs:$g),
5365               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5366               []>;
5367 def SUST_B_1D_ARRAY_V2B32_ZERO
5368   : NVPTXInst<(outs),
5369               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5370                    Int32Regs:$g),
5371               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5372               []>;
5373 def SUST_B_1D_ARRAY_V2B64_ZERO
5374   : NVPTXInst<(outs),
5375               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5376                    Int64Regs:$g),
5377               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5378               []>;
5379 def SUST_B_1D_ARRAY_V4B8_ZERO
5380   : NVPTXInst<(outs),
5381               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5382                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5383               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5384               "\\{$r, $g, $b, $a\\};",
5385               []>;
5386 def SUST_B_1D_ARRAY_V4B16_ZERO
5387   : NVPTXInst<(outs),
5388               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5389                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5390              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5391              "\\{$r, $g, $b, $a\\};",
5392               []>;
5393 def SUST_B_1D_ARRAY_V4B32_ZERO
5394   : NVPTXInst<(outs),
5395               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5396                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5397              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5398              "\\{$r, $g, $b, $a\\};",
5399               []>;
5400
5401
// sust.b.2d.*.zero records: one-, two- and four-channel stores, 8 to 64 bits
// per channel (the four-channel form stops at 32 bits).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// the 32-bit coordinates $x and $y, then the channel values
// $r[, $g[, $b, $a]].
// b8 variants take their channel values in Int16Regs.
// Each record has an empty (outs) list and an empty pattern list ([]).
5402 def SUST_B_2D_B8_ZERO
5403   : NVPTXInst<(outs),
5404               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5405               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5406               []>;
5407 def SUST_B_2D_B16_ZERO
5408   : NVPTXInst<(outs),
5409               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5410               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5411               []>;
5412 def SUST_B_2D_B32_ZERO
5413   : NVPTXInst<(outs),
5414               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5415               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5416               []>;
5417 def SUST_B_2D_B64_ZERO
5418   : NVPTXInst<(outs),
5419               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5420               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5421               []>;
5422 def SUST_B_2D_V2B8_ZERO
5423   : NVPTXInst<(outs),
5424               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5425                    Int16Regs:$g),
5426               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5427               []>;
5428 def SUST_B_2D_V2B16_ZERO
5429   : NVPTXInst<(outs),
5430               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5431                    Int16Regs:$g),
5432               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5433               []>;
5434 def SUST_B_2D_V2B32_ZERO
5435   : NVPTXInst<(outs),
5436               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5437                    Int32Regs:$g),
5438               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5439               []>;
5440 def SUST_B_2D_V2B64_ZERO
5441   : NVPTXInst<(outs),
5442               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5443                    Int64Regs:$g),
5444               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5445               []>;
5446 def SUST_B_2D_V4B8_ZERO
5447   : NVPTXInst<(outs),
5448               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5449                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5450               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5451               "\\{$r, $g, $b, $a\\};",
5452               []>;
5453 def SUST_B_2D_V4B16_ZERO
5454   : NVPTXInst<(outs),
5455               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5456                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5457              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5458              "\\{$r, $g, $b, $a\\};",
5459               []>;
5460 def SUST_B_2D_V4B32_ZERO
5461   : NVPTXInst<(outs),
5462               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5463                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5464              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5465              "\\{$r, $g, $b, $a\\};",
5466               []>;
5467
5468
// sust.b.a2d.*.zero records: one-, two- and four-channel stores, 8 to 64 bits
// per channel (the four-channel form stops at 32 bits).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// $idx (presumably the array layer index -- confirm), the 32-bit coordinates
// $x and $y, then the channel values $r[, $g[, $b, $a]].
// The asm string prints a four-element coordinate vector and repeats $y as its
// last element. NOTE(review): the PTX sust description appears to require a
// four-element vector for a2d with the last element ignored -- confirm before
// treating the duplicate as a typo.
// b8 variants take their channel values in Int16Regs. No outputs, no pattern.
5469 def SUST_B_2D_ARRAY_B8_ZERO
5470   : NVPTXInst<(outs),
5471               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5472                    Int16Regs:$r),
5473               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5474               []>;
5475 def SUST_B_2D_ARRAY_B16_ZERO
5476   : NVPTXInst<(outs),
5477               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5478                    Int16Regs:$r),
5479               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5480               []>;
5481 def SUST_B_2D_ARRAY_B32_ZERO
5482   : NVPTXInst<(outs),
5483               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5484                    Int32Regs:$r),
5485               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5486               []>;
5487 def SUST_B_2D_ARRAY_B64_ZERO
5488   : NVPTXInst<(outs),
5489               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5490                    Int64Regs:$r),
5491               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5492               []>;
5493 def SUST_B_2D_ARRAY_V2B8_ZERO
5494   : NVPTXInst<(outs),
5495               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5496                    Int16Regs:$r, Int16Regs:$g),
5497               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5498               "\\{$r, $g\\};",
5499               []>;
5500 def SUST_B_2D_ARRAY_V2B16_ZERO
5501   : NVPTXInst<(outs),
5502               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5503                    Int16Regs:$r, Int16Regs:$g),
5504              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5505              "\\{$r, $g\\};",
5506               []>;
5507 def SUST_B_2D_ARRAY_V2B32_ZERO
5508   : NVPTXInst<(outs),
5509               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5510                    Int32Regs:$r, Int32Regs:$g),
5511              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5512              "\\{$r, $g\\};",
5513               []>;
5514 def SUST_B_2D_ARRAY_V2B64_ZERO
5515   : NVPTXInst<(outs),
5516               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5517                    Int64Regs:$r, Int64Regs:$g),
5518              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5519              "\\{$r, $g\\};",
5520               []>;
5521 def SUST_B_2D_ARRAY_V4B8_ZERO
5522   : NVPTXInst<(outs),
5523               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5524                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5525       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5526       "\\{$r, $g, $b, $a\\};",
5527               []>;
5528 def SUST_B_2D_ARRAY_V4B16_ZERO
5529   : NVPTXInst<(outs),
5530               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5531                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5532      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5533      "\\{$r, $g, $b, $a\\};",
5534               []>;
5535 def SUST_B_2D_ARRAY_V4B32_ZERO
5536   : NVPTXInst<(outs),
5537               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5538                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5539      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5540      "\\{$r, $g, $b, $a\\};",
5541               []>;
5542
5543
// sust.b.3d.*.zero records: one-, two- and four-channel stores, 8 to 64 bits
// per channel (the four-channel form stops at 32 bits).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// the 32-bit coordinates $x, $y and $z, then the channel values
// $r[, $g[, $b, $a]].
// The asm string prints a four-element coordinate vector and repeats $z as its
// last element. NOTE(review): the PTX sust description appears to require a
// four-element vector for 3d with the last element ignored -- confirm before
// treating the duplicate as a typo.
// b8 variants take their channel values in Int16Regs. No outputs, no pattern.
5544 def SUST_B_3D_B8_ZERO
5545   : NVPTXInst<(outs),
5546               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5547                    Int16Regs:$r),
5548               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5549               []>;
5550 def SUST_B_3D_B16_ZERO
5551   : NVPTXInst<(outs),
5552               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5553                    Int16Regs:$r),
5554               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5555               []>;
5556 def SUST_B_3D_B32_ZERO
5557   : NVPTXInst<(outs),
5558               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5559                    Int32Regs:$r),
5560               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5561               []>;
5562 def SUST_B_3D_B64_ZERO
5563   : NVPTXInst<(outs),
5564               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5565                    Int64Regs:$r),
5566               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5567               []>;
5568 def SUST_B_3D_V2B8_ZERO
5569   : NVPTXInst<(outs),
5570               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5571                    Int16Regs:$r, Int16Regs:$g),
5572               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5573               "\\{$r, $g\\};",
5574               []>;
5575 def SUST_B_3D_V2B16_ZERO
5576   : NVPTXInst<(outs),
5577               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5578                    Int16Regs:$r, Int16Regs:$g),
5579               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5580               "\\{$r, $g\\};",
5581               []>;
5582 def SUST_B_3D_V2B32_ZERO
5583   : NVPTXInst<(outs),
5584               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5585                    Int32Regs:$r, Int32Regs:$g),
5586               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5587               "\\{$r, $g\\};",
5588               []>;
5589 def SUST_B_3D_V2B64_ZERO
5590   : NVPTXInst<(outs),
5591               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5592                    Int64Regs:$r, Int64Regs:$g),
5593               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5594               "\\{$r, $g\\};",
5595               []>;
5596 def SUST_B_3D_V4B8_ZERO
5597   : NVPTXInst<(outs),
5598               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5599                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5600          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5601          "\\{$r, $g, $b, $a\\};",
5602               []>;
5603 def SUST_B_3D_V4B16_ZERO
5604   : NVPTXInst<(outs),
5605               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5606                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5607         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5608         "\\{$r, $g, $b, $a\\};",
5609               []>;
5610 def SUST_B_3D_V4B32_ZERO
5611   : NVPTXInst<(outs),
5612               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5613                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5614         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5615         "\\{$r, $g, $b, $a\\};",
5616               []>;
5617
5618
5619
5620 // Formatted surface stores (sust.p)
5621
// sust.p.1d.*.trap records: one-, two- and four-channel stores with 8-, 16-
// and 32-bit channels (this family defines no b64 form).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// the single 32-bit coordinate $x, then the channel values $r[, $g[, $b, $a]].
// b8 variants take their channel values in Int16Regs.
// Each record has an empty (outs) list and an empty pattern list ([]).
5622 def SUST_P_1D_B8_TRAP
5623   : NVPTXInst<(outs),
5624               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5625               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5626               []>;
5627 def SUST_P_1D_B16_TRAP
5628   : NVPTXInst<(outs),
5629               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5630               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5631               []>;
5632 def SUST_P_1D_B32_TRAP
5633   : NVPTXInst<(outs),
5634               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5635               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5636               []>;
5637 def SUST_P_1D_V2B8_TRAP
5638   : NVPTXInst<(outs),
5639               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5640               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5641               []>;
5642 def SUST_P_1D_V2B16_TRAP
5643   : NVPTXInst<(outs),
5644               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5645               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5646               []>;
5647 def SUST_P_1D_V2B32_TRAP
5648   : NVPTXInst<(outs),
5649               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5650               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5651               []>;
5652 def SUST_P_1D_V4B8_TRAP
5653   : NVPTXInst<(outs),
5654               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5655                    Int16Regs:$b, Int16Regs:$a),
5656               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5657               []>;
5658 def SUST_P_1D_V4B16_TRAP
5659   : NVPTXInst<(outs),
5660               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5661                    Int16Regs:$b, Int16Regs:$a),
5662               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5663               []>;
5664 def SUST_P_1D_V4B32_TRAP
5665   : NVPTXInst<(outs),
5666               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5667                    Int32Regs:$b, Int32Regs:$a),
5668               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5669               []>;
5670
5671
// sust.p.a1d.*.trap records: one-, two- and four-channel stores with 8-, 16-
// and 32-bit channels (this family defines no b64 form).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// $idx (presumably the array layer index -- confirm), the 32-bit coordinate
// $x, then the channel values $r[, $g[, $b, $a]].
// The coordinate vector is printed as {$idx, $x}.
// b8 variants take their channel values in Int16Regs. No outputs, no pattern.
5672 def SUST_P_1D_ARRAY_B8_TRAP
5673   : NVPTXInst<(outs),
5674               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5675               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5676               []>;
5677 def SUST_P_1D_ARRAY_B16_TRAP
5678   : NVPTXInst<(outs),
5679               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5680               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5681               []>;
5682 def SUST_P_1D_ARRAY_B32_TRAP
5683   : NVPTXInst<(outs),
5684               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5685               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5686               []>;
5687 def SUST_P_1D_ARRAY_V2B8_TRAP
5688   : NVPTXInst<(outs),
5689               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5690                    Int16Regs:$g),
5691               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5692               []>;
5693 def SUST_P_1D_ARRAY_V2B16_TRAP
5694   : NVPTXInst<(outs),
5695               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5696                    Int16Regs:$g),
5697               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5698               []>;
5699 def SUST_P_1D_ARRAY_V2B32_TRAP
5700   : NVPTXInst<(outs),
5701               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5702                    Int32Regs:$g),
5703               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5704               []>;
5705 def SUST_P_1D_ARRAY_V4B8_TRAP
5706   : NVPTXInst<(outs),
5707               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5708                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5709               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5710               "\\{$r, $g, $b, $a\\};",
5711               []>;
5712 def SUST_P_1D_ARRAY_V4B16_TRAP
5713   : NVPTXInst<(outs),
5714               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5715                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5716              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5717              "\\{$r, $g, $b, $a\\};",
5718               []>;
5719 def SUST_P_1D_ARRAY_V4B32_TRAP
5720   : NVPTXInst<(outs),
5721               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5722                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5723              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5724              "\\{$r, $g, $b, $a\\};",
5725               []>;
5726
5727
// sust.p.2d.*.trap records: one-, two- and four-channel stores with 8-, 16-
// and 32-bit channels (this family defines no b64 form).
// Operands, in order: $s (Int64Regs; presumably the surface handle -- confirm),
// the 32-bit coordinates $x and $y, then the channel values
// $r[, $g[, $b, $a]].
// b8 variants take their channel values in Int16Regs.
// Each record has an empty (outs) list and an empty pattern list ([]).
5728 def SUST_P_2D_B8_TRAP
5729   : NVPTXInst<(outs),
5730               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5731               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5732               []>;
5733 def SUST_P_2D_B16_TRAP
5734   : NVPTXInst<(outs),
5735               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5736               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5737               []>;
5738 def SUST_P_2D_B32_TRAP
5739   : NVPTXInst<(outs),
5740               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5741               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5742               []>;
5743 def SUST_P_2D_V2B8_TRAP
5744   : NVPTXInst<(outs),
5745               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5746                    Int16Regs:$g),
5747               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5748               []>;
5749 def SUST_P_2D_V2B16_TRAP
5750   : NVPTXInst<(outs),
5751               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5752                    Int16Regs:$g),
5753               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5754               []>;
5755 def SUST_P_2D_V2B32_TRAP
5756   : NVPTXInst<(outs),
5757               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5758                    Int32Regs:$g),
5759               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5760               []>;
5761 def SUST_P_2D_V4B8_TRAP
5762   : NVPTXInst<(outs),
5763               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5764                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5765               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5766               "\\{$r, $g, $b, $a\\};",
5767               []>;
5768 def SUST_P_2D_V4B16_TRAP
5769   : NVPTXInst<(outs),
5770               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5771                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5772              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5773              "\\{$r, $g, $b, $a\\};",
5774               []>;
5775 def SUST_P_2D_V4B32_TRAP
5776   : NVPTXInst<(outs),
5777               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5778                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5779              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5780              "\\{$r, $g, $b, $a\\};",
5781               []>;
5782
5783
5784 def SUST_P_2D_ARRAY_B8_TRAP
5785   : NVPTXInst<(outs),
5786               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5787                    Int16Regs:$r),
5788               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5789               []>;
5790 def SUST_P_2D_ARRAY_B16_TRAP
5791   : NVPTXInst<(outs),
5792               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5793                    Int16Regs:$r),
5794               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5795               []>;
5796 def SUST_P_2D_ARRAY_B32_TRAP
5797   : NVPTXInst<(outs),
5798               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5799                    Int32Regs:$r),
5800               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5801               []>;
5802 def SUST_P_2D_ARRAY_V2B8_TRAP
5803   : NVPTXInst<(outs),
5804               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5805                    Int16Regs:$r, Int16Regs:$g),
5806               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5807               "\\{$r, $g\\};",
5808               []>;
5809 def SUST_P_2D_ARRAY_V2B16_TRAP
5810   : NVPTXInst<(outs),
5811               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5812                    Int16Regs:$r, Int16Regs:$g),
5813              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5814              "\\{$r, $g\\};",
5815               []>;
// Surface store instruction printing "sust.p.a2d.v2.b32.trap": no results;
// operands are $s (Int64Regs), $idx/$x/$y (Int32Regs) and two data registers
// $r, $g (Int32Regs). The asm text is split across two adjacent string
// literals.
// $y is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5816 def SUST_P_2D_ARRAY_V2B32_TRAP
5817   : NVPTXInst<(outs),
5818               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5819                    Int32Regs:$r, Int32Regs:$g),
5820              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5821              "\\{$r, $g\\};",
5822               []>;
// Surface store instruction printing "sust.p.a2d.v4.b8.trap": no results;
// operands are $s (Int64Regs), $idx/$x/$y (Int32Regs) and four data registers
// $r, $g, $b, $a (Int16Regs). The asm text is split across two adjacent string
// literals.
// $y is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5823 def SUST_P_2D_ARRAY_V4B8_TRAP
5824   : NVPTXInst<(outs),
5825               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5826                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5827       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5828       "\\{$r, $g, $b, $a\\};",
5829               []>;
// Surface store instruction printing "sust.p.a2d.v4.b16.trap": no results;
// operands are $s (Int64Regs), $idx/$x/$y (Int32Regs) and four data registers
// $r, $g, $b, $a (Int16Regs). The asm text is split across two adjacent string
// literals.
// $y is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5830 def SUST_P_2D_ARRAY_V4B16_TRAP
5831   : NVPTXInst<(outs),
5832               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5833                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5834      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5835      "\\{$r, $g, $b, $a\\};",
5836               []>;
// Surface store instruction printing "sust.p.a2d.v4.b32.trap": no results;
// operands are $s (Int64Regs), $idx/$x/$y (Int32Regs) and four data registers
// $r, $g, $b, $a (Int32Regs). The asm text is split across two adjacent string
// literals.
// $y is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5837 def SUST_P_2D_ARRAY_V4B32_TRAP
5838   : NVPTXInst<(outs),
5839               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5840                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5841      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5842      "\\{$r, $g, $b, $a\\};",
5843               []>;
5844
5845
// Surface store instruction printing "sust.p.3d.b8.trap": no results; operands
// are $s (Int64Regs), $x/$y/$z (Int32Regs) and one data register $r
// (Int16Regs).
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5846 def SUST_P_3D_B8_TRAP
5847   : NVPTXInst<(outs),
5848               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5849                    Int16Regs:$r),
5850               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5851               []>;
// Surface store instruction printing "sust.p.3d.b16.trap": no results; operands
// are $s (Int64Regs), $x/$y/$z (Int32Regs) and one data register $r
// (Int16Regs).
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5852 def SUST_P_3D_B16_TRAP
5853   : NVPTXInst<(outs),
5854               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5855                    Int16Regs:$r),
5856               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5857               []>;
// Surface store instruction printing "sust.p.3d.b32.trap": no results; operands
// are $s (Int64Regs), $x/$y/$z (Int32Regs) and one data register $r
// (Int32Regs).
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5858 def SUST_P_3D_B32_TRAP
5859   : NVPTXInst<(outs),
5860               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5861                    Int32Regs:$r),
5862               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5863               []>;
// Surface store instruction printing "sust.p.3d.v2.b8.trap": no results;
// operands are $s (Int64Regs), $x/$y/$z (Int32Regs) and two data registers
// $r, $g (Int16Regs). The asm text is split across two adjacent string
// literals.
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5864 def SUST_P_3D_V2B8_TRAP
5865   : NVPTXInst<(outs),
5866               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5867                    Int16Regs:$r, Int16Regs:$g),
5868               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5869               "\\{$r, $g\\};",
5870               []>;
// Surface store instruction printing "sust.p.3d.v2.b16.trap": no results;
// operands are $s (Int64Regs), $x/$y/$z (Int32Regs) and two data registers
// $r, $g (Int16Regs). The asm text is split across two adjacent string
// literals.
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5871 def SUST_P_3D_V2B16_TRAP
5872   : NVPTXInst<(outs),
5873               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5874                    Int16Regs:$r, Int16Regs:$g),
5875               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5876               "\\{$r, $g\\};",
5877               []>;
// Surface store instruction printing "sust.p.3d.v2.b32.trap": no results;
// operands are $s (Int64Regs), $x/$y/$z (Int32Regs) and two data registers
// $r, $g (Int32Regs). The asm text is split across two adjacent string
// literals.
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5878 def SUST_P_3D_V2B32_TRAP
5879   : NVPTXInst<(outs),
5880               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5881                    Int32Regs:$r, Int32Regs:$g),
5882               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5883               "\\{$r, $g\\};",
5884               []>;
// Surface store instruction printing "sust.p.3d.v4.b8.trap": no results;
// operands are $s (Int64Regs), $x/$y/$z (Int32Regs) and four data registers
// $r, $g, $b, $a (Int16Regs). The asm text is split across two adjacent string
// literals.
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5885 def SUST_P_3D_V4B8_TRAP
5886   : NVPTXInst<(outs),
5887               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5888                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5889          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5890          "\\{$r, $g, $b, $a\\};",
5891               []>;
// Surface store instruction printing "sust.p.3d.v4.b16.trap": no results;
// operands are $s (Int64Regs), $x/$y/$z (Int32Regs) and four data registers
// $r, $g, $b, $a (Int16Regs). The asm text is split across two adjacent string
// literals.
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5892 def SUST_P_3D_V4B16_TRAP
5893   : NVPTXInst<(outs),
5894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5895                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5896         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5897         "\\{$r, $g, $b, $a\\};",
5898               []>;
// Surface store instruction printing "sust.p.3d.v4.b32.trap": no results;
// operands are $s (Int64Regs), $x/$y/$z (Int32Regs) and four data registers
// $r, $g, $b, $a (Int32Regs). The asm text is split across two adjacent string
// literals.
// $z is printed twice to fill a four-element coordinate vector -- presumably
// PTX ignores the fourth element; TODO confirm against the PTX ISA.
// The pattern list is empty, so this def carries no selection pattern itself.
5899 def SUST_P_3D_V4B32_TRAP
5900   : NVPTXInst<(outs),
5901               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5902                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5903         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5904         "\\{$r, $g, $b, $a\\};",
5905               []>;
5906 }
5907
5908 // Surface store instruction patterns
5909 // I'm not sure why we can't just include these in the instruction definitions,
5910 // but TableGen complains of type errors :(
5911
5912 // .clamp variant
// Selection patterns for the 1D surface stores with the _clamp suffix.
// Each int_nvvm_sust_b_1d_<type>_clamp intrinsic is rewritten to the
// SUST_B_1D_<TYPE>_CLAMP instruction, forwarding $s (Int64Regs), $x
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
5913 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5914            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5915           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5916
5917 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5918            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5919           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5920
5921 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5922            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5923           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5924
5925 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5926            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5927           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5928
// Two data registers ($r, $g).
5929 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5930            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5931           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5932            Int16Regs:$r, Int16Regs:$g)>;
5933
5934 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5935            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5936           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5937            Int16Regs:$r, Int16Regs:$g)>;
5938
5939 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5940            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5941           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5942            Int32Regs:$r, Int32Regs:$g)>;
5943
5944 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5945            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5946           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5947            Int64Regs:$r, Int64Regs:$g)>;
5948
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
5949 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5950            Int64Regs:$s, Int32Regs:$x,
5951            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5952           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5953            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5954
5955 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5956            Int64Regs:$s, Int32Regs:$x,
5957            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5958           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5959            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5960
5961 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5962            Int64Regs:$s, Int32Regs:$x,
5963            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5964           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5965            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5966
5967
5968
// Selection patterns for the 1D-array surface stores with the _clamp suffix.
// Each int_nvvm_sust_b_1d_array_<type>_clamp intrinsic is rewritten to the
// SUST_B_1D_ARRAY_<TYPE>_CLAMP instruction, forwarding $s (Int64Regs), $l and
// $x (Int32Regs) and the data registers unchanged and in the same order.
// $l is presumably the array layer index -- TODO confirm against the
// instruction definitions.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
5969 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5970            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5971           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5972            Int16Regs:$r)>;
5973
5974 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5975            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5976           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5977            Int16Regs:$r)>;
5978
5979 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5980            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5981           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5982            Int32Regs:$r)>;
5983
5984 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5985            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5986           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5987            Int64Regs:$r)>;
5988
// Two data registers ($r, $g).
5989 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5990           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5991           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5992            Int16Regs:$r, Int16Regs:$g)>;
5993
5994 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5995           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5996           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5997            Int16Regs:$r, Int16Regs:$g)>;
5998
5999 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6000           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6001           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6002            Int32Regs:$r, Int32Regs:$g)>;
6003
6004 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6005           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6006           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6007            Int64Regs:$r, Int64Regs:$g)>;
6008
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6009 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6010            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6011            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6012           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6013            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6014
6015 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6016            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6017            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6018           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6019            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6020
6021 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6022            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6023            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6024           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6025            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6026
6027
6028
// Selection patterns for the 2D surface stores with the _clamp suffix.
// Each int_nvvm_sust_b_2d_<type>_clamp intrinsic is rewritten to the
// SUST_B_2D_<TYPE>_CLAMP instruction, forwarding $s (Int64Regs), $x and $y
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6029 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6030            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6031           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6032            Int16Regs:$r)>;
6033
6034 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6035            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6036           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6037            Int16Regs:$r)>;
6038
6039 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6040            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6041           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6042            Int32Regs:$r)>;
6043
6044 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6045            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6046           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6047            Int64Regs:$r)>;
6048
// Two data registers ($r, $g).
6049 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6050           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6051           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6052            Int16Regs:$r, Int16Regs:$g)>;
6053
6054 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6055           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6056           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6057            Int16Regs:$r, Int16Regs:$g)>;
6058
6059 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6060           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6061           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6062            Int32Regs:$r, Int32Regs:$g)>;
6063
6064 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6065           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6066           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6067            Int64Regs:$r, Int64Regs:$g)>;
6068
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6069 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6070            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6071            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6072           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6073            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6074
6075 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6076            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6077            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6078           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6079            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6080
6081 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6082            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6083            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6084           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6085            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6086
6087
6088
// Selection patterns for the 2D-array surface stores with the _clamp suffix.
// Each int_nvvm_sust_b_2d_array_<type>_clamp intrinsic is rewritten to the
// SUST_B_2D_ARRAY_<TYPE>_CLAMP instruction, forwarding $s (Int64Regs), $l, $x
// and $y (Int32Regs) and the data registers unchanged and in the same order.
// $l is presumably the array layer index -- TODO confirm against the
// instruction definitions.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6089 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6090           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6091           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6092            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6093            Int16Regs:$r)>;
6094
6095 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6096           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6097           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6098            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6099            Int16Regs:$r)>;
6100
6101 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6102           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6103           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6104            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6105            Int32Regs:$r)>;
6106
6107 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6108           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6109           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6110            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6111            Int64Regs:$r)>;
6112
// Two data registers ($r, $g).
6113 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6114            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6115            Int16Regs:$r, Int16Regs:$g),
6116           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6117            Int32Regs:$x, Int32Regs:$y,
6118            Int16Regs:$r, Int16Regs:$g)>;
6119
6120 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6121            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6122            Int16Regs:$r, Int16Regs:$g),
6123           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6124            Int32Regs:$x, Int32Regs:$y,
6125            Int16Regs:$r, Int16Regs:$g)>;
6126
6127 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6128            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6129            Int32Regs:$g),
6130           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6131            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6132
6133 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6134            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6135            Int64Regs:$g),
6136           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6137            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6138
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6139 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6140            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6141            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6142           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6143            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6144            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6145
6146 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6147            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6148            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6149           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6150            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6151            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6152
6153 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6154            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6155            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6156           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6157            Int32Regs:$x, Int32Regs:$y,
6158            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6159
6160
6161
// Selection patterns for the 3D surface stores with the _clamp suffix.
// Each int_nvvm_sust_b_3d_<type>_clamp intrinsic is rewritten to the
// SUST_B_3D_<TYPE>_CLAMP instruction, forwarding $s (Int64Regs), $x, $y and $z
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6162 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6163            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6164            Int16Regs:$r),
6165           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6166            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6167            Int16Regs:$r)>;
6168
6169 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6170            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6171            Int16Regs:$r),
6172           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6173            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6174            Int16Regs:$r)>;
6175
6176 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6177            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6178            Int32Regs:$r),
6179           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6180            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6181            Int32Regs:$r)>;
6182
6183 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6184            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6185            Int64Regs:$r),
6186           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6187            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6188            Int64Regs:$r)>;
6189
// Two data registers ($r, $g).
6190 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6191            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6192            Int16Regs:$r, Int16Regs:$g),
6193           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6194            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6195            Int16Regs:$r, Int16Regs:$g)>;
6196
6197 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6198            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6199            Int16Regs:$r, Int16Regs:$g),
6200           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6201            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6202            Int16Regs:$r, Int16Regs:$g)>;
6203
6204 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6205            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6206            Int32Regs:$r, Int32Regs:$g),
6207           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6208            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6209            Int32Regs:$r, Int32Regs:$g)>;
6210
6211 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6212            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6213            Int64Regs:$r, Int64Regs:$g),
6214           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6215            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6216            Int64Regs:$r, Int64Regs:$g)>;
6217
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6218 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6219            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6220            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6221           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6222            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6223            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6224
6225 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6226            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6227            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6228           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6229            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6230            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6231
6232 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6233            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6234            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6235           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6236            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6237            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6238
6239
6240 // .trap variant
// Selection patterns for the 1D surface stores with the _trap suffix.
// Each int_nvvm_sust_b_1d_<type>_trap intrinsic is rewritten to the
// SUST_B_1D_<TYPE>_TRAP instruction, forwarding $s (Int64Regs), $x
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6241 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6242            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6243           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6244
6245 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6246            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6247           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6248
6249 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6250            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6251           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6252
6253 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6254            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6255           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6256
// Two data registers ($r, $g).
6257 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6258            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6259           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6260            Int16Regs:$r, Int16Regs:$g)>;
6261
6262 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6263            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6264           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6265            Int16Regs:$r, Int16Regs:$g)>;
6266
6267 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6268            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6269           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6270            Int32Regs:$r, Int32Regs:$g)>;
6271
6272 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6273            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6274           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6275            Int64Regs:$r, Int64Regs:$g)>;
6276
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6277 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6278            Int64Regs:$s, Int32Regs:$x,
6279            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6280           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6281            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6282
6283 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6284            Int64Regs:$s, Int32Regs:$x,
6285            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6286           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6287            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6288
6289 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6290            Int64Regs:$s, Int32Regs:$x,
6291            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6292           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6293            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6294
6295
6296
// Selection patterns for the 1D-array surface stores with the _trap suffix.
// Each int_nvvm_sust_b_1d_array_<type>_trap intrinsic is rewritten to the
// SUST_B_1D_ARRAY_<TYPE>_TRAP instruction, forwarding $s (Int64Regs), $l and
// $x (Int32Regs) and the data registers unchanged and in the same order.
// $l is presumably the array layer index -- TODO confirm against the
// instruction definitions.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6297 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6298            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6299           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6300            Int16Regs:$r)>;
6301
6302 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6303            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6304           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6305            Int16Regs:$r)>;
6306
6307 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6308            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6309           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6310            Int32Regs:$r)>;
6311
6312 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6313            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6314           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6315            Int64Regs:$r)>;
6316
// Two data registers ($r, $g).
6317 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6318           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6319           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6320            Int16Regs:$r, Int16Regs:$g)>;
6321
6322 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6323           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6324           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6325            Int16Regs:$r, Int16Regs:$g)>;
6326
6327 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6328           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6329           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6330            Int32Regs:$r, Int32Regs:$g)>;
6331
6332 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6333           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6334           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6335            Int64Regs:$r, Int64Regs:$g)>;
6336
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6337 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6338            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6339            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6340           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6341            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6342
6343 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6344            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6345            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6346           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6347            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6348
6349 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6350            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6351            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6352           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6353            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6354
6355
6356
// Selection patterns for the 2D surface stores with the _trap suffix.
// Each int_nvvm_sust_b_2d_<type>_trap intrinsic is rewritten to the
// SUST_B_2D_<TYPE>_TRAP instruction, forwarding $s (Int64Regs), $x and $y
// (Int32Regs) and the data registers unchanged and in the same order.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6357 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6358            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6359           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6360            Int16Regs:$r)>;
6361
6362 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6363            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6364           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6365            Int16Regs:$r)>;
6366
6367 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6368            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6369           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6370            Int32Regs:$r)>;
6371
6372 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6373            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6374           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6375            Int64Regs:$r)>;
6376
// Two data registers ($r, $g).
6377 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6378           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6379           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6380            Int16Regs:$r, Int16Regs:$g)>;
6381
6382 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6383           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6384           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6385            Int16Regs:$r, Int16Regs:$g)>;
6386
6387 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6388           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6389           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6390            Int32Regs:$r, Int32Regs:$g)>;
6391
6392 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6393           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6394           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6395            Int64Regs:$r, Int64Regs:$g)>;
6396
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6397 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6398            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6399            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6400           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6401            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6402
6403 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6404            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6405            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6406           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6407            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6408
6409 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6410            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6411            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6412           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6413            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6414
6415
6416
// Selection patterns for the 2D-array surface stores with the _trap suffix.
// Each int_nvvm_sust_b_2d_array_<type>_trap intrinsic is rewritten to the
// SUST_B_2D_ARRAY_<TYPE>_TRAP instruction, forwarding $s (Int64Regs), $l, $x
// and $y (Int32Regs) and the data registers unchanged and in the same order.
// $l is presumably the array layer index -- TODO confirm against the
// instruction definitions.
// i8 and i16 data both come from Int16Regs, i32 from Int32Regs and i64 from
// Int64Regs.
// One data register ($r).
6417 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6418           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6419           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6420            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6421            Int16Regs:$r)>;
6422
6423 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6424           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6425           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6426            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6427            Int16Regs:$r)>;
6428
6429 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6430           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6431           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6432            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6433            Int32Regs:$r)>;
6434
6435 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6436           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6437           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6438            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6439            Int64Regs:$r)>;
6440
// Two data registers ($r, $g).
6441 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6442            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6443            Int16Regs:$r, Int16Regs:$g),
6444           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6445            Int32Regs:$x, Int32Regs:$y,
6446            Int16Regs:$r, Int16Regs:$g)>;
6447
6448 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6449            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6450            Int16Regs:$r, Int16Regs:$g),
6451           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6452            Int32Regs:$x, Int32Regs:$y,
6453            Int16Regs:$r, Int16Regs:$g)>;
6454
6455 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6456            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6457            Int32Regs:$g),
6458           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6459            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6460
6461 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6462            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6463            Int64Regs:$g),
6464           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6465            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6466
// Four data registers ($r, $g, $b, $a); no v4i64 pattern appears in this run.
6467 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6468            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6469            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6470           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6471            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6472            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6473
6474 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6475            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6476            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6477           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6478            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6479            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6480
6481 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6482            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6483            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6484           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6485            Int32Regs:$x, Int32Regs:$y,
6486            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6487
6488 // int_nvvm_sust_b_3d_*_trap -> SUST_B_3D_*_TRAP: operands ($s, $x, $y, $z, then
6489 // the value components) are forwarded in order; i8/i16 values use Int16Regs.
6490 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6491            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6492            Int16Regs:$r),
6493           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6494            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6495            Int16Regs:$r)>;
6496
6497 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6498            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6499            Int16Regs:$r),
6500           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6501            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6502            Int16Regs:$r)>;
6503
6504 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6505            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6506            Int32Regs:$r),
6507           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6508            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6509            Int32Regs:$r)>;
6510
6511 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6512            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6513            Int64Regs:$r),
6514           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6515            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6516            Int64Regs:$r)>;
6517
6518 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6519            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6520            Int16Regs:$r, Int16Regs:$g),
6521           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6522            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6523            Int16Regs:$r, Int16Regs:$g)>;
6524
6525 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6526            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6527            Int16Regs:$r, Int16Regs:$g),
6528           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6529            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6530            Int16Regs:$r, Int16Regs:$g)>;
6531
6532 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6533            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6534            Int32Regs:$r, Int32Regs:$g),
6535           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6536            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6537            Int32Regs:$r, Int32Regs:$g)>;
6538
6539 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6540            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6541            Int64Regs:$r, Int64Regs:$g),
6542           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6543            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6544            Int64Regs:$r, Int64Regs:$g)>;
6545
6546 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6547            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6548            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6549           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6550            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6551            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6552
6553 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6554            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6555            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6556           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6557            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6558            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6559
6560 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6561            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6562            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6563           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6564            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6565            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6566
6567 // .zero variant
6568 // int_nvvm_sust_b_1d_*_zero -> SUST_B_1D_*_ZERO; operands forwarded in order.
6569 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6570            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6571           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6572
6573 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6574            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6575           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6576
6577 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6578            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6579           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6580
6581 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6582            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6583           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6584
6585 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6586            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6587           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6588            Int16Regs:$r, Int16Regs:$g)>;
6589
6590 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6591            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6592           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6593            Int16Regs:$r, Int16Regs:$g)>;
6594
6595 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6596            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6597           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6598            Int32Regs:$r, Int32Regs:$g)>;
6599
6600 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6601            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6602           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6603            Int64Regs:$r, Int64Regs:$g)>;
6604
6605 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6606            Int64Regs:$s, Int32Regs:$x,
6607            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6608           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6609            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6610
6611 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6612            Int64Regs:$s, Int32Regs:$x,
6613            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6614           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6615            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6616
6617 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6618            Int64Regs:$s, Int32Regs:$x,
6619            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6620           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6621            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6622
6623 // int_nvvm_sust_b_1d_array_*_zero -> SUST_B_1D_ARRAY_*_ZERO: operands ($s, $l,
6624 // $x, then the value components) are forwarded unchanged and in order.
6625 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6626            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6627           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6628            Int16Regs:$r)>;
6629
6630 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6631            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6632           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6633            Int16Regs:$r)>;
6634
6635 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6636            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6637           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6638            Int32Regs:$r)>;
6639
6640 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6641            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6642           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6643            Int64Regs:$r)>;
6644
6645 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6646           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6647           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6648            Int16Regs:$r, Int16Regs:$g)>;
6649
6650 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6651           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6652           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6653            Int16Regs:$r, Int16Regs:$g)>;
6654
6655 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6656           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6657           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6658            Int32Regs:$r, Int32Regs:$g)>;
6659
6660 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6661           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6662           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6663            Int64Regs:$r, Int64Regs:$g)>;
6664
6665 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6666            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6667            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6668           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6669            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6670
6671 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6672            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6673            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6674           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6675            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6676
6677 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6678            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6679            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6680           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6681            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6682
6683 // int_nvvm_sust_b_2d_*_zero -> SUST_B_2D_*_ZERO: operands ($s, $x, $y, then the
6684 // value components) are forwarded unchanged and in order.
6685 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6686            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6687           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6688            Int16Regs:$r)>;
6689
6690 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6691            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6692           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6693            Int16Regs:$r)>;
6694
6695 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6696            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6697           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6698            Int32Regs:$r)>;
6699
6700 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6701            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6702           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6703            Int64Regs:$r)>;
6704
6705 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6706           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6707           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6708            Int16Regs:$r, Int16Regs:$g)>;
6709
6710 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6711           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6712           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6713            Int16Regs:$r, Int16Regs:$g)>;
6714
6715 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6716           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6717           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6718            Int32Regs:$r, Int32Regs:$g)>;
6719
6720 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6721           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6722           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6723            Int64Regs:$r, Int64Regs:$g)>;
6724
6725 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6726            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6727            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6728           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6729            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6730
6731 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6732            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6733            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6734           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6735            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6736
6737 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6738            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6739            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6740           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6741            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6742
6743 // int_nvvm_sust_b_2d_array_*_zero -> SUST_B_2D_ARRAY_*_ZERO: operands ($s, $l,
6744 // $x, $y, then the value components) are forwarded unchanged and in order.
6745 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6746           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6747           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6748            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6749            Int16Regs:$r)>;
6750
6751 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6752           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6753           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6754            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6755            Int16Regs:$r)>;
6756
6757 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6758           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6759           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6760            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6761            Int32Regs:$r)>;
6762
6763 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6764           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6765           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6766            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6767            Int64Regs:$r)>;
6768
6769 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6770            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6771            Int16Regs:$r, Int16Regs:$g),
6772           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6773            Int32Regs:$x, Int32Regs:$y,
6774            Int16Regs:$r, Int16Regs:$g)>;
6775
6776 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6777            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6778            Int16Regs:$r, Int16Regs:$g),
6779           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6780            Int32Regs:$x, Int32Regs:$y,
6781            Int16Regs:$r, Int16Regs:$g)>;
6782
6783 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6784            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6785            Int32Regs:$g),
6786           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6787            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6788
6789 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6790            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6791            Int64Regs:$g),
6792           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6793            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6794
6795 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6796            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6797            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6798           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6799            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6800            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6801
6802 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6803            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6804            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6805           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6806            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6807            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6808
6809 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6810            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6811            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6812           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6813            Int32Regs:$x, Int32Regs:$y,
6814            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6815
6816 // int_nvvm_sust_b_3d_*_zero -> SUST_B_3D_*_ZERO: operands ($s, $x, $y, $z, then
6817 // the value components) are forwarded unchanged and in order.
6818 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6819            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6820            Int16Regs:$r),
6821           (SUST_B_3D_B8_ZERO Int64Regs:$s,
6822            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6823            Int16Regs:$r)>;
6824
6825 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6826            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6827            Int16Regs:$r),
6828           (SUST_B_3D_B16_ZERO Int64Regs:$s,
6829            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6830            Int16Regs:$r)>;
6831
6832 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6833            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6834            Int32Regs:$r),
6835           (SUST_B_3D_B32_ZERO Int64Regs:$s,
6836            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6837            Int32Regs:$r)>;
6838
6839 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6840            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6841            Int64Regs:$r),
6842           (SUST_B_3D_B64_ZERO Int64Regs:$s,
6843            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6844            Int64Regs:$r)>;
6845
6846 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6847            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6848            Int16Regs:$r, Int16Regs:$g),
6849           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6850            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6851            Int16Regs:$r, Int16Regs:$g)>;
6852
6853 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6854            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6855            Int16Regs:$r, Int16Regs:$g),
6856           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6857            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6858            Int16Regs:$r, Int16Regs:$g)>;
6859
6860 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6861            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6862            Int32Regs:$r, Int32Regs:$g),
6863           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6864            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6865            Int32Regs:$r, Int32Regs:$g)>;
6866
6867 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6868            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6869            Int64Regs:$r, Int64Regs:$g),
6870           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6871            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6872            Int64Regs:$r, Int64Regs:$g)>;
6873
6874 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6875            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6876            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6877           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6878            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6879            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6880
6881 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6882            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6883            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6884           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6885            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6886            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6887
6888 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6889            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6890            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6891           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6892            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6893            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6894
6895
6896 // int_nvvm_sust_p_1d_*_trap -> SUST_P_1D_*_TRAP; operands forwarded in order.
6897 // This group has i8/i16/i32 element forms only (no 64-bit forms).
6898 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6899            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6900           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6901
6902 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6903            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6904           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6905
6906 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6907            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6908           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6909
6910 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6911            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6912           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6913            Int16Regs:$r, Int16Regs:$g)>;
6914
6915 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6916            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6917           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6918            Int16Regs:$r, Int16Regs:$g)>;
6919
6920 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6921            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6922           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6923            Int32Regs:$r, Int32Regs:$g)>;
6924
6925 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6926            Int64Regs:$s, Int32Regs:$x,
6927            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6928           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6929            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6930
6931 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6932            Int64Regs:$s, Int32Regs:$x,
6933            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6934           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6935            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6936
6937 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6938            Int64Regs:$s, Int32Regs:$x,
6939            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6940           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6941            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6942
6943 // int_nvvm_sust_p_1d_array_*_trap -> SUST_P_1D_ARRAY_*_TRAP: operands ($s, $l,
6944 // $x, then the value components) are forwarded unchanged and in order.
6945 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6946            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6947           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6948            Int16Regs:$r)>;
6949
6950 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6951            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6952           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6953            Int16Regs:$r)>;
6954
6955 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6956            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6957           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6958            Int32Regs:$r)>;
6959
6960 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6961           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6962           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6963            Int16Regs:$r, Int16Regs:$g)>;
6964
6965 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6966           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6967           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6968            Int16Regs:$r, Int16Regs:$g)>;
6969
6970 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6971           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6972           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6973            Int32Regs:$r, Int32Regs:$g)>;
6974
6975 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6976            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6977            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6978           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6979            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6980
6981 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6982            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6983            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6984           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6985            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6986
6987 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6988            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6989            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6990           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6991            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6992
6993 // int_nvvm_sust_p_2d_*_trap -> SUST_P_2D_*_TRAP: operands ($s, $x, $y, then the
6994 // value components) are forwarded unchanged and in order.
6995 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6996            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6997           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6998            Int16Regs:$r)>;
6999
7000 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7001            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7002           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7003            Int16Regs:$r)>;
7004
7005 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7006            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7007           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7008            Int32Regs:$r)>;
7009
7010 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7011           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7012           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7013            Int16Regs:$r, Int16Regs:$g)>;
7014
7015 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7016           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7017           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7018            Int16Regs:$r, Int16Regs:$g)>;
7019
7020 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7021           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7022           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7023            Int32Regs:$r, Int32Regs:$g)>;
7024
7025 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7026            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7027            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7028           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7029            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7030
7031 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7032            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7033            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7034           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7035            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7036
7037 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7038            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7039            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7040           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7041            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7042
7043 // int_nvvm_sust_p_2d_array_*_trap -> SUST_P_2D_ARRAY_*_TRAP: operands ($s, $l,
7044 // $x, $y, then the value components) are forwarded unchanged and in order.
7045 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7046           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7047           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7048            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7049            Int16Regs:$r)>;
7050
7051 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7052           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7053           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7054            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7055            Int16Regs:$r)>;
7056
7057 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7058           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7059           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7060            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7061            Int32Regs:$r)>;
7062
7063 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7064            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7065            Int16Regs:$r, Int16Regs:$g),
7066           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7067            Int32Regs:$x, Int32Regs:$y,
7068            Int16Regs:$r, Int16Regs:$g)>;
7069
7070 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7071            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7072            Int16Regs:$r, Int16Regs:$g),
7073           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7074            Int32Regs:$x, Int32Regs:$y,
7075            Int16Regs:$r, Int16Regs:$g)>;
7076
7077 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7078            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7079            Int32Regs:$g),
7080           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7081            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7082
7083 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7084            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7085            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7086           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7087            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7088            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7089
7090 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7091            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7092            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7093           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7094            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7095            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7096
7097 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7098            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7099            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7100           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7101            Int32Regs:$x, Int32Regs:$y,
7102            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7103
7104
7105
7106 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7107            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7108            Int16Regs:$r),
7109           (SUST_P_3D_B8_TRAP Int64Regs:$s,
7110            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7111            Int16Regs:$r)>;
7112
7113 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7114            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7115            Int16Regs:$r),
7116           (SUST_P_3D_B16_TRAP Int64Regs:$s,
7117            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7118            Int16Regs:$r)>;
7119
7120 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7121            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7122            Int32Regs:$r),
7123           (SUST_P_3D_B32_TRAP Int64Regs:$s,
7124            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7125            Int32Regs:$r)>;
7126
7127 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7128            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7129            Int16Regs:$r, Int16Regs:$g),
7130           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7131            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7132            Int16Regs:$r, Int16Regs:$g)>;
7133
7134 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7135            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7136            Int16Regs:$r, Int16Regs:$g),
7137           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7138            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7139            Int16Regs:$r, Int16Regs:$g)>;
7140
// Selection pattern: int_nvvm_sust_p_3d_v2i32_trap -> SUST_P_3D_V2B32_TRAP.
// Operands are forwarded unchanged: $s (Int64Regs), $x/$y/$z (Int32Regs) and
// the two values $r/$g (Int32Regs).
7141 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7142            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7143            Int32Regs:$r, Int32Regs:$g),
7144           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7145            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7146            Int32Regs:$r, Int32Regs:$g)>;
7147
// Selection pattern: int_nvvm_sust_p_3d_v4i8_trap -> SUST_P_3D_V4B8_TRAP.
// Operands are forwarded unchanged: $s (Int64Regs), $x/$y/$z (Int32Regs) and
// the four values $r/$g/$b/$a. Note that the i8 values are carried in
// Int16Regs.
7148 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7149            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7150            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7151           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7152            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7153            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7154
// Selection pattern: int_nvvm_sust_p_3d_v4i16_trap -> SUST_P_3D_V4B16_TRAP.
// Operands are forwarded unchanged: $s (Int64Regs), $x/$y/$z (Int32Regs) and
// the four values $r/$g/$b/$a (Int16Regs).
7155 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7156            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7157            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7158           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7159            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7160            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7161
// Selection pattern: int_nvvm_sust_p_3d_v4i32_trap -> SUST_P_3D_V4B32_TRAP.
// Operands are forwarded unchanged: $s (Int64Regs), $x/$y/$z (Int32Regs) and
// the four values $r/$g/$b/$a (Int32Regs).
7162 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7163            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7164            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7165           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7166            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7167            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7168
7169 //-----------------------------------
7170 // Read Special Registers
7171 //-----------------------------------
7172
// Instruction class for reading a special register into an Int64Regs result.
//   regname - register name WITHOUT the leading '%'; the class adds it.
//   intop   - operand-less intrinsic that this instruction is selected for.
// The assembly string is "mov.u64\t$d, %<regname>;".
7173 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7174   : NVPTXInst<(outs Int64Regs:$d), (ins),
7175               !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
7176               [(set Int64Regs:$d, (intop))]>;
7177
// Instruction class for reading a special register into an Int32Regs result.
//   regname - register name WITHOUT the leading '%'; the class adds it.
//   intop   - operand-less intrinsic that this instruction is selected for.
// The assembly string is "mov.u32\t$d, %<regname>;".
7178 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7179   : NVPTXInst<(outs Int32Regs:$d), (ins),
7180               !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
7181               [(set Int32Regs:$d, (intop))]>;
7182
7183 // TODO: Add vector versions of the special-register reads.
7184
// 32-bit reads of %tid.{x,y,z,w}, one instruction per component, each selected
// for the matching int_nvvm_read_ptx_sreg_tid_* intrinsic.
// NOTE(review): %tid is presumably the thread index within the CTA -- see the
// PTX ISA special-register documentation.
7185 def INT_PTX_SREG_TID_X :
7186     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7187 def INT_PTX_SREG_TID_Y :
7188     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7189 def INT_PTX_SREG_TID_Z :
7190     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7191 def INT_PTX_SREG_TID_W :
7192     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7193
// 32-bit reads of %ntid.{x,y,z,w}, one instruction per component, each selected
// for the matching int_nvvm_read_ptx_sreg_ntid_* intrinsic.
// NOTE(review): %ntid is presumably the CTA dimensions (threads per CTA) --
// see the PTX ISA special-register documentation.
7194 def INT_PTX_SREG_NTID_X :
7195     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7196 def INT_PTX_SREG_NTID_Y :
7197     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7198 def INT_PTX_SREG_NTID_Z :
7199     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7200 def INT_PTX_SREG_NTID_W :
7201     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
7202
// 32-bit reads of %laneid, %warpid and %nwarpid, each selected for the
// matching int_nvvm_read_ptx_sreg_* intrinsic.
7203 def INT_PTX_SREG_LANEID :
7204     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7205 def INT_PTX_SREG_WARPID :
7206     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7207 def INT_PTX_SREG_NWARPID :
7208     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
7209
// 32-bit reads of %ctaid.{x,y,z,w}, one instruction per component, each
// selected for the matching int_nvvm_read_ptx_sreg_ctaid_* intrinsic.
// NOTE(review): %ctaid is presumably the CTA index within the grid -- see the
// PTX ISA special-register documentation.
7210 def INT_PTX_SREG_CTAID_X :
7211     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7212 def INT_PTX_SREG_CTAID_Y :
7213     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7214 def INT_PTX_SREG_CTAID_Z :
7215     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7216 def INT_PTX_SREG_CTAID_W :
7217     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7218
// 32-bit reads of %nctaid.{x,y,z,w}, one instruction per component, each
// selected for the matching int_nvvm_read_ptx_sreg_nctaid_* intrinsic.
// NOTE(review): %nctaid is presumably the grid dimensions (CTAs per grid) --
// see the PTX ISA special-register documentation.
7219 def INT_PTX_SREG_NCTAID_X :
7220     PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7221 def INT_PTX_SREG_NCTAID_Y :
7222     PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7223 def INT_PTX_SREG_NCTAID_Z :
7224     PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7225 def INT_PTX_SREG_NCTAID_W :
7226     PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
7227
// 32-bit reads of %smid, %nsmid and %gridid, each selected for the matching
// int_nvvm_read_ptx_sreg_* intrinsic.
7228 def INT_PTX_SREG_SMID :
7229     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7230 def INT_PTX_SREG_NSMID :
7231     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7232 def INT_PTX_SREG_GRIDID :
7233     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
7234
// 32-bit reads of the %lanemask_{eq,le,lt,ge,gt} registers, each selected for
// the matching int_nvvm_read_ptx_sreg_lanemask_* intrinsic.
7235 def INT_PTX_SREG_LANEMASK_EQ :
7236     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7237 def INT_PTX_SREG_LANEMASK_LE :
7238     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7239 def INT_PTX_SREG_LANEMASK_LT :
7240     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7241 def INT_PTX_SREG_LANEMASK_GE :
7242     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7243 def INT_PTX_SREG_LANEMASK_GT :
7244     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7245
// Clock reads: %clock via the 32-bit class (mov.u32) and %clock64 via the
// 64-bit class (mov.u64). %clock64 is the only register in this section that
// uses PTX_READ_SREG_R64.
7246 def INT_PTX_SREG_CLOCK :
7247     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7248 def INT_PTX_SREG_CLOCK64 :
7249     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
7250
// 32-bit reads of %pm0..%pm3, each selected for the matching
// int_nvvm_read_ptx_sreg_pm* intrinsic.
// NOTE(review): presumably the PTX performance-monitoring counters -- confirm
// against the PTX ISA special-register documentation.
7251 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7252 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7253 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7254 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7255
// Reads WARP_SZ into an Int32Regs result; selected for
// int_nvvm_read_ptx_sreg_warpsize. Defined directly on NVPTXInst rather than
// through the PTX_READ_SREG_R32 class.
7256 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
7257 // emits a '%' before the register name, and WARP_SZ is written without one.
7258 def INT_PTX_SREG_WARPSIZE :
7259     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7260               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;