]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Merge clang 7.0.1 and several follow-up changes
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
// Leaf predicates over floating-point immediates.  Each one reads the node's
// APFloat value and compares it against a fixed constant.

// fpimm whose value, converted to float, compares equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

// fpimm whose value, converted to float, compares equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

// fpimm whose value, converted to double, compares equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

// fpimm whose value, converted to double, compares equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
29
30
31
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
35 let isConvergent = 1 in {
// bar.sync with the literal operand 0; selected for int_nvvm_barrier0.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// bar.sync with one 32-bit register operand; selected for int_nvvm_barrier_n.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// bar.sync with two 32-bit register operands; selected for int_nvvm_barrier.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Reducing barriers.  Each expands to a braced PTX sequence that first turns
// the 32-bit $pred input into a predicate register with setp.ne.u32 and then
// issues a bar.red on barrier 0.

// popc variant: bar.red.popc.u32 writes $dst directly.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.popc.u32 \t$dst, 0, %p1; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// and variant: the predicate result %p2 is turned back into 1/0 with selp.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.and.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// or variant: same shape as the and variant, using bar.red.or.pred.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.or.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
70
// bar.sync with an immediate operand.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, immediate (_I) and register (_R) operand forms.  Both are
// gated on the hasPTX60 and hasSM30 predicates.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

// barrier.sync with a single operand, immediate (_I) and register (_R) forms.
// Same predicate gating as above.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
87
// barrier.sync with an explicit count operand.  The two-letter suffix gives
// the kind of $id and $cnt in that order: R = 32-bit register, I = immediate.
// All four forms carry the hasPTX60 and hasSM30 predicates.
let Predicates = [hasPTX60, hasSM30] in {
  def INT_BARRIER_SYNC_CNT_RR
      : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>;
  def INT_BARRIER_SYNC_CNT_RI
      : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>;
  def INT_BARRIER_SYNC_CNT_IR
      : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>;
  def INT_BARRIER_SYNC_CNT_II
      : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>;
}
104
105
// shfl.{up,down,bfly,idx}.b32
//
// Generates four instructions per instantiation, one for each combination of
// register/immediate for the $offset and $mask operands:
//   reg  - both in registers      imm1 - immediate $offset
//   imm2 - immediate $mask        imm3 - both immediate
// ptxas can fold constant registers on its own, so the immediate forms are not
// strictly required; they are cheap to provide and keep the emitted PTX easier
// to read.
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def reg : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;

  def imm1 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;

  def imm2 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;

  def imm3 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
}
136
// Instantiate SHFL for each shuffle mode, once over 32-bit integer registers
// and once over 32-bit float registers.
defm INT_SHFL_DOWN_I32
    : SHFL<Int32Regs,   "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
    : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32
    : SHFL<Int32Regs,   "up",   int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
    : SHFL<Float32Regs, "up",   int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32
    : SHFL<Int32Regs,   "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
    : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32
    : SHFL<Int32Regs,   "idx",  int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
    : SHFL<Float32Regs, "idx",  int_nvvm_shfl_idx_f32>;
145
// shfl.sync.{up,down,bfly,idx}.b32
//
// $threadmask, $offset and $mask may each be a register or an immediate, so
// eight instructions are generated.  The three-letter record suffix spells the
// operand kinds in the order threadmask/offset/mask (r = register,
// i = immediate).  ptxas can fold constant registers itself, so the immediate
// forms are a readability convenience rather than a necessity.
//
// Note the operand order: the intrinsic takes the thread mask first, while the
// emitted PTX places it last.
multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def rrr : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   Int32Regs:$offset, Int32Regs:$mask))]>;

  def rri : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   Int32Regs:$offset, imm:$mask))]>;

  def rir : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   imm:$offset, Int32Regs:$mask))]>;

  def rii : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   imm:$offset, imm:$mask))]>;

  def irr : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   Int32Regs:$offset, Int32Regs:$mask))]>;

  def iri : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   Int32Regs:$offset, imm:$mask))]>;

  def iir : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   imm:$offset, Int32Regs:$mask))]>;

  def iii : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   imm:$offset, imm:$mask))]>;
}
207
// Instantiate SHFL_SYNC for each shuffle mode over 32-bit integer and float
// registers.
// On sm_70 these do not have to be convergent, so a non-convergent variant of
// these intrinsics may eventually be worth adding.
defm INT_SHFL_SYNC_DOWN_I32
    : SHFL_SYNC<Int32Regs,   "down", int_nvvm_shfl_sync_down_i32>;
defm INT_SHFL_SYNC_DOWN_F32
    : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
defm INT_SHFL_SYNC_UP_I32
    : SHFL_SYNC<Int32Regs,   "up",   int_nvvm_shfl_sync_up_i32>;
defm INT_SHFL_SYNC_UP_F32
    : SHFL_SYNC<Float32Regs, "up",   int_nvvm_shfl_sync_up_f32>;
defm INT_SHFL_SYNC_BFLY_I32
    : SHFL_SYNC<Int32Regs,   "bfly", int_nvvm_shfl_sync_bfly_i32>;
defm INT_SHFL_SYNC_BFLY_F32
    : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
defm INT_SHFL_SYNC_IDX_I32
    : SHFL_SYNC<Int32Regs,   "idx",  int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32
    : SHFL_SYNC<Float32Regs, "idx",  int_nvvm_shfl_sync_idx_f32>;
218
219
// vote.{all,any,uni,ballot}
//
// One anonymous instruction per instantiation: takes a predicate register and
// writes a result of class `regclass`.  `mode` supplies the opcode suffix
// including the type (e.g. "all.pred").  Gated on hasPTX60 and hasSM30.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
232
// vote.sync.{all,any,uni,ballot}
//
// Two instructions per instantiation, differing only in whether $mask is an
// immediate (i) or a 32-bit register (r).  The intrinsic takes the mask first;
// the emitted PTX places it last.  Gated on hasPTX60 and hasSM30.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest,
                          (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
249
// match.any.sync.{b32,b64}
//
// Four instructions per instantiation covering immediate/register choices for
// $mask and $value.  `ImmOp` is the immediate operand type used when $value is
// an immediate.  Gated on hasPTX60 and hasSM70.
//   ii - imm mask, imm value      ir - reg mask, imm value
//   ri - imm mask, reg value      rr - reg mask, reg value
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<
      (outs regclass:$dest),
      (ins i32imm:$mask, ImmOp:$value),
      !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
      [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<
      (outs regclass:$dest),
      (ins Int32Regs:$mask, ImmOp:$value),
      !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
      [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<
      (outs regclass:$dest),
      (ins i32imm:$mask, regclass:$value),
      !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
      [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<
      (outs regclass:$dest),
      (ins Int32Regs:$mask, regclass:$value),
      !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;"),
      [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
274
// match.all.sync.{b32,b64}, the form that also produces a predicate.
//
// Each instruction has two results, $dest and the predicate $pred, printed as
// "$dest|$pred".  As with MATCH_ANY_SYNC there are four variants for the
// immediate/register choices of $mask and $value, all gated on hasPTX60 and
// hasSM70.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<
      (outs regclass:$dest, Int1Regs:$pred),
      (ins i32imm:$mask, ImmOp:$value),
      !strconcat("match.all.sync.", ptxtype,
                 " \t$dest|$pred, $value, $mask;"),
      [(set regclass:$dest, Int1Regs:$pred,
            (IntOp imm:$mask, imm:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<
      (outs regclass:$dest, Int1Regs:$pred),
      (ins Int32Regs:$mask, ImmOp:$value),
      !strconcat("match.all.sync.", ptxtype,
                 " \t$dest|$pred, $value, $mask;"),
      [(set regclass:$dest, Int1Regs:$pred,
            (IntOp Int32Regs:$mask, imm:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<
      (outs regclass:$dest, Int1Regs:$pred),
      (ins i32imm:$mask, regclass:$value),
      !strconcat("match.all.sync.", ptxtype,
                 " \t$dest|$pred, $value, $mask;"),
      [(set regclass:$dest, Int1Regs:$pred,
            (IntOp imm:$mask, regclass:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<
      (outs regclass:$dest, Int1Regs:$pred),
      (ins Int32Regs:$mask, regclass:$value),
      !strconcat("match.all.sync.", ptxtype,
                 " \t$dest|$pred, $value, $mask;"),
      [(set regclass:$dest, Int1Regs:$pred,
            (IntOp Int32Regs:$mask, regclass:$value))]>,
      Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
302
303 } // isConvergent = 1
304
305 //-----------------------------------
306 // Explicit Memory Fence Functions
307 //-----------------------------------
// Operand-less instruction that prints StrOp verbatim and is selected for the
// zero-argument intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

// One record per membar level spelled in the assembly string.
def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
315
316
317 //-----------------------------------
318 // Math Functions
319 //-----------------------------------
320
// Fold min(1.0, max(0.0, x)) into a saturating conversion, sat(x).
//
// The opposite nesting, max(0.0, min(x, 1.0)), must NOT be folded: when x is
// NaN it evaluates to 1.0, whereas sat(x) is 0.  The same caveat applies to
// fmax/fmin.
//
// Both intrinsics may have their constant on either side, so each type needs
// four patterns.

// f32
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
352
353
// Helper classes for intrinsics that map onto a single PTX instruction with
// one result ($dst) and one, two or three register sources ($src0..$src2).
// OpcStr is the complete assembly string, operands included, rather than just
// a mnemonic, so that cases such as INT_PTX_RECIP and INT_PTX_NATIVE_POWR_F
// can be expressed.

// One source operand.
class F_MATH_1<string OpcStr,
               NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// Two source operands.
class F_MATH_2<string OpcStr,
               NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass,
               NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three source operands.
class F_MATH_3<string OpcStr,
               NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass,
               NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;
379
380 //
381 // MISC
382 //
383
// prmt.b32 over three 32-bit integer sources; selected for int_nvvm_prmt.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
386
387 //
388 // Min Max
389 //
390
// f32 min, plain and .ftz.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max, plain and .ftz.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min and max.
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
405
406
407 //
408 // Multiplication
409 //
410
// mul.hi on 32-bit operands, signed and unsigned.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi on 64-bit operands, signed and unsigned.
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply: rounding modifiers rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply: rounding modifiers rn/rz/rm/rp.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo, signed and unsigned.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
451
452 //
453 // Div
454 //
455
// f32 div.approx, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide: rounding modifiers rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide: rounding modifiers rn/rz/rm/rp.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
487
488 //
489 // Sad
490 //
491
// sad on three 32-bit integer sources, signed and unsigned.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
496
497 //
498 // Floor  Ceil
499 //
500
// floor is selected as a same-type cvt using the CvtRMI mode (CvtRMI_FTZ for
// the ftz intrinsic).
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil is selected as a same-type cvt using the CvtRPI mode (CvtRPI_FTZ for
// the ftz intrinsic).
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
514
515 //
516 // Abs
517 //
518
// f32 abs, with and without .ftz.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

// f64 abs.
def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
526
527 //
528 // Round
529 //
530
// round is selected as a same-type cvt using the CvtRNI mode (CvtRNI_FTZ for
// the ftz intrinsic).
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
537
538 //
539 // Trunc
540 //
541
// trunc is selected as a same-type cvt using the CvtRZI mode (CvtRZI_FTZ for
// the ftz intrinsic).
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
548
549 //
550 // Saturate
551 //
552
// saturate is selected as a same-type cvt using the CvtSAT mode (CvtSAT_FTZ
// for the ftz intrinsic).
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
559
560 //
561 // Exp2  Log2
562 //
563
// ex2.approx: f32 with .ftz, f32, and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx: f32 with .ftz, f32, and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
577
578 //
579 // Sin  Cos
580 //
581
// f32 sin.approx, with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// f32 cos.approx, with and without .ftz.
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
591
592 //
593 // Fma
594 //
595
// f32 fma: rounding modifiers rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

// f64 fma: rounding modifiers rn/rz/rm/rp.
def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;
625
626 //
627 // Rcp
628 //
629
// f32 reciprocal intrinsics: for each rounding modifier (rn, rz, rm, rp) there
// is a ".ftz" record and a plain record, each matching the int_nvvm_rcp_*
// intrinsic named in its last argument and printing "rcp.<rnd>[.ftz].f32".
630 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
631   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
632 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
633   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
634 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
635   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
636 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
637   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
638 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
639   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
640 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
641   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
642 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
643   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
644 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
645   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
646
// f64 reciprocal intrinsics, one record per rounding modifier (rn, rz, rm, rp).
647 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
648   Float64Regs, int_nvvm_rcp_rn_d>;
649 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
650   Float64Regs, int_nvvm_rcp_rz_d>;
651 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
652   Float64Regs, int_nvvm_rcp_rm_d>;
653 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
654   Float64Regs, int_nvvm_rcp_rp_d>;
655
// Approximate f64 reciprocal: the only "approx" reciprocal record in this
// section, and it exists only in the .ftz form.
656 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
657   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
658
659 //
660 // Sqrt
661 //
662
// f32 square-root intrinsics. Eight rounded records (rn/rz/rm/rp, each with
// and without .ftz) followed by the two "approx" records. The RN and APPROX
// records are also the targets of the generic int_nvvm_sqrt_f patterns that
// appear later in this file.
663 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
664   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
665 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
666   Float32Regs, int_nvvm_sqrt_rn_f>;
667 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
668   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
669 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
670   Float32Regs, int_nvvm_sqrt_rz_f>;
671 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
672   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
673 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
674   Float32Regs, int_nvvm_sqrt_rm_f>;
675 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
676   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
677 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
678   Float32Regs, int_nvvm_sqrt_rp_f>;
679 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
680   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
681 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
682   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
683
// f64 square-root intrinsics, one record per rounding modifier (rn, rz, rm, rp).
684 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
685   Float64Regs, int_nvvm_sqrt_rn_d>;
686 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
687   Float64Regs, int_nvvm_sqrt_rz_d>;
688 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
689   Float64Regs, int_nvvm_sqrt_rm_d>;
690 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
691   Float64Regs, int_nvvm_sqrt_rp_d>;
692
693 // nvvm_sqrt intrinsic
// int_nvvm_sqrt_f has no instruction record of its own; these four patterns
// rewrite it to one of the f32 sqrt records according to two predicates:
//   doF32FTZ + do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN            -> sqrt.rn.f32
//   doF32FTZ                 -> sqrt.approx.ftz.f32
//   (no predicate)           -> sqrt.approx.f32
// NOTE(review): the unpredicated pattern presumably serves as the fallback;
// how overlapping predicate sets are prioritised is decided by the generated
// matcher and is not visible in this file -- confirm before reordering.
694 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
695           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
696 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
697           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
698 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
699           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
700 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
701           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
702
703 //
704 // Rsqrt
705 //
706
// Reciprocal square root. Only "approx" forms are defined here: f32 with and
// without .ftz, and f64 without .ftz.
707 def INT_NVVM_RSQRT_APPROX_FTZ_F
708   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
709     int_nvvm_rsqrt_approx_ftz_f>;
710 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
711   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
712 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
713   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
714
715 //
716 // Add
717 //
718
// f32 addition with an explicit rounding modifier: rn/rz/rm/rp, each in a
// .ftz and a plain form. Each record matches the int_nvvm_add_*_f intrinsic
// named last and prints "add.<rnd>[.ftz].f32 $dst, $src0, $src1".
719 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
720   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
721 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
722   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
723 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
724   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
725 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
726   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
727 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
728   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
729 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
730   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
731 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
732   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
733 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
734   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
735
// f64 addition with an explicit rounding modifier (rn, rz, rm, rp).
736 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
737   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
738 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
739   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
740 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
741   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
742 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
743   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
744
745 //
746 // Convert
747 //
748
// f64 -> f32 narrowing. The conversion intrinsics have no records of their
// own: each pattern rewrites int_nvvm_d2f_<rnd>[_ftz] to the CVT_f32_f64
// instruction (declared outside this excerpt) with the matching Cvt<RND>[_FTZ]
// mode passed as the second operand.
749 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
750           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
751 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
752           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
753 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
754           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
755 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
756           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
757 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
758           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
759 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
760           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
761 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
762           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
763 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
764           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
765
// f64 -> signed i32. Float-to-integer patterns use the Cvt*I mode operands
// (CvtRNI, CvtRZI, ...) rather than the plain Cvt* ones used for
// float-to-float and int-to-float conversions.
766 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
767           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
768 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
769           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
770 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
771           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
772 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
773           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
774
// f64 -> unsigned i32: same shape as the signed patterns but selecting
// CVT_u32_f64.
775 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
776           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
777 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
778           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
779 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
780           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
781 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
782           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
783
// Signed i32 -> f64, one pattern per rounding mode, selecting CVT_f64_s32.
784 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
785           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
786 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
787           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
788 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
789           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
790 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
791           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
792
// Unsigned i32 -> f64, one pattern per rounding mode, selecting CVT_f64_u32.
793 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
794           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
795 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
796           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
797 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
798           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
799 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
800           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
801
// f32 -> signed i32. Each rounding mode has an _ftz intrinsic mapped to the
// Cvt*I_FTZ mode and a plain intrinsic mapped to the Cvt*I mode; all select
// CVT_s32_f32.
802 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
803           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
804 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
805           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
806 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
807           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
808 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
809           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
810 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
811           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
812 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
813           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
814 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
815           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
816 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
817           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
818
// f32 -> unsigned i32: same eight mode combinations as the signed patterns,
// selecting CVT_u32_f32.
819 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
820           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
821 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
822           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
823 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
824           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
825 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
826           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
827 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
828           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
829 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
830           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
831 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
832           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
833 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
834           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
835
// Signed i32 -> f32, one pattern per rounding mode, selecting CVT_f32_s32.
836 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
837           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
838 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
839           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
840 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
841           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
842 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
843           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
844
// Unsigned i32 -> f32, one pattern per rounding mode, selecting CVT_f32_u32.
845 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
846           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
847 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
848           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
849 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
850           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
851 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
852           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
853
// Builds an f64 register from two i32 registers with a packing mov.b64.
// The doubled braces in the asm string are printed as the single braces of
// the PTX vector operand {$src0, $src1}.
854 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
855   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
856
// Extract one 32-bit half of an f64 register. Both records open a PTX scope
// ("{{" ... "}}" in the asm string), declare a scratch .b32 register %temp,
// and unpack $src0 with mov.b64 into a two-element vector:
//   D2I_LO keeps the first element  -> {$dst, %temp}
//   D2I_HI keeps the second element -> {%temp, $dst}
// The single braces around the vector operand sit inside the outer "{{ }}"
// pair, which is why they are not doubled here.
857 def INT_NVVM_D2I_LO : F_MATH_1<
858   !strconcat("{{\n\t",
859              ".reg .b32 %temp; \n\t",
860              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
861              "}}"),
862   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
863 def INT_NVVM_D2I_HI : F_MATH_1<
864   !strconcat("{{\n\t",
865              ".reg .b32 %temp; \n\t",
866              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
867              "}}"),
868   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
869
// f32 -> signed i64. Each rounding mode has an _ftz and a plain intrinsic;
// all select CVT_s64_f32 with the corresponding Cvt*I[_FTZ] mode operand.
870 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
871           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
872 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
873           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
874 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
875           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
876 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
877           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
878 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
879           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
880 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
881           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
882 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
883           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
884 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
885           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
886
// f32 -> unsigned i64: same eight mode combinations as the signed patterns,
// selecting CVT_u64_f32.
887 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
888           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
889 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
890           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
891 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
892           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
893 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
894           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
895 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
896           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
897 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
898           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
899 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
900           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
901 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
902           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
903
// f64 -> signed i64, one pattern per rounding mode, selecting CVT_s64_f64.
904 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
905           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
906 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
907           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
908 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
909           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
910 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
911           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
912
// f64 -> unsigned i64, one pattern per rounding mode, selecting CVT_u64_f64.
913 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
914           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
915 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
916           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
917 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
918           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
919 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
920           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
921
// Signed i64 -> f32, one pattern per rounding mode, selecting CVT_f32_s64.
922 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
923           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
924 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
925           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
926 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
927           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
928 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
929           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
930
// Unsigned i64 -> f32, one pattern per rounding mode, selecting CVT_f32_u64.
931 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
932           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
933 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
934           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
935 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
936           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
937 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
938           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
939
// Signed i64 -> f64, one pattern per rounding mode, selecting CVT_f64_s64.
940 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
941           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
942 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
943           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
944 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
945           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
946 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
947           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
948
// Unsigned i64 -> f64, one pattern per rounding mode, selecting CVT_f64_u64.
949 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
950           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
951 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
952           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
953 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
954           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
955 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
956           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
957
958
// f32 -> f16 (round-to-nearest only, with and without FTZ). Unlike the other
// conversions this is a two-step rewrite: CVT_f16_f32 performs the conversion
// and its result is then fed through BITCONVERT_16_F2I.
// NOTE(review): presumably the intrinsic returns the half value as a 16-bit
// integer, hence the extra bit-conversion -- confirm against the intrinsic
// declaration.
959 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
960           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
961 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
962           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
963
964 //
965 // Bitcast
966 //
967
// 32-bit bitcasts between Float32Regs and Int32Regs. Both directions print
// the same "mov.b32"; only the source/destination register classes differ.
968 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
969   Float32Regs, int_nvvm_bitcast_f2i>;
970 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
971   Int32Regs, int_nvvm_bitcast_i2f>;
972
// 64-bit bitcasts between Int64Regs and Float64Regs, both printed as
// "mov.b64".
973 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
974   Int64Regs, int_nvvm_bitcast_ll2d>;
975 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
976   Float64Regs, int_nvvm_bitcast_d2ll>;
977
978 //
979 // FNS
980 //
981
// Common shape of every "fns.b32" record.
//   ins      - input operand dag; must name its operands $mask, $base and
//              $offset because the asm string refers to them by those names.
//   Operands - the intrinsic dag whose result is assigned to $dst.
// The result is always an Int32Regs register, and the instruction is only
// selectable when both hasPTX60 and hasSM30 hold.
982 class INT_FNS_MBO<dag ins, dag Operands>
983   : NVPTXInst<(outs Int32Regs:$dst), ins,
984                "fns.b32 \t$dst, $mask, $base, $offset;",
985                [(set Int32Regs:$dst, Operands )]>,
986     Requires<[hasPTX60, hasSM30]>;
987
// All eight register/immediate combinations for int_nvvm_fns. The three-letter
// suffix gives the kind of (mask, base, offset) in that order: 'r' means an
// Int32Regs operand, 'i' means an i32imm operand matched with `imm`.
988 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
989                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
990 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
991                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
992 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
993                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
994 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
995                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
996 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
997                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
998 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
999                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
1000 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
1001                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
1002 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
1003                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1004
1005 //-----------------------------------
1006 // Atomic Functions
1007 //-----------------------------------
1008
// Pattern fragment that wraps `frag` and accepts the matched node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL) returns true.
// The helper is C++ code defined outside this file.
1009 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1010  : PatFrag<ops, frag, [{
1011    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
1012 }]>;
// Pattern fragment that wraps `frag` and accepts the matched node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED) returns true.
1013 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1014  : PatFrag<ops, frag, [{
1015    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
1016 }]>;
// Pattern fragment that wraps `frag` and accepts the matched node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC) returns true.
1017 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1018  : PatFrag<ops, frag, [{
1019    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
1020 }]>;
1021
// Two-operand atomic (address, value) for a single pointer register class.
// Emits two records per instantiation:
//   reg - the value operand $b is a register of `regclass`
//   imm - the value operand $b is an immediate of operand type IMMType,
//         matched in the pattern with the node IMM
// Both print "atom" + SpaceStr + OpcStr + TypeStr followed by
// " $dst, [$addr], $b;", match IntOp, and are gated by the predicates in Pred.
1022 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1023   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1024   Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1025   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1026     "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
1027     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1028   Requires<Pred>;
1029   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1030     "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
1031     [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
1032   Requires<Pred>;
1033 }
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the pointer register class, producing the p32* and p64*
// records. Pred defaults to the empty list, i.e. no extra predicates.
1034 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1035   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1036   list<Predicate> Pred = []> {
1037   defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1038     IntOp, IMMType, IMM, Pred>;
1039   defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1040     IntOp, IMMType, IMM, Pred>;
1041 }
1042
1043 // has 2 operands, neg the second one
// Emits a single `reg` record whose asm is a scoped PTX sequence: declare a
// scratch register `temp` of type .s<TypeStr>, negate $b into it with
// neg.s<TypeStr>, then issue "atom" + SpaceStr + OpcStr + ".u<TypeStr>" on
// [$addr] with temp as the value. TypeStr is therefore the bare bit width
// ("32"/"64"), without a leading type letter.
// NOTE(review): IMMType is accepted but never referenced in the body, and no
// immediate form is generated -- confirm whether that is intentional.
1044 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1045   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1046   Operand IMMType, list<Predicate> Pred> {
1047   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1048     "{{ \n\t"
1049       # ".reg \t.s" # TypeStr # " temp; \n\t"
1050       # "neg.s" # TypeStr # " \ttemp, $b; \n\t"
1051       # "atom" # SpaceStr # OpcStr # ".u" # TypeStr
1052       # " \t$dst, [$addr], temp; \n\t"
1053       # "}}",
1054     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1055   Requires<Pred>;
1056 }
// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (Int32Regs) and 64-bit
// (Int64Regs) pointer register classes, producing the p32* and p64* records.
// Pred defaults to the empty list. IMMType is only forwarded to the _imp
// multiclass.
1057 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1058   string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1059   list<Predicate> Pred = []> {
1060  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1061    IntOp, IMMType, Pred> ;
1062  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1063    IntOp, IMMType, Pred> ;
1064 }
1065
1066 // has 3 operands
// Three-operand atomic (address, $b, $c) for a single pointer register class.
// Emits four records covering every register/immediate mix of $b and $c:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediates use operand type IMMType and are matched with the `imm` node.
// All four share the asm text "atom" + SpaceStr + OpcStr + TypeStr followed
// by " $dst, [$addr], $b, $c;" and are gated by the predicates in Pred.
1067 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1068   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1069   Operand IMMType, list<Predicate> Pred> {
1070   def reg : NVPTXInst<(outs regclass:$dst),
1071     (ins ptrclass:$addr, regclass:$b, regclass:$c),
1072     "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
1073     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1074   Requires<Pred>;
1075
1076   def imm1 : NVPTXInst<(outs regclass:$dst),
1077     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1078     "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
1079     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1080   Requires<Pred>;
1081
1082   def imm2 : NVPTXInst<(outs regclass:$dst),
1083     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1084     "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
1085     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1086   Requires<Pred>;
1087
1088   def imm3 : NVPTXInst<(outs regclass:$dst),
1089     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1090     "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
1091     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
1092   Requires<Pred>;
1093 }
// Instantiates F_ATOMIC_3_imp for 32-bit (Int32Regs) and 64-bit (Int64Regs)
// pointer register classes, producing the p32* and p64* records.
// Pred defaults to the empty list.
1094 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1095   string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1096   defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1097     IntOp, IMMType, Pred>;
1098   defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1099     IntOp, IMMType, Pred>;
1100 }
1101
1102 // atom_add
1103
// Address-space-qualified fragments for atomic add. Suffix _g / _s / _gen
// selects the global, shared or generic address-space check. The integer
// fragments wrap the target-independent atomic_load_add_32/64 nodes; the
// f32/f64 fragments wrap the NVVM intrinsics int_nvvm_atomic_load_add_f32/f64.
1104 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1105   (atomic_load_add_32 node:$a, node:$b)>;
1106 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1107   (atomic_load_add_32 node:$a, node:$b)>;
1108 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1109   (atomic_load_add_32 node:$a, node:$b)>;
1110 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1111   (atomic_load_add_64 node:$a, node:$b)>;
1112 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1113   (atomic_load_add_64 node:$a, node:$b)>;
1114 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1115   (atomic_load_add_64 node:$a, node:$b)>;
1116 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1117   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1118 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1119   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1120 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1121   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1122 def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1123   (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1124 def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1125   (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1126 def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1127   (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1128
// 32-bit integer atomic add ("atom[.space].add.u32"). An empty SpaceStr
// yields the unqualified (generic) form.
// NOTE(review): GEN_32_USE_G matches the same generic fragment as GEN_32 but
// prints ".global"; no predicate visible here separates the two -- confirm
// how (or whether) the USE_G variant is meant to be selected.
1129 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1130   atomic_load_add_32_g, i32imm, imm>;
1131 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1132   atomic_load_add_32_s, i32imm, imm>;
1133 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1134   atomic_load_add_32_gen, i32imm, imm>;
1135 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1136   ".add", atomic_load_add_32_gen, i32imm, imm>;
1137
// 64-bit integer atomic add ("atom[.space].add.u64"), same four variants as
// the 32-bit group but with Int64Regs values and i64imm immediates.
1138 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1139   atomic_load_add_64_g, i64imm, imm>;
1140 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1141   atomic_load_add_64_s, i64imm, imm>;
1142 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1143   atomic_load_add_64_gen, i64imm, imm>;
1144 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1145   ".add", atomic_load_add_64_gen, i64imm, imm>;
1146
// f32 atomic add ("atom[.space].add.f32"). Immediates are f32imm operands
// matched with `fpimm`. No extra predicate is passed, and there is no USE_G
// variant for the floating-point forms.
1147 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1148   atomic_load_add_f32_g, f32imm, fpimm>;
1149 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1150   atomic_load_add_f32_s, f32imm, fpimm>;
1151 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1152   atomic_load_add_f32_gen, f32imm, fpimm>;
1153
// f64 atomic add ("atom[.space].add.f64"). These are the only atomic-add
// records in this section that pass a predicate list: all three require
// hasAtomAddF64.
1154 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1155   atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
1156 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1157   atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
1158 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1159   atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
1160
1161 // atom_sub
1162
// Address-space-qualified fragments for atomic subtract, wrapping
// atomic_load_sub_32/64 with the global (_g), shared (_s) or generic (_gen)
// address-space check.
1163 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1164   (atomic_load_sub_32 node:$a, node:$b)>;
1165 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1166   (atomic_load_sub_32 node:$a, node:$b)>;
1167 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1168   (atomic_load_sub_32 node:$a, node:$b)>;
1169 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1170   (atomic_load_sub_64 node:$a, node:$b)>;
1171 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1172   (atomic_load_sub_64 node:$a, node:$b)>;
1173 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1174   (atomic_load_sub_64 node:$a, node:$b)>;
1175
// Atomic subtract is built from F_ATOMIC_2_NEG with OpcStr ".add": the
// generated asm negates the value and then performs an atomic add. TypeStr
// is passed as the bare width ("32"/"64") because the multiclass prefixes it
// with ".s" for the negate and ".u" for the atom instruction.
// NOTE(review): the *_GEN_*_USE_G records match the same generic fragment as
// the *_GEN_* records but print ".global"; nothing visible here distinguishes
// when each applies -- confirm intent.
1176 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1177   atomic_load_sub_32_g, i32imm>;
1178 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1179   atomic_load_sub_64_g, i64imm>;
1180 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1181   atomic_load_sub_32_gen, i32imm>;
1182 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1183   ".add", atomic_load_sub_32_gen, i32imm>;
1184 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1185   atomic_load_sub_32_s, i32imm>;
1186 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1187   atomic_load_sub_64_s, i64imm>;
1188 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1189   atomic_load_sub_64_gen, i64imm>;
1190 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1191   ".add", atomic_load_sub_64_gen, i64imm>;
1192
1193 // atom_swap
1194
// Address-space-qualified fragments for atomic exchange, wrapping
// atomic_swap_32/64 with the global (_g), shared (_s) or generic (_gen)
// address-space check.
1195 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1196   (atomic_swap_32 node:$a, node:$b)>;
1197 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1198   (atomic_swap_32 node:$a, node:$b)>;
1199 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1200   (atomic_swap_32 node:$a, node:$b)>;
1201 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1202   (atomic_swap_64 node:$a, node:$b)>;
1203 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1204   (atomic_swap_64 node:$a, node:$b)>;
1205 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1206   (atomic_swap_64 node:$a, node:$b)>;
1207
// Atomic exchange ("atom[.space].exch.b32/.b64"). Exchange uses the untyped
// bit-size suffixes .b32/.b64 rather than the .u/.s suffixes used by the
// arithmetic atomics in this file.
// NOTE(review): the *_GEN_*_USE_G records match the same generic fragment as
// the *_GEN_* records but print ".global" -- confirm intent.
1208 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1209   atomic_swap_32_g, i32imm, imm>;
1210 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1211   atomic_swap_32_s, i32imm, imm>;
1212 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1213   atomic_swap_32_gen, i32imm, imm>;
1214 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1215   ".exch", atomic_swap_32_gen, i32imm, imm>;
1216 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1217   atomic_swap_64_g, i64imm, imm>;
1218 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1219   atomic_swap_64_s, i64imm, imm>;
1220 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1221   atomic_swap_64_gen, i64imm, imm>;
1222 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1223   ".exch", atomic_swap_64_gen, i64imm, imm>;
1224
1225 // atom_max
1226
// Address-space-qualified fragments for atomic max. atomic_load_max_* wrap
// the signed node and atomic_load_umax_* the unsigned node, each in 32- and
// 64-bit widths with the global (_g), shared (_s) or generic (_gen) check.
1227 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1228   , (atomic_load_max_32 node:$a, node:$b)>;
1229 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1230   (atomic_load_max_32 node:$a, node:$b)>;
1231 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1232   (atomic_load_max_32 node:$a, node:$b)>;
1233 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1234   , (atomic_load_max_64 node:$a, node:$b)>;
1235 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1236   (atomic_load_max_64 node:$a, node:$b)>;
1237 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1238   (atomic_load_max_64 node:$a, node:$b)>;
1239 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1240   (atomic_load_umax_32 node:$a, node:$b)>;
1241 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1242   (atomic_load_umax_32 node:$a, node:$b)>;
1243 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1244   (atomic_load_umax_32 node:$a, node:$b)>;
1245 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1246   (atomic_load_umax_64 node:$a, node:$b)>;
1247 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1248   (atomic_load_umax_64 node:$a, node:$b)>;
1249 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1250   (atomic_load_umax_64 node:$a, node:$b)>;
1251
// Atomic max ("atom[.space].max.<type>"). Signedness is carried by TypeStr:
// the LOAD_MAX records use .s32/.s64 with the signed fragments, the LOAD_UMAX
// records use .u32/.u64 with the unsigned fragments.
// NOTE(review): the *_GEN_*_USE_G records match the same generic fragment as
// the *_GEN_* records but print ".global" -- confirm intent.
1252 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1253   ".max", atomic_load_max_32_g, i32imm, imm>;
1254 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1255   ".max", atomic_load_max_32_s, i32imm, imm>;
1256 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1257   atomic_load_max_32_gen, i32imm, imm>;
1258 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1259   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1260 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1261   ".max", atomic_load_max_64_g, i64imm, imm>;
1262 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1263   ".max", atomic_load_max_64_s, i64imm, imm>;
1264 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1265   atomic_load_max_64_gen, i64imm, imm>;
1266 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1267   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
1268 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1269   ".max", atomic_load_umax_32_g, i32imm, imm>;
1270 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1271   ".max", atomic_load_umax_32_s, i32imm, imm>;
1272 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1273   atomic_load_umax_32_gen, i32imm, imm>;
1274 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1275   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1276 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1277   ".max", atomic_load_umax_64_g, i64imm, imm>;
1278 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1279   ".max", atomic_load_umax_64_s, i64imm, imm>;
1280 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1281   atomic_load_umax_64_gen, i64imm, imm>;
1282 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1283   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
1284
1285 // atom_min
1286
// Address-space-qualified wrappers around atomic_load_min_{32,64} (signed)
// and atomic_load_umin_{32,64} (unsigned).  The suffix names the check class
// applied to the node: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK.
1287 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1288   (atomic_load_min_32 node:$a, node:$b)>;
1289 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1290   (atomic_load_min_32 node:$a, node:$b)>;
1291 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1292   (atomic_load_min_32 node:$a, node:$b)>;
1293 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1294   (atomic_load_min_64 node:$a, node:$b)>;
1295 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1296   (atomic_load_min_64 node:$a, node:$b)>;
1297 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1298   (atomic_load_min_64 node:$a, node:$b)>;
1299 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1300   (atomic_load_umin_32 node:$a, node:$b)>;
1301 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1302   (atomic_load_umin_32 node:$a, node:$b)>;
1303 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1304   (atomic_load_umin_32 node:$a, node:$b)>;
1305 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1306   (atomic_load_umin_64 node:$a, node:$b)>;
1307 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1308   (atomic_load_umin_64 node:$a, node:$b)>;
1309 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1310   (atomic_load_umin_64 node:$a, node:$b)>;
1311
// F_ATOMIC_2 instantiations for the ".min" operation, one per combination of
// address space (G = ".global", S = ".shared", GEN = "" i.e. no space string),
// width (32/64) and signedness (MIN passes ".s32"/".s64", UMIN passes
// ".u32"/".u64").  The *_GEN_*_USE_G variants match the generic-address
// wrapper but pass ".global" as the space string.
1312 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1313   ".min", atomic_load_min_32_g, i32imm, imm>;
1314 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1315   ".min", atomic_load_min_32_s, i32imm, imm>;
1316 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1317   atomic_load_min_32_gen, i32imm, imm>;
1318 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1319   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1320 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1321   ".min", atomic_load_min_64_g, i64imm, imm>;
1322 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1323   ".min", atomic_load_min_64_s, i64imm, imm>;
1324 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1325   atomic_load_min_64_gen, i64imm, imm>;
1326 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1327   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1328 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1329   ".min", atomic_load_umin_32_g, i32imm, imm>;
1330 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1331   ".min", atomic_load_umin_32_s, i32imm, imm>;
1332 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1333   atomic_load_umin_32_gen, i32imm, imm>;
1334 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1335   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1336 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1337   ".min", atomic_load_umin_64_g, i64imm, imm>;
1338 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1339   ".min", atomic_load_umin_64_s, i64imm, imm>;
1340 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1341   atomic_load_umin_64_gen, i64imm, imm>;
1342 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1343   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1344
1345 // atom_inc  atom_dec
1346
// Address-space-qualified wrappers for atomic increment/decrement.  Unlike
// the other sections, the wrapped operators here are the intrinsics
// int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32 rather than
// atomic_load_* nodes.  Only 32-bit forms are defined.
1347 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1348   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1349 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1350   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1351 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1352   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1353 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1354   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1355 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1356   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1357 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1358   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1359
// F_ATOMIC_2 instantiations for ".inc" and ".dec".  All use Int32Regs with
// the ".u32" type string; there are no 64-bit variants.  The *_GEN_32_USE_G
// variants match the generic-address wrapper but pass ".global" as the space
// string.
1360 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1361   atomic_load_inc_32_g, i32imm, imm>;
1362 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1363   atomic_load_inc_32_s, i32imm, imm>;
1364 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1365   atomic_load_inc_32_gen, i32imm, imm>;
1366 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1367   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1368 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1369   atomic_load_dec_32_g, i32imm, imm>;
1370 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1371   atomic_load_dec_32_s, i32imm, imm>;
1372 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1373   atomic_load_dec_32_gen, i32imm, imm>;
1374 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1375   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1376
1377 // atom_and
1378
// Address-space-qualified wrappers around atomic_load_and_{32,64}.
// _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK.
1379 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1380   (atomic_load_and_32 node:$a, node:$b)>;
1381 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1382   (atomic_load_and_32 node:$a, node:$b)>;
1383 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1384   (atomic_load_and_32 node:$a, node:$b)>;
1385 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1386   (atomic_load_and_64 node:$a, node:$b)>;
1387 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1388   (atomic_load_and_64 node:$a, node:$b)>;
1389 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1390   (atomic_load_and_64 node:$a, node:$b)>;
1391
// F_ATOMIC_2 instantiations for ".and" with untyped-bit type strings
// (".b32"/".b64").  The *_GEN_*_USE_G variants match the generic-address
// wrapper but pass ".global" as the space string.
1392 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1393   atomic_load_and_32_g, i32imm, imm>;
1394 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1395   atomic_load_and_32_s, i32imm, imm>;
1396 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1397   atomic_load_and_32_gen, i32imm, imm>;
1398 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1399   ".and", atomic_load_and_32_gen, i32imm, imm>;
1400 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1401   atomic_load_and_64_g, i64imm, imm>;
1402 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1403   atomic_load_and_64_s, i64imm, imm>;
1404 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1405   atomic_load_and_64_gen, i64imm, imm>;
1406 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1407   ".and", atomic_load_and_64_gen, i64imm, imm>;
1408
1409 // atom_or
1410
// Address-space-qualified wrappers around atomic_load_or_{32,64}.
// _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK.
1411 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1412   (atomic_load_or_32 node:$a, node:$b)>;
1413 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1414   (atomic_load_or_32 node:$a, node:$b)>;
1415 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1416   (atomic_load_or_32 node:$a, node:$b)>;
1417 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1418   (atomic_load_or_64 node:$a, node:$b)>;
1419 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1420   (atomic_load_or_64 node:$a, node:$b)>;
1421 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1422   (atomic_load_or_64 node:$a, node:$b)>;
1423
// F_ATOMIC_2 instantiations for ".or" with untyped-bit type strings
// (".b32"/".b64").  Note the listing order here is G, GEN, GEN_USE_G, S,
// whereas the and/xor sections list G, S, GEN, GEN_USE_G; the set of variants
// is the same.
1424 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1425   atomic_load_or_32_g, i32imm, imm>;
1426 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1427   atomic_load_or_32_gen, i32imm, imm>;
1428 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1429   ".or", atomic_load_or_32_gen, i32imm, imm>;
1430 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1431   atomic_load_or_32_s, i32imm, imm>;
1432 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1433   atomic_load_or_64_g, i64imm, imm>;
1434 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1435   atomic_load_or_64_gen, i64imm, imm>;
1436 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1437   ".or", atomic_load_or_64_gen, i64imm, imm>;
1438 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1439   atomic_load_or_64_s, i64imm, imm>;
1440
1441 // atom_xor
1442
// Address-space-qualified wrappers around atomic_load_xor_{32,64}.
// _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK.
1443 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1444   (atomic_load_xor_32 node:$a, node:$b)>;
1445 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1446   (atomic_load_xor_32 node:$a, node:$b)>;
1447 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1448   (atomic_load_xor_32 node:$a, node:$b)>;
1449 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1450   (atomic_load_xor_64 node:$a, node:$b)>;
1451 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1452   (atomic_load_xor_64 node:$a, node:$b)>;
1453 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1454   (atomic_load_xor_64 node:$a, node:$b)>;
1455
// F_ATOMIC_2 instantiations for ".xor" with untyped-bit type strings
// (".b32"/".b64").  The *_GEN_*_USE_G variants match the generic-address
// wrapper but pass ".global" as the space string.
1456 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1457   atomic_load_xor_32_g, i32imm, imm>;
1458 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1459   atomic_load_xor_32_s, i32imm, imm>;
1460 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1461   atomic_load_xor_32_gen, i32imm, imm>;
1462 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1463   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1464 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1465   atomic_load_xor_64_g, i64imm, imm>;
1466 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1467   atomic_load_xor_64_s, i64imm, imm>;
1468 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1469   atomic_load_xor_64_gen, i64imm, imm>;
1470 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1471   ".xor", atomic_load_xor_64_gen, i64imm, imm>;
1472
1473 // atom_cas
1474
// Address-space-qualified wrappers around atomic_cmp_swap_{32,64}.  These
// take three operands ($a, $b, $c) instead of the two used by the
// read-modify-write wrappers.
// _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK, _gen -> ATOMIC_GENERIC_CHK.
1475 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1476   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1477 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1478   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1479 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1480   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1481 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1482   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1483 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1484   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1485 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1486   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1487
// F_ATOMIC_3 instantiations for ".cas" with untyped-bit type strings
// (".b32"/".b64").  Unlike the F_ATOMIC_2 uses in the sections above, no
// immediate-node argument is passed here, only the immediate operand type.
// The *_GEN_*_USE_G variants match the generic-address wrapper but pass
// ".global" as the space string.
1488 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1489   atomic_cmp_swap_32_g, i32imm>;
1490 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1491   atomic_cmp_swap_32_s, i32imm>;
1492 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1493   atomic_cmp_swap_32_gen, i32imm>;
1494 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1495   ".cas", atomic_cmp_swap_32_gen, i32imm>;
1496 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1497   atomic_cmp_swap_64_g, i64imm>;
1498 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1499   atomic_cmp_swap_64_s, i64imm>;
1500 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1501   atomic_cmp_swap_64_gen, i64imm>;
1502 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1503   ".cas", atomic_cmp_swap_64_gen, i64imm>;
1504
1505 // Support for scoped atomic operations.  Matches
1506 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1507 // and converts it into the appropriate instruction.
1508 // NOTE: not all possible combinations are implemented
1509 //  'space' is limited to generic as it's the only one needed to support CUDA.
1510 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common instruction shape shared by the scoped 2- and 3-operand atomics.
//   AsmStr   - assembly string passed to NVPTXInst.
//   regclass - register class of the single $result output.
//   Preds    - predicates forwarded to Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value the selection pattern assigns to $result.
1511 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1512                   dag ins, dag Operands>
1513       : NVPTXInst<(outs regclass:$result), ins,
1514                   AsmStr,
1515                   [(set regclass:$result, Operands)]>,
1516         Requires<Preds>;
1517
1518 // Define instruction variants for all addressing modes.
// Two-operand scoped atomic: emits four anonymous ATOM23_impl instructions
// covering {Int32Regs, Int64Regs} address register x {register, immediate}
// value operand $b.
1519 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1520                        NVPTXRegClass regclass, Operand ImmType,
1521                        SDNode Imm, ValueType ImmTy,
1522                        list<Predicate> Preds> {
   // Register-operand forms carry AddedComplexity = 1; the immediate forms
   // below are left at the default.
1523   let AddedComplexity = 1 in {
1524     def : ATOM23_impl<AsmStr, regclass, Preds,
1525                       (ins Int32Regs:$src, regclass:$b),
1526                       (Intr Int32Regs:$src, regclass:$b)>;
1527     def : ATOM23_impl<AsmStr, regclass, Preds,
1528                       (ins Int64Regs:$src, regclass:$b),
1529                       (Intr Int64Regs:$src, regclass:$b)>;
1530   }
1531   // tablegen can't infer argument types from Intrinsic (though it can
1532   // from Instruction) so we have to enforce specific type on
1533   // immediates via explicit cast to ImmTy.
1534   def : ATOM23_impl<AsmStr, regclass, Preds,
1535                     (ins Int32Regs:$src, ImmType:$b),
1536                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1537   def : ATOM23_impl<AsmStr, regclass, Preds,
1538                     (ins Int64Regs:$src, ImmType:$b),
1539                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1540 }
1541
// Three-operand scoped atomic: emits eight anonymous ATOM23_impl instructions
// covering {Int32Regs, Int64Regs} address register x every register/immediate
// combination of $b and $c.  AddedComplexity is 2 for reg/reg, 1 when exactly
// one of $b/$c is an immediate, and left at the default for imm/imm.
1542 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1543                        NVPTXRegClass regclass, Operand ImmType,
1544                        SDNode Imm, ValueType ImmTy,
1545                        list<Predicate> Preds> {
1546   // Variants for register/immediate permutations of $b and $c
1547   let AddedComplexity = 2 in {
1548     def : ATOM23_impl<AsmStr, regclass, Preds,
1549                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1550                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1551     def : ATOM23_impl<AsmStr, regclass, Preds,
1552                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1553                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1554   }
1555   let AddedComplexity = 1 in {
     // $b immediate, $c register.
1556     def : ATOM23_impl<AsmStr, regclass, Preds,
1557                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1558                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1559     def : ATOM23_impl<AsmStr, regclass, Preds,
1560                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1561                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
     // $b register, $c immediate.
1562     def : ATOM23_impl<AsmStr, regclass, Preds,
1563                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1564                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1565     def : ATOM23_impl<AsmStr, regclass, Preds,
1566                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1567                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1568   }
   // Both $b and $c immediate; immediates are cast to ImmTy explicitly.
1569   def : ATOM23_impl<AsmStr, regclass, Preds,
1570                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1571                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1572   def : ATOM23_impl<AsmStr, regclass, Preds,
1573                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1574                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1575 }
1576
1577 // Constructs intrinsic name and instruction asm strings.
// Two-operand form.  Builds:
//   asm:       "atom[.<SpaceStr>][.<ScopeStr>].<OpStr>.<TypeStr> \t$result, [$src], $b;"
//              (the space part is dropped when SpaceStr == "gen", the scope
//              part when ScopeStr == "gpu")
//   intrinsic: "int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]"
//              (the scope suffix is dropped when ScopeStr == ""), resolved
//              with !cast<Intrinsic>.
// TypeStr affects only the asm string; it is not part of the intrinsic name.
// NOTE(review): the two scope checks use different sentinels ("gpu" vs "").
// Only "cta" and "sys" are passed by the callers visible in this file, so
// neither branch is exercised -- confirm before adding a new scope.
1578 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1579                        string ScopeStr, string SpaceStr,
1580                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1581                        ValueType ImmTy, list<Predicate> Preds> {
1582   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1583                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1584                             # "." # OpStr # "." # TypeStr
1585                             # " \t$result, [$src], $b;",
1586                      !cast<Intrinsic>(
1587                             "int_nvvm_atomic_" # OpStr
1588                             # "_" # SpaceStr # "_" # IntTypeStr
1589                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1590                      regclass, ImmType, Imm, ImmTy, Preds>;
1591 }
// Three-operand form.  Builds:
//   asm:       "atom[.<SpaceStr>][.<ScopeStr>].<OpStr>.<TypeStr> \t$result, [$src], $b, $c;"
//              (the space part is dropped when SpaceStr == "gen", the scope
//              part when ScopeStr == "gpu")
//   intrinsic: "int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]"
//              (the scope suffix is dropped when ScopeStr == ""), resolved
//              with !cast<Intrinsic>.
// NOTE(review): the two scope checks use different sentinels ("gpu" vs "").
// Only "cta" and "sys" are passed by the callers visible in this file --
// confirm before adding a new scope.
1592 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1593                        string ScopeStr, string SpaceStr,
1594                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1595                        ValueType ImmTy, list<Predicate> Preds> {
1596   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1597                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1598                             # "." # OpStr # "." # TypeStr
1599                             # " \t$result, [$src], $b, $c;",
1600                      !cast<Intrinsic>(
1601                             "int_nvvm_atomic_" # OpStr
1602                             # "_" # SpaceStr # "_" # IntTypeStr
1603                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1604                      regclass, ImmType, Imm, ImmTy, Preds>;
1605 }
1606
1607 // Constructs variants for different address spaces.
1608 // For now we only need variants for generic space pointers.
// Two-operand address-space fan-out: forwards every argument to ATOM2N_impl
// with SpaceStr fixed to "gen", under the `_gen_` name fragment.
1609 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1610                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1611                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1612    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1613                             regclass, ImmType, Imm, ImmTy, Preds>;
1614 }
// Three-operand address-space fan-out: forwards every argument to ATOM3N_impl
// with SpaceStr fixed to "gen", under the `_gen_` name fragment.
1615 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1616                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1617                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1618    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1619                             regclass, ImmType, Imm, ImmTy, Preds>;
1620 }
1621
1622 // Constructs variants for different scopes of atomic op.
// Two-operand scope fan-out: instantiates ATOM2A_impl once with ScopeStr
// "cta" and once with "sys".  Both append hasAtomScope to the caller's Preds.
1623 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1624                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1625                        ValueType ImmTy, list<Predicate> Preds> {
1626    // .gpu scope is default and is currently covered by existing
1627    // atomics w/o explicitly specified scope.
1628    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1629                            regclass, ImmType, Imm, ImmTy,
1630                            !listconcat(Preds,[hasAtomScope])>;
1631    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1632                            regclass, ImmType, Imm, ImmTy,
1633                            !listconcat(Preds,[hasAtomScope])>;
1634 }
// Three-operand scope fan-out: instantiates ATOM3A_impl once with ScopeStr
// "cta" and once with "sys".  Both append hasAtomScope to the caller's Preds.
1635 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1636            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1637            list<Predicate> Preds> {
1638    // No need to define ".gpu"-scoped atomics.  They do the same thing
1639    // as the regular, non-scoped atomics defined elsewhere.
1640    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1641                            regclass, ImmType, Imm, ImmTy,
1642                            !listconcat(Preds,[hasAtomScope])>;
1643    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1644                            regclass, ImmType, Imm, ImmTy,
1645                            !listconcat(Preds,[hasAtomScope])>;
1646 }
1647
1648 // atom.add
// Scoped add variants: s32, u32, u64 (integer intrinsics, IntTypeStr "i") and
// f32, f64 (floating-point intrinsics, IntTypeStr "f").  Only f64 carries an
// extra predicate (hasAtomAddF64).
// NOTE(review): _s32 and _u32 pass the same IntTypeStr and register class, and
// TypeStr is not part of the intrinsic name built in ATOM2N_impl, so both
// resolve to the same intrinsic and differ only in the emitted asm type --
// confirm this duplication is intended.
1649 multiclass ATOM2_add_impl<string OpStr> {
1650    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1651    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1652    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1653    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1654                             []>;
1655    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1656                             [hasAtomAddF64]>;
1657 }
1658
1659 // atom.{and,or,xor}
// Scoped bitwise variants (b32, b64).  The 64-bit form additionally requires
// hasAtomBitwise64.
1660 multiclass ATOM2_bitwise_impl<string OpStr> {
1661    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1662    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1663                             [hasAtomBitwise64]>;
1664 }
1665
1666 // atom.exch
// Scoped exchange variants (b32, b64).  Neither form adds a predicate beyond
// the hasAtomScope appended by ATOM2S_impl.
1667 multiclass ATOM2_exch_impl<string OpStr> {
1668    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1669    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1670 }
1671
1672 // atom.{min,max}
// Scoped min/max variants: s32, u32, s64, u64.  The 64-bit forms additionally
// require hasAtomMinMax64.
// NOTE(review): every variant passes IntTypeStr "i", and TypeStr is not part of
// the intrinsic name built in ATOM2N_impl, so the signed and unsigned forms of
// a given width resolve to the same intrinsic and differ only in the emitted
// asm type.  Signedness changes the result of min/max, so confirm how the
// unsigned forms are meant to be selected.
1673 multiclass ATOM2_minmax_impl<string OpStr> {
1674    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1675    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1676    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1677                             [hasAtomMinMax64]>;
1678    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1679                             [hasAtomMinMax64]>;
1680 }
1681
1682 // atom.{inc,dec}
// Scoped increment/decrement: a single u32 variant.
1683 multiclass ATOM2_incdec_impl<string OpStr> {
1684    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1685 }
1686
1687 // atom.cas
// Scoped compare-and-swap variants (b32, b64), built on the three-operand
// ATOM3S_impl.
1688 multiclass ATOM3_cas_impl<string OpStr> {
1689    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1690    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1691 }
1692
// Top-level instantiations of the scoped atomics, one per operation.  The
// string argument is the OpStr used in both the asm mnemonic and the
// intrinsic name.
1693 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1694 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1695 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1696 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1697 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1698 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1699 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1700 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1701 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1702 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1703
1704 //-----------------------------------
1705 // Support for ldu on sm_20 or later
1706 //-----------------------------------
1707
1708 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1709 // read-only in a kernel.
1710
1711 // Scalar
1712
// Scalar "ldu.global." instructions, one per addressing form of $src:
//   areg   - Int32Regs      areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri          ari64  - MEMri64
// TyStr supplies the rest of the asm string after "ldu.global.".  No
// selection patterns are attached; every variant requires hasLDU.
1713 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1714   def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1715                          "ldu.global." # TyStr,
1716                          []>, Requires<[hasLDU]>;
1717   def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1718                          "ldu.global." # TyStr,
1719                          []>, Requires<[hasLDU]>;
1720   def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1721                          "ldu.global." # TyStr,
1722                          []>, Requires<[hasLDU]>;
1723   def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1724                          "ldu.global." # TyStr,
1725                          []>, Requires<[hasLDU]>;
1726   def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1727                          "ldu.global." # TyStr,
1728                          []>, Requires<[hasLDU]>;
1729 }
1730
// Scalar ldu instantiations.  Each TyStr carries the PTX type suffix plus the
// operand template.  i8 results use Int16Regs with a "u8" load; f16 and f16x2
// are loaded as untyped "b16"/"b32"; the p32/p64 (pointer) forms reuse the
// "u32"/"u64" strings of the matching integer width.
1731 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1732 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1733 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1734 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1735 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1736 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1737 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1738 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1739 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1740 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1741
1742 // vector
1743
1744 // Elementized vector ldu
// Two-element vector "ldu.global." instructions with results $dst1/$dst2,
// one per addressing form of $src (_areg32, _areg64, _ari32, _ari64, _avar).
// No selection patterns and no Requires<> predicate are attached here.
1745 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1746   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1747                           (ins Int32Regs:$src),
1748                           "ldu.global." # TyStr, []>;
1749   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1750                           (ins Int64Regs:$src),
1751                           "ldu.global." # TyStr, []>;
1752   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1753                           (ins MEMri:$src),
1754                           "ldu.global." # TyStr, []>;
1755   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1756                           (ins MEMri64:$src),
1757                           "ldu.global." # TyStr, []>;
1758   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1759                           (ins imemAny:$src),
1760                           "ldu.global." # TyStr, []>;
1761 }
1762
// Four-element vector "ldu.global." instructions with results $dst1..$dst4,
// one per addressing form of $src (_areg32, _areg64, _ari32, _ari64, _avar).
// No selection patterns and no Requires<> predicate are attached here.
1763 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1764   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1765                                 regclass:$dst3, regclass:$dst4),
1766                           (ins Int32Regs:$src), "ldu.global." # TyStr, []>;
1767   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1768                                 regclass:$dst3, regclass:$dst4),
1769                           (ins Int64Regs:$src), "ldu.global." # TyStr, []>;
1770   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1771                                 regclass:$dst3, regclass:$dst4),
1772                           (ins MEMri:$src), "ldu.global." # TyStr, []>;
1773   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1774                                 regclass:$dst3, regclass:$dst4),
1775                           (ins MEMri64:$src), "ldu.global." # TyStr, []>;
1776   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
1777                                 regclass:$dst3, regclass:$dst4),
1778                           (ins imemAny:$src), "ldu.global." # TyStr, []>;
1779 }
1780
// Vector ldu instantiations.  v2 forms cover i8, i16, i32, f16, f16x2, f32,
// i64 and f64; v4 forms cover i8, i16, i32, f16, f16x2 and f32 only (no
// 64-bit element v4 forms are defined).  i8 elements use Int16Regs; f16 and
// f16x2 are loaded as untyped "b16"/"b32".  The doubled braces "{{ }}" in the
// asm strings surround the destination register list.
1781 defm INT_PTX_LDU_G_v2i8_ELE
1782   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1783 defm INT_PTX_LDU_G_v2i16_ELE
1784   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1785 defm INT_PTX_LDU_G_v2i32_ELE
1786   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1787 defm INT_PTX_LDU_G_v2f16_ELE
1788   : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1789 defm INT_PTX_LDU_G_v2f16x2_ELE
1790   : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1791 defm INT_PTX_LDU_G_v2f32_ELE
1792   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1793 defm INT_PTX_LDU_G_v2i64_ELE
1794   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1795 defm INT_PTX_LDU_G_v2f64_ELE
1796   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1797 defm INT_PTX_LDU_G_v4i8_ELE
1798   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1799 defm INT_PTX_LDU_G_v4i16_ELE
1800   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1801     Int16Regs>;
1802 defm INT_PTX_LDU_G_v4i32_ELE
1803   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1804     Int32Regs>;
1805 defm INT_PTX_LDU_G_v4f16_ELE
1806   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1807     Float16Regs>;
1808 defm INT_PTX_LDU_G_v4f16x2_ELE
1809   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1810     Float16x2Regs>;
1811 defm INT_PTX_LDU_G_v4f32_ELE
1812   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1813     Float32Regs>;
1814
1815
1816 //-----------------------------------
1817 // Support for ldg on sm_35 or later 
1818 //-----------------------------------
1819
1820 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1821 // non-coherent texture cache, and therefore the values read must be read-only
1822 // during the lifetime of the kernel.
1823
// Scalar "ld.global.nc." instructions, one per addressing form of $src:
//   areg   - Int32Regs      areg64 - Int64Regs
//   avar   - imemAny
//   ari    - MEMri          ari64  - MEMri64
// TyStr supplies the rest of the asm string after "ld.global.nc.".  No
// selection patterns are attached; every variant requires hasLDG.
1824 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1825   def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1826                          "ld.global.nc." # TyStr,
1827                          []>, Requires<[hasLDG]>;
1828   def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1829                          "ld.global.nc." # TyStr,
1830                          []>, Requires<[hasLDG]>;
1831   def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1832                          "ld.global.nc." # TyStr,
1833                          []>, Requires<[hasLDG]>;
1834   def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1835                          "ld.global.nc." # TyStr,
1836                          []>, Requires<[hasLDG]>;
1837   def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1838                          "ld.global.nc." # TyStr,
1839                          []>, Requires<[hasLDG]>;
1840 }
1841
// Scalar ldg instantiations.  Each TyStr carries the PTX type suffix plus the
// operand template.  i8 results use Int16Regs with a "u8" load; f16 and f16x2
// are loaded as untyped "b16"/"b32"; the p32/p64 (pointer) forms reuse the
// "u32"/"u64" strings of the matching integer width.
1842 defm INT_PTX_LDG_GLOBAL_i8
1843   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1844 defm INT_PTX_LDG_GLOBAL_i16
1845   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1846 defm INT_PTX_LDG_GLOBAL_i32
1847   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1848 defm INT_PTX_LDG_GLOBAL_i64
1849   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1850 defm INT_PTX_LDG_GLOBAL_f16
1851   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1852 defm INT_PTX_LDG_GLOBAL_f16x2
1853   : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1854 defm INT_PTX_LDG_GLOBAL_f32
1855   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1856 defm INT_PTX_LDG_GLOBAL_f64
1857   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1858 defm INT_PTX_LDG_GLOBAL_p32
1859   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1860 defm INT_PTX_LDG_GLOBAL_p64
1861   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1862
1863 // vector
1864
1865 // Elementized vector ldg 
1866 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1867  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1868                      (ins Int32Regs:$src),
1869                      !strconcat("ld.global.nc.", TyStr), []>;
1870  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1871                      (ins Int64Regs:$src),
1872                      !strconcat("ld.global.nc.", TyStr), []>;
1873  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1874                      (ins MEMri:$src),
1875                      !strconcat("ld.global.nc.", TyStr), []>;
1876  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1877                      (ins MEMri64:$src),
1878                      !strconcat("ld.global.nc.", TyStr), []>;
1879  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1880                      (ins imemAny:$src),
1881                      !strconcat("ld.global.nc.", TyStr), []>;
1882 }
1883
1884 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1885   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1886                               regclass:$dst4), (ins Int32Regs:$src), 
1887                !strconcat("ld.global.nc.", TyStr), []>;
1888   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1889                                regclass:$dst4), (ins Int64Regs:$src), 
1890                !strconcat("ld.global.nc.", TyStr), []>;
1891   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1892                               regclass:$dst4), (ins MEMri:$src), 
1893                !strconcat("ld.global.nc.", TyStr), []>;
1894   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1895                               regclass:$dst4), (ins MEMri64:$src), 
1896                !strconcat("ld.global.nc.", TyStr), []>;
1897   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1898                              regclass:$dst4), (ins imemAny:$src), 
1899                !strconcat("ld.global.nc.", TyStr), []>;
1900 }
1901
1902 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
1903 defm INT_PTX_LDG_G_v2i8_ELE
1904   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1905 defm INT_PTX_LDG_G_v2i16_ELE
1906   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1907 defm INT_PTX_LDG_G_v2i32_ELE
1908   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1909 defm INT_PTX_LDG_G_v2f16_ELE
1910   : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1911 defm INT_PTX_LDG_G_v2f16x2_ELE
1912   : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1913 defm INT_PTX_LDG_G_v2f32_ELE
1914   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1915 defm INT_PTX_LDG_G_v2i64_ELE
1916   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1917 defm INT_PTX_LDG_G_v2f64_ELE
1918   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1919 defm INT_PTX_LDG_G_v4i8_ELE
1920   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1921 defm INT_PTX_LDG_G_v4i16_ELE
1922   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1923 defm INT_PTX_LDG_G_v4i32_ELE
1924   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1925 defm INT_PTX_LDG_G_v4f16_ELE
1926   : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1927 defm INT_PTX_LDG_G_v4f16x2_ELE
1928   : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1929 defm INT_PTX_LDG_G_v4f32_ELE
1930   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1931
1932
// NG_TO_G: "cvta.<Str>" address conversions selected from intrinsic Intrin.
//   _yes      - 32-bit source and 32-bit result.
//   _yes_64   - 64-bit source and 64-bit result.
//   _yes_6432 - 32-bit source, 64-bit result: the source is first widened
//               with cvt.u64.u32 into a scratch .b64 register (%tmp) and the
//               cvta is then performed at 64 bits.  Predicated on useShortPtr.
1933 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1934    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1935           !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1936       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1937    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1938           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1939       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1940    def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
1941           "{{ .reg .b64 %tmp;\n\t"
1942           #"  cvt.u64.u32 \t%tmp, $src;\n\t"
1943           #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
1944       [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
1945       Requires<[useShortPtr]>;
1946 }
1947
// G_TO_NG: "cvta.to.<Str>" address conversions selected from intrinsic Intrin.
//   _yes      - 32-bit source and 32-bit result.
//   _yes_64   - 64-bit source and 64-bit result.
//   _yes_3264 - 64-bit source, 32-bit result: the cvta.to is performed at
//               64 bits into a scratch .b64 register (%tmp), which is then
//               narrowed with cvt.u32.u64.  Predicated on useShortPtr.
1948 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1949    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1950           !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1951       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1952    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1953           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1954       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1955    def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
1956           "{{ .reg .b64 %tmp;\n\t"
1957           #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
1958           #"  cvt.u32.u64 \t$result, %tmp; }}",
1959       [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
1960       Requires<[useShortPtr]>;
1961 }
1962
1963 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1964 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1965 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1966 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1967
1968 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1969 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1970 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1971 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1972
1973
1974 // nvvm.ptr.gen.to.param
1975 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1976   (ins Int32Regs:$src),
1977                         "mov.u32 \t$result, $src;",
1978                               [(set Int32Regs:$result,
1979                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1980 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1981   (ins Int64Regs:$src),
1982                         "mov.u64 \t$result, $src;",
1983                               [(set Int64Regs:$result,
1984                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1985
1986
1987 // nvvm.move intrinsics
1988 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1989                              "mov.b16 \t$r, $s;",
1990                              [(set Int16Regs:$r,
1991                                (int_nvvm_move_i16 Int16Regs:$s))]>;
1992 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1993                              "mov.b32 \t$r, $s;",
1994                              [(set Int32Regs:$r,
1995                                (int_nvvm_move_i32 Int32Regs:$s))]>;
1996 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1997                              "mov.b64 \t$r, $s;",
1998                              [(set Int64Regs:$r,
1999                                (int_nvvm_move_i64 Int64Regs:$s))]>;
2000 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2001                              "mov.f32 \t$r, $s;",
2002                              [(set Float32Regs:$r,
2003                                (int_nvvm_move_float Float32Regs:$s))]>;
2004 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2005                              "mov.f64 \t$r, $s;",
2006                              [(set Float64Regs:$r,
2007                                (int_nvvm_move_double Float64Regs:$s))]>;
2008 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2009                              "mov.u32 \t$r, $s;",
2010                              [(set Int32Regs:$r,
2011                                (int_nvvm_move_ptr Int32Regs:$s))]>;
2012 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2013                              "mov.u64 \t$r, $s;",
2014                              [(set Int64Regs:$r,
2015                                (int_nvvm_move_ptr Int64Regs:$s))]>;
2016
2017 // @TODO: Are these actually needed, or will we always just see symbols
2018 // copied to registers first?
2019 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2020                              "mov.u32 \t$r, $s;",
2021                              [(set Int32Regs:$r,
2022                              (int_nvvm_move_ptr texternalsym:$s))]>;
2023 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2024                              "mov.u64 \t$r, $s;",
2025                              [(set Int64Regs:$r,
2026                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
2027
2028
2029 // MoveParam        %r1, param
2030 // ptr_local_to_gen %r2, %r1
2031 // ptr_gen_to_local %r3, %r2
2032 // ->
2033 // mov %r1, param
2034
2035 // @TODO: Revisit this.  There is a type
2036 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2037 // instructions are not currently defined. However, we can use the ptr
2038 // variants and the asm printer will do the right thing.
2039 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2040                 (MoveParam texternalsym:$src)))),
2041                (nvvm_move_ptr64  texternalsym:$src)>;
2042 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2043                 (MoveParam texternalsym:$src)))),
2044                (nvvm_move_ptr32  texternalsym:$src)>;
2045
2046 def texsurf_handles
2047   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2048               "mov.u64 \t$result, $src;", []>;
2049
2050 //-----------------------------------
2051 // Compiler Error Warn
2052 // - Just ignore them in codegen
2053 //-----------------------------------
2054
2055 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2056                 "// llvm.nvvm.compiler.warn()",
2057                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2058 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2059                 "// llvm.nvvm.compiler.warn()",
2060                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2061 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2062                 "// llvm.nvvm.compiler.error()",
2063                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2064 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2065                 "// llvm.nvvm.compiler.error()",
2066                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
2067
2068
2069 // isspacep
2070
2071 def ISSPACEP_CONST_32
2072   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2073               "isspacep.const \t$d, $a;",
2074               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2075     Requires<[hasPTX31]>;
2076 def ISSPACEP_CONST_64
2077   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2078               "isspacep.const \t$d, $a;",
2079               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2080     Requires<[hasPTX31]>;
2081 def ISSPACEP_GLOBAL_32
2082   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2083               "isspacep.global \t$d, $a;",
2084               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2085 def ISSPACEP_GLOBAL_64
2086   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2087               "isspacep.global \t$d, $a;",
2088               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2089 def ISSPACEP_LOCAL_32
2090   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2091               "isspacep.local \t$d, $a;",
2092               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2093 def ISSPACEP_LOCAL_64
2094   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2095               "isspacep.local \t$d, $a;",
2096               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2097 def ISSPACEP_SHARED_32
2098   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2099               "isspacep.shared \t$d, $a;",
2100               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2101 def ISSPACEP_SHARED_64
2102   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2103               "isspacep.shared \t$d, $a;",
2104               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2105
2106
2107 // Special register reads
2108 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2109                             (ins SpecialRegs:$r),
2110                             "mov.b32 \t$d, $r;", []>;
2111
2112 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2113 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2114 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2115 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2116 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2117 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2118 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2119 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2120 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2121 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2122 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2123 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2124 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2125 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2126 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2127 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2128 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2129 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2130 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2131 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2132 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2133 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2134 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2135 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2136 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2137 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2138 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2139 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2140 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2141 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2142 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2143 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2144
2145
2146 // rotate builtin support
2147
2148 def ROTATE_B32_HW_IMM
2149   : NVPTXInst<(outs Int32Regs:$dst),
2150               (ins  Int32Regs:$src, i32imm:$amt),
2151               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2152               [(set Int32Regs:$dst,
2153                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2154               Requires<[hasHWROT32]> ;
2155
2156 def ROTATE_B32_HW_REG
2157   : NVPTXInst<(outs Int32Regs:$dst),
2158               (ins  Int32Regs:$src, Int32Regs:$amt),
2159               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2160               [(set Int32Regs:$dst,
2161                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2162               Requires<[hasHWROT32]> ;
2163
2164 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2165           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2166       Requires<[noHWROT32]> ;
2167
2168 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2169           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2170       Requires<[noHWROT32]> ;
2171
// Helpers that pull one 32-bit half out of a 64-bit register.  Each emits a
// scoped block that declares a scratch .b32 register (%dummy) and moves $src
// into a two-element vector destination with mov.b64; $dst occupies the first
// vector slot in GET_LO_INT64 and the second slot in GET_HI_INT64, and %dummy
// absorbs the other half.  No selection pattern is attached; these records are
// referenced from the Pat<> definitions that follow.
2172 let hasSideEffects = 0 in {
2173   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2174     !strconcat("{{\n\t",
2175                ".reg .b32 %dummy;\n\t",
2176                "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2177                "}}"),
2178           []> ;
2179
2180   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2181     !strconcat("{{\n\t",
2182                ".reg .b32 %dummy;\n\t",
2183                "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2184                "}}"),
2185           []> ;
2186 }
2187
2188 let hasSideEffects = 0 in {
2189   def PACK_TWO_INT32
2190     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2191                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2192 }
2193
2194 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2195           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2196                           (GET_LO_INT64 Int64Regs:$src))> ;
2197
2198 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2199 // no side effects.
2200 let hasSideEffects = 0 in {
2201   def SHF_L_WRAP_B32_IMM
2202     : NVPTXInst<(outs Int32Regs:$dst),
2203                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2204                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2205       Requires<[hasHWROT32]>;
2206
2207   def SHF_L_WRAP_B32_REG
2208     : NVPTXInst<(outs Int32Regs:$dst),
2209                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2210                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2211       Requires<[hasHWROT32]>;
2212
2213   def SHF_R_WRAP_B32_IMM
2214     : NVPTXInst<(outs Int32Regs:$dst),
2215                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2216                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2217       Requires<[hasHWROT32]>;
2218
2219   def SHF_R_WRAP_B32_REG
2220     : NVPTXInst<(outs Int32Regs:$dst),
2221                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2222                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2223       Requires<[hasHWROT32]>;
2224 }
2225
2226 // HW version of rotate 64
2227 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2228           (PACK_TWO_INT32
2229             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2230                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2231             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2232                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2233       Requires<[hasHWROT32]>;
2234
2235 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2236           (PACK_TWO_INT32
2237             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2238                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2239             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2240                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2241       Requires<[hasHWROT32]>;
2242
2243
2244 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2245           (PACK_TWO_INT32
2246             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2247                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2248             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2249                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2250       Requires<[hasHWROT32]>;
2251
2252 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2253           (PACK_TWO_INT32
2254             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2255                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2256             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2257                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2258       Requires<[hasHWROT32]>;
2259
2260 // SW version of rotate 64
// Software rotate-left of a 64-bit value by an immediate (targets without
// hasHWROT32).  ROT64imm_sw receives the shift amount and its complement; for
// a 64-bit rotate the complement is (64 - amt), so it is produced with
// SUB_FRM_64, matching the rotate-right immediate pattern below.
2261 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2262           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
2263       Requires<[noHWROT32]>;
2264 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2265           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2266       Requires<[noHWROT32]>;
2267 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2268           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2269       Requires<[noHWROT32]>;
2270 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2271           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2272       Requires<[noHWROT32]>;
2273
2274
2275 //-----------------------------------
2276 // Texture Intrinsics
2277 //-----------------------------------
2278
2279 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2280 // also defined in NVPTXReplaceImageHandles.cpp
2281
2282 // texmode_independent
2283 let IsTex = 1, IsTexModeUnified = 0 in {
2284 // Texture fetch instructions using handles
2285 def TEX_1D_F32_S32
2286   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2287                     Float32Regs:$b, Float32Regs:$a),
2288               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2289               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2290               []>;
2291 def TEX_1D_F32_F32
2292   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2293                     Float32Regs:$b, Float32Regs:$a),
2294               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2295               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2296               []>;
2297 def TEX_1D_F32_F32_LEVEL
2298   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2299                     Float32Regs:$b, Float32Regs:$a),
2300               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2301               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2302               "[$t, $s, \\{$x\\}], $lod;",
2303               []>;
2304 def TEX_1D_F32_F32_GRAD
2305   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2306                     Float32Regs:$b, Float32Regs:$a),
2307               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2308                    Float32Regs:$gradx, Float32Regs:$grady),
2309               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2310               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2311               []>;
2312 def TEX_1D_S32_S32
2313   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2314                     Int32Regs:$b, Int32Regs:$a),
2315               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2316               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2317               []>;
2318 def TEX_1D_S32_F32
2319   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2320                     Int32Regs:$b, Int32Regs:$a),
2321               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2322               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2323               []>;
2324 def TEX_1D_S32_F32_LEVEL
2325   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2326                     Int32Regs:$b, Int32Regs:$a),
2327               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2328                    Float32Regs:$lod),
2329               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2330               "[$t, $s, \\{$x\\}], $lod;",
2331               []>;
2332 def TEX_1D_S32_F32_GRAD
2333   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2334                     Int32Regs:$b, Int32Regs:$a),
2335               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2336                    Float32Regs:$gradx, Float32Regs:$grady),
2337               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2338               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2339               []>;
2340 def TEX_1D_U32_S32
2341   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2342                     Int32Regs:$b, Int32Regs:$a),
2343               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2344               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2345               []>;
2346 def TEX_1D_U32_F32
2347   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2348                     Int32Regs:$b, Int32Regs:$a),
2349               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2350               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2351               []>;
2352 def TEX_1D_U32_F32_LEVEL
2353   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2354                     Int32Regs:$b, Int32Regs:$a),
2355               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2356                    Float32Regs:$lod),
2357               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2358               "[$t, $s, \\{$x\\}], $lod;",
2359               []>;
2360 def TEX_1D_U32_F32_GRAD
2361   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2362                     Int32Regs:$b, Int32Regs:$a),
2363               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2364                    Float32Regs:$gradx, Float32Regs:$grady),
2365               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2366               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2367               []>;
2368
2369 def TEX_1D_ARRAY_F32_S32
2370   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2371                     Float32Regs:$b, Float32Regs:$a),
2372               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2373               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2374               "[$t, $s, \\{$l, $x\\}];",
2375               []>;
2376 def TEX_1D_ARRAY_F32_F32
2377   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2378                     Float32Regs:$b, Float32Regs:$a),
2379               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2380               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2381               "[$t, $s, \\{$l, $x\\}];",
2382               []>;
2383 def TEX_1D_ARRAY_F32_F32_LEVEL
2384   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2385                     Float32Regs:$b, Float32Regs:$a),
2386               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2387                    Float32Regs:$lod),
2388               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2389               "[$t, $s, \\{$l, $x\\}], $lod;",
2390               []>;
2391 def TEX_1D_ARRAY_F32_F32_GRAD
2392   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2393                     Float32Regs:$b, Float32Regs:$a),
2394               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2395                    Float32Regs:$gradx, Float32Regs:$grady),
2396               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2397               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2398               []>;
2399 def TEX_1D_ARRAY_S32_S32
2400   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2401                     Int32Regs:$b, Int32Regs:$a),
2402               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2403               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2404               "[$t, $s, \\{$l, $x\\}];",
2405               []>;
2406 def TEX_1D_ARRAY_S32_F32
2407   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2408                     Int32Regs:$b, Int32Regs:$a),
2409               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2410               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2411               "[$t, $s, \\{$l, $x\\}];",
2412               []>;
2413 def TEX_1D_ARRAY_S32_F32_LEVEL
2414   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2415                     Int32Regs:$b, Int32Regs:$a),
2416               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2417                    Float32Regs:$lod),
2418               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2419               "[$t, $s, \\{$l, $x\\}], $lod;",
2420               []>;
2421 def TEX_1D_ARRAY_S32_F32_GRAD
2422   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2423                     Int32Regs:$b, Int32Regs:$a),
2424               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2425                    Float32Regs:$gradx, Float32Regs:$grady),
2426               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2427               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2428               []>;
2429 def TEX_1D_ARRAY_U32_S32
2430   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2431                     Int32Regs:$b, Int32Regs:$a),
2432               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2433               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2434               "[$t, $s, \\{$l, $x\\}];",
2435               []>;
2436 def TEX_1D_ARRAY_U32_F32
2437   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2438                     Int32Regs:$b, Int32Regs:$a),
2439               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2440               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2441               "[$t, $s, \\{$l, $x\\}];",
2442               []>;
2443 def TEX_1D_ARRAY_U32_F32_LEVEL
2444   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2445                     Int32Regs:$b, Int32Regs:$a),
2446               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2447                    Float32Regs:$lod),
2448               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2449               "[$t, $s, \\{$l, $x\\}], $lod;",
2450               []>;
2451 def TEX_1D_ARRAY_U32_F32_GRAD
2452   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2453                     Int32Regs:$b, Int32Regs:$a),
2454               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2455                    Float32Regs:$gradx, Float32Regs:$grady),
2456               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2457               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2458               []>;
2459
2460 def TEX_2D_F32_S32
2461   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2462                     Float32Regs:$b, Float32Regs:$a),
2463               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2464               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2465               "[$t, $s, \\{$x, $y\\}];",
2466               []>;
// TEX_2D_F32_F32: emits `tex.2d.v4.f32.f32` with four Float32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2467 def TEX_2D_F32_F32
2468   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2469                     Float32Regs:$b, Float32Regs:$a),
2470               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2471               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2472               "[$t, $s, \\{$x, $y\\}];",
2473               []>;
// TEX_2D_F32_F32_LEVEL: emits `tex.level.2d.v4.f32.f32` with four Float32Regs results.
// Same operands as TEX_2D_F32_F32 plus a Float32Regs $lod printed after the address.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2474 def TEX_2D_F32_F32_LEVEL
2475   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2476                     Float32Regs:$b, Float32Regs:$a),
2477               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2478                    Float32Regs:$lod),
2479               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2480               "[$t, $s, \\{$x, $y\\}], $lod;",
2481               []>;
// TEX_2D_F32_F32_GRAD: emits `tex.grad.2d.v4.f32.f32` with four Float32Regs results.
// After the {$x, $y} address, the gradients are printed as two two-element vectors:
// {$gradx0, $gradx1} and {$grady0, $grady1}. The pattern list (`[]`) is empty.
2482 def TEX_2D_F32_F32_GRAD
2483   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2484                     Float32Regs:$b, Float32Regs:$a),
2485               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2486                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2487                    Float32Regs:$grady0, Float32Regs:$grady1),
2488               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2489               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2490               "\\{$grady0, $grady1\\};",
2491               []>;
// TEX_2D_S32_S32: emits `tex.2d.v4.s32.s32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, then Int32Regs coordinates printed as {$x, $y}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2492 def TEX_2D_S32_S32
2493   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2494                     Int32Regs:$b, Int32Regs:$a),
2495               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2496               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2497               "[$t, $s, \\{$x, $y\\}];",
2498               []>;
// TEX_2D_S32_F32: emits `tex.2d.v4.s32.f32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2499 def TEX_2D_S32_F32
2500   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2501                     Int32Regs:$b, Int32Regs:$a),
2502               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2503               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2504               "[$t, $s, \\{$x, $y\\}];",
2505               []>;
// TEX_2D_S32_F32_LEVEL: emits `tex.level.2d.v4.s32.f32` with four Int32Regs results.
// Same operands as TEX_2D_S32_F32 plus a Float32Regs $lod printed after the address.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2506 def TEX_2D_S32_F32_LEVEL
2507   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2508                     Int32Regs:$b, Int32Regs:$a),
2509               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2510                    Float32Regs:$lod),
2511               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2512               "[$t, $s, \\{$x, $y\\}], $lod;",
2513               []>;
// TEX_2D_S32_F32_GRAD: emits `tex.grad.2d.v4.s32.f32` with four Int32Regs results.
// After the {$x, $y} address, the gradients are printed as two two-element vectors:
// {$gradx0, $gradx1} and {$grady0, $grady1}. The pattern list (`[]`) is empty.
2514 def TEX_2D_S32_F32_GRAD
2515   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2516                     Int32Regs:$b, Int32Regs:$a),
2517               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2518                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2519                    Float32Regs:$grady0, Float32Regs:$grady1),
2520               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2521               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2522               "\\{$grady0, $grady1\\};",
2523               []>;
// TEX_2D_U32_S32: emits `tex.2d.v4.u32.s32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, then Int32Regs coordinates printed as {$x, $y}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2524 def TEX_2D_U32_S32
2525   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2526                     Int32Regs:$b, Int32Regs:$a),
2527               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2528               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2529               "[$t, $s, \\{$x, $y\\}];",
2530               []>;
// TEX_2D_U32_F32: emits `tex.2d.v4.u32.f32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2531 def TEX_2D_U32_F32
2532   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2533                     Int32Regs:$b, Int32Regs:$a),
2534               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2535               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2536               "[$t, $s, \\{$x, $y\\}];",
2537               []>;
// TEX_2D_U32_F32_LEVEL: emits `tex.level.2d.v4.u32.f32` with four Int32Regs results.
// Same operands as TEX_2D_U32_F32 plus a Float32Regs $lod printed after the address.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2538 def TEX_2D_U32_F32_LEVEL
2539   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2540                     Int32Regs:$b, Int32Regs:$a),
2541               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2542                    Float32Regs:$lod),
2543               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2544               "[$t, $s, \\{$x, $y\\}], $lod;",
2545               []>;
// TEX_2D_U32_F32_GRAD: emits `tex.grad.2d.v4.u32.f32` with four Int32Regs results.
// After the {$x, $y} address, the gradients are printed as two two-element vectors:
// {$gradx0, $gradx1} and {$grady0, $grady1}. The pattern list (`[]`) is empty.
2546 def TEX_2D_U32_F32_GRAD
2547   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2548                     Int32Regs:$b, Int32Regs:$a),
2549               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2550                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2551                    Float32Regs:$grady0, Float32Regs:$grady1),
2552               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2553               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2554               "\\{$grady0, $grady1\\};",
2555               []>;
2556
// TEX_2D_ARRAY_F32_S32: emits `tex.a2d.v4.f32.s32` with four Float32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, $x, $y. The address vector is printed with
// four slots, {$l, $x, $y, $y}; the repeated $y is presumably padding for an ignored fourth
// element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2557 def TEX_2D_ARRAY_F32_S32
2558   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2559                     Float32Regs:$b, Float32Regs:$a),
2560               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2561                    Int32Regs:$y),
2562               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2563               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2564               []>;
// TEX_2D_ARRAY_F32_F32: emits `tex.a2d.v4.f32.f32` with four Float32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, Float32Regs $x, $y. The address vector is
// printed with four slots, {$l, $x, $y, $y}; the repeated $y is presumably padding for an
// ignored fourth element -- TODO confirm against the PTX ISA. The pattern list is empty.
2565 def TEX_2D_ARRAY_F32_F32
2566   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2567                     Float32Regs:$b, Float32Regs:$a),
2568               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2569                    Float32Regs:$y),
2570               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2571               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2572               []>;
// TEX_2D_ARRAY_F32_F32_LEVEL: emits `tex.level.a2d.v4.f32.f32` with four Float32Regs results.
// The address is printed as {$l, $x, $y, $y} ($y repeated in the fourth slot, presumably as
// padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2573 def TEX_2D_ARRAY_F32_F32_LEVEL
2574   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2575                     Float32Regs:$b, Float32Regs:$a),
2576               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2577                    Float32Regs:$y, Float32Regs:$lod),
2578               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2579               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2580               []>;
// TEX_2D_ARRAY_F32_F32_GRAD: emits `tex.grad.a2d.v4.f32.f32` with four Float32Regs results.
// The address is printed as {$l, $x, $y, $y} ($y repeated in the fourth slot, presumably as
// padding -- TODO confirm), followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2581 def TEX_2D_ARRAY_F32_F32_GRAD
2582   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2583                     Float32Regs:$b, Float32Regs:$a),
2584               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2585                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2586                    Float32Regs:$grady0, Float32Regs:$grady1),
2587               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2588               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2589               "\\{$grady0, $grady1\\};",
2590               []>;
// TEX_2D_ARRAY_S32_S32: emits `tex.a2d.v4.s32.s32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, $x, $y. The address vector is printed with
// four slots, {$l, $x, $y, $y}; the repeated $y is presumably padding for an ignored fourth
// element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2591 def TEX_2D_ARRAY_S32_S32
2592   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2593                     Int32Regs:$b, Int32Regs:$a),
2594               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2595                    Int32Regs:$y),
2596               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2597               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2598               []>;
// TEX_2D_ARRAY_S32_F32: emits `tex.a2d.v4.s32.f32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, Float32Regs $x, $y. The address vector is
// printed with four slots, {$l, $x, $y, $y}; the repeated $y is presumably padding for an
// ignored fourth element -- TODO confirm against the PTX ISA. The pattern list is empty.
2599 def TEX_2D_ARRAY_S32_F32
2600   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2601                     Int32Regs:$b, Int32Regs:$a),
2602               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2603                    Float32Regs:$y),
2604               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2605               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2606               []>;
// TEX_2D_ARRAY_S32_F32_LEVEL: emits `tex.level.a2d.v4.s32.f32` with four Int32Regs results.
// The address is printed as {$l, $x, $y, $y} ($y repeated in the fourth slot, presumably as
// padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2607 def TEX_2D_ARRAY_S32_F32_LEVEL
2608   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2609                     Int32Regs:$b, Int32Regs:$a),
2610               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2611                    Float32Regs:$y, Float32Regs:$lod),
2612               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2613               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2614               []>;
// TEX_2D_ARRAY_S32_F32_GRAD: emits `tex.grad.a2d.v4.s32.f32` with four Int32Regs results.
// The address is printed as {$l, $x, $y, $y} ($y repeated in the fourth slot, presumably as
// padding -- TODO confirm), followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2615 def TEX_2D_ARRAY_S32_F32_GRAD
2616   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2617                     Int32Regs:$b, Int32Regs:$a),
2618               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2619                    Float32Regs:$y,
2620                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2621                    Float32Regs:$grady0, Float32Regs:$grady1),
2622               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2623               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2624               "\\{$grady0, $grady1\\};",
2625               []>;
// TEX_2D_ARRAY_U32_S32: emits `tex.a2d.v4.u32.s32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, $x, $y. The address vector is printed with
// four slots, {$l, $x, $y, $y}; the repeated $y is presumably padding for an ignored fourth
// element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2626 def TEX_2D_ARRAY_U32_S32
2627   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2628                     Int32Regs:$b, Int32Regs:$a),
2629               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2630                    Int32Regs:$y),
2631               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2632               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2633               []>;
// TEX_2D_ARRAY_U32_F32: emits `tex.a2d.v4.u32.f32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, Float32Regs $x, $y. The address vector is
// printed with four slots, {$l, $x, $y, $y}; the repeated $y is presumably padding for an
// ignored fourth element -- TODO confirm against the PTX ISA. The pattern list is empty.
2634 def TEX_2D_ARRAY_U32_F32
2635   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2636                     Int32Regs:$b, Int32Regs:$a),
2637               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2638                    Float32Regs:$y),
2639               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2640               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2641               []>;
// TEX_2D_ARRAY_U32_F32_LEVEL: emits `tex.level.a2d.v4.u32.f32` with four Int32Regs results.
// The address is printed as {$l, $x, $y, $y} ($y repeated in the fourth slot, presumably as
// padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2642 def TEX_2D_ARRAY_U32_F32_LEVEL
2643   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2644                     Int32Regs:$b, Int32Regs:$a),
2645               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2646                    Float32Regs:$y, Float32Regs:$lod),
2647               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2648               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2649               []>;
// TEX_2D_ARRAY_U32_F32_GRAD: emits `tex.grad.a2d.v4.u32.f32` with four Int32Regs results.
// The address is printed as {$l, $x, $y, $y} ($y repeated in the fourth slot, presumably as
// padding -- TODO confirm), followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2650 def TEX_2D_ARRAY_U32_F32_GRAD
2651   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2652                     Int32Regs:$b, Int32Regs:$a),
2653               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2654                    Float32Regs:$y,
2655                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2656                    Float32Regs:$grady0, Float32Regs:$grady1),
2657               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2658               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2659               "\\{$grady0, $grady1\\};",
2660               []>;
2661
// TEX_3D_F32_S32: emits `tex.3d.v4.f32.s32` with four Float32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, Int32Regs $x, $y, $z. The coordinate vector is printed with
// four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored fourth
// element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2662 def TEX_3D_F32_S32
2663   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2664                     Float32Regs:$b, Float32Regs:$a),
2665               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2666                    Int32Regs:$z),
2667               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2668               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2669               []>;
// TEX_3D_F32_F32: emits `tex.3d.v4.f32.f32` with four Float32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, Float32Regs $x, $y, $z. The coordinate vector is printed
// with four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored
// fourth element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2670 def TEX_3D_F32_F32
2671   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2672                     Float32Regs:$b, Float32Regs:$a),
2673               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2674                    Float32Regs:$z),
2675               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2676               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2677               []>;
// TEX_3D_F32_F32_LEVEL: emits `tex.level.3d.v4.f32.f32` with four Float32Regs results.
// The coordinates are printed as {$x, $y, $z, $z} ($z repeated in the fourth slot, presumably
// as padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2678 def TEX_3D_F32_F32_LEVEL
2679   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2680                     Float32Regs:$b, Float32Regs:$a),
2681               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2682                    Float32Regs:$z, Float32Regs:$lod),
2683               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2684               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2685               []>;
// TEX_3D_F32_F32_GRAD: emits `tex.grad.3d.v4.f32.f32` with four Float32Regs results.
// Takes three coordinates and three components per gradient. Every vector in the asm string
// is printed with four slots by repeating its last operand ($z, $gradx2, $grady2); the
// repeat is presumably padding -- TODO confirm against the PTX ISA. Pattern list is empty.
2686 def TEX_3D_F32_F32_GRAD
2687   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2688                     Float32Regs:$b, Float32Regs:$a),
2689               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2690                    Float32Regs:$z,
2691                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2692                    Float32Regs:$gradx2, Float32Regs:$grady0,
2693                    Float32Regs:$grady1, Float32Regs:$grady2),
2694               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2695               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2696               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2697               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2698               []>;
// TEX_3D_S32_S32: emits `tex.3d.v4.s32.s32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, Int32Regs $x, $y, $z. The coordinate vector is printed with
// four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored fourth
// element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2699 def TEX_3D_S32_S32
2700   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2701                     Int32Regs:$b, Int32Regs:$a),
2702               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2703                    Int32Regs:$z),
2704               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2705               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2706               []>;
// TEX_3D_S32_F32: emits `tex.3d.v4.s32.f32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, Float32Regs $x, $y, $z. The coordinate vector is printed
// with four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored
// fourth element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2707 def TEX_3D_S32_F32
2708   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2709                     Int32Regs:$b, Int32Regs:$a),
2710               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2711                    Float32Regs:$z),
2712               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2713               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2714               []>;
// TEX_3D_S32_F32_LEVEL: emits `tex.level.3d.v4.s32.f32` with four Int32Regs results.
// The coordinates are printed as {$x, $y, $z, $z} ($z repeated in the fourth slot, presumably
// as padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2715 def TEX_3D_S32_F32_LEVEL
2716   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2717                     Int32Regs:$b, Int32Regs:$a),
2718               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2719                    Float32Regs:$z, Float32Regs:$lod),
2720               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2721               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2722               []>;
// TEX_3D_S32_F32_GRAD: emits `tex.grad.3d.v4.s32.f32` with four Int32Regs results.
// Takes three coordinates and three components per gradient. Every vector in the asm string
// is printed with four slots by repeating its last operand ($z, $gradx2, $grady2); the
// repeat is presumably padding -- TODO confirm against the PTX ISA. Pattern list is empty.
2723 def TEX_3D_S32_F32_GRAD
2724   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2725                     Int32Regs:$b, Int32Regs:$a),
2726               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2727                    Float32Regs:$z,
2728                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2729                    Float32Regs:$gradx2, Float32Regs:$grady0,
2730                    Float32Regs:$grady1, Float32Regs:$grady2),
2731               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2732               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2733               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2734               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2735               []>;
// TEX_3D_U32_S32: emits `tex.3d.v4.u32.s32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, Int32Regs $x, $y, $z. The coordinate vector is printed with
// four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored fourth
// element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2736 def TEX_3D_U32_S32
2737   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2738                     Int32Regs:$b, Int32Regs:$a),
2739               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2740                    Int32Regs:$z),
2741               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2742               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2743               []>;
// TEX_3D_U32_F32: emits `tex.3d.v4.u32.f32` with four Int32Regs results ($r, $g, $b, $a).
// Operands: Int64Regs $t and $s, Float32Regs $x, $y, $z. The coordinate vector is printed
// with four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored
// fourth element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2744 def TEX_3D_U32_F32
2745   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2746                     Int32Regs:$b, Int32Regs:$a),
2747               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2748                    Float32Regs:$z),
2749               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2750               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2751               []>;
// TEX_3D_U32_F32_LEVEL: emits `tex.level.3d.v4.u32.f32` with four Int32Regs results.
// The coordinates are printed as {$x, $y, $z, $z} ($z repeated in the fourth slot, presumably
// as padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2752 def TEX_3D_U32_F32_LEVEL
2753   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2754                     Int32Regs:$b, Int32Regs:$a),
2755               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2756                    Float32Regs:$z, Float32Regs:$lod),
2757               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2758               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2759               []>;
// TEX_3D_U32_F32_GRAD: emits `tex.grad.3d.v4.u32.f32` with four Int32Regs results.
// Takes three coordinates and three components per gradient. Every vector in the asm string
// is printed with four slots by repeating its last operand ($z, $gradx2, $grady2); the
// repeat is presumably padding -- TODO confirm against the PTX ISA. Pattern list is empty.
2760 def TEX_3D_U32_F32_GRAD
2761   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2762                     Int32Regs:$b, Int32Regs:$a),
2763               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2764                    Float32Regs:$z,
2765                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2766                    Float32Regs:$gradx2, Float32Regs:$grady0,
2767                    Float32Regs:$grady1, Float32Regs:$grady2),
2768               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2769               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2770               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2771               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2772               []>;
2773
// TEX_CUBE_F32_F32: emits `tex.cube.v4.f32.f32` with four Float32Regs results.
// Operands: Int64Regs $t and $s, Float32Regs $x, $y, $z. The coordinate vector is printed
// with four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored
// fourth element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2774 def TEX_CUBE_F32_F32
2775   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2776                     Float32Regs:$b, Float32Regs:$a),
2777               (ins Int64Regs:$t, Int64Regs:$s,
2778                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2779               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2780               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2781               []>;
// TEX_CUBE_F32_F32_LEVEL: emits `tex.level.cube.v4.f32.f32` with four Float32Regs results.
// The coordinates are printed as {$x, $y, $z, $z} ($z repeated in the fourth slot, presumably
// as padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2782 def TEX_CUBE_F32_F32_LEVEL
2783   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2784                     Float32Regs:$b, Float32Regs:$a),
2785               (ins Int64Regs:$t, Int64Regs:$s,
2786                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2787                    Float32Regs:$lod),
2788               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2789               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2790               []>;
// TEX_CUBE_S32_F32: emits `tex.cube.v4.s32.f32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Float32Regs $x, $y, $z. The coordinate vector is printed
// with four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored
// fourth element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2791 def TEX_CUBE_S32_F32
2792   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2793                     Int32Regs:$b, Int32Regs:$a),
2794               (ins Int64Regs:$t, Int64Regs:$s,
2795                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2796               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2797               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2798               []>;
// TEX_CUBE_S32_F32_LEVEL: emits `tex.level.cube.v4.s32.f32` with four Int32Regs results.
// The coordinates are printed as {$x, $y, $z, $z} ($z repeated in the fourth slot, presumably
// as padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2799 def TEX_CUBE_S32_F32_LEVEL
2800   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2801                     Int32Regs:$b, Int32Regs:$a),
2802               (ins Int64Regs:$t, Int64Regs:$s,
2803                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2804                    Float32Regs:$lod),
2805               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2806               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2807               []>;
// TEX_CUBE_U32_F32: emits `tex.cube.v4.u32.f32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Float32Regs $x, $y, $z. The coordinate vector is printed
// with four slots, {$x, $y, $z, $z}; the repeated $z is presumably padding for an ignored
// fourth element -- TODO confirm against the PTX ISA. The pattern list (`[]`) is empty.
2808 def TEX_CUBE_U32_F32
2809   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2810                     Int32Regs:$b, Int32Regs:$a),
2811               (ins Int64Regs:$t, Int64Regs:$s,
2812                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2813               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2814               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2815               []>;
// TEX_CUBE_U32_F32_LEVEL: emits `tex.level.cube.v4.u32.f32` with four Int32Regs results.
// The coordinates are printed as {$x, $y, $z, $z} ($z repeated in the fourth slot, presumably
// as padding -- TODO confirm), followed by the Float32Regs $lod. The pattern list is empty.
2816 def TEX_CUBE_U32_F32_LEVEL
2817   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2818                     Int32Regs:$b, Int32Regs:$a),
2819               (ins Int64Regs:$t, Int64Regs:$s,
2820                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2821                    Float32Regs:$lod),
2822               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2823               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2824               []>;
2825
// TEX_CUBE_ARRAY_F32_F32: emits `tex.acube.v4.f32.f32` with four Float32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, Float32Regs $x, $y, $z; the address vector
// is printed as {$l, $x, $y, $z}. The pattern list (`[]`) is empty.
2826 def TEX_CUBE_ARRAY_F32_F32
2827   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2828                     Float32Regs:$b, Float32Regs:$a),
2829               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2830                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2831               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2832               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2833               []>;
// TEX_CUBE_ARRAY_F32_F32_LEVEL: emits `tex.level.acube.v4.f32.f32` with four Float32Regs
// results. The address is printed as {$l, $x, $y, $z}, followed by the Float32Regs $lod.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2834 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2835   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2836                     Float32Regs:$b, Float32Regs:$a),
2837               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2838                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2839                    Float32Regs:$lod),
2840               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2841               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2842               []>;
// TEX_CUBE_ARRAY_S32_F32: emits `tex.acube.v4.s32.f32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, Float32Regs $x, $y, $z; the address vector
// is printed as {$l, $x, $y, $z}. The pattern list (`[]`) is empty.
2843 def TEX_CUBE_ARRAY_S32_F32
2844   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2845                     Int32Regs:$b, Int32Regs:$a),
2846               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2847                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2848               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2849               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2850               []>;
// TEX_CUBE_ARRAY_S32_F32_LEVEL: emits `tex.level.acube.v4.s32.f32` with four Int32Regs
// results. The address is printed as {$l, $x, $y, $z}, followed by the Float32Regs $lod.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2851 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2852   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2853                     Int32Regs:$b, Int32Regs:$a),
2854               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2855                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2856                    Float32Regs:$lod),
2857               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2858               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2859               []>;
// TEX_CUBE_ARRAY_U32_F32: emits `tex.acube.v4.u32.f32` with four Int32Regs results.
// Operands: Int64Regs $t and $s, Int32Regs $l, Float32Regs $x, $y, $z; the address vector
// is printed as {$l, $x, $y, $z}. The pattern list (`[]`) is empty.
2860 def TEX_CUBE_ARRAY_U32_F32
2861   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2862                     Int32Regs:$b, Int32Regs:$a),
2863               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2864                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2865               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2866               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2867               []>;
// TEX_CUBE_ARRAY_U32_F32_LEVEL: emits `tex.level.acube.v4.u32.f32` with four Int32Regs
// results. The address is printed as {$l, $x, $y, $z}, followed by the Float32Regs $lod.
// The trailing `[]` leaves the selection-pattern list empty for this record.
2868 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2869   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2870                     Int32Regs:$b, Int32Regs:$a),
2871               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2872                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2873                    Float32Regs:$lod),
2874               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2875               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2876               []>;
2877
// TLD4_R_2D_F32_F32: emits `tld4.r.2d.v4.f32.f32` with four Float32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.r` selector in the mnemonic matches the `_R_` in the def name; pattern list is empty.
2878 def TLD4_R_2D_F32_F32
2879   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2880                     Float32Regs:$v2, Float32Regs:$v3),
2881               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2882               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2883               "[$t, $s, \\{$x, $y\\}];",
2884               []>;
// TLD4_G_2D_F32_F32: emits `tld4.g.2d.v4.f32.f32` with four Float32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.g` selector in the mnemonic matches the `_G_` in the def name; pattern list is empty.
2885 def TLD4_G_2D_F32_F32
2886   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2887                     Float32Regs:$v2, Float32Regs:$v3),
2888               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2889               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2890               "[$t, $s, \\{$x, $y\\}];",
2891               []>;
// TLD4_B_2D_F32_F32: emits `tld4.b.2d.v4.f32.f32` with four Float32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.b` selector in the mnemonic matches the `_B_` in the def name; pattern list is empty.
2892 def TLD4_B_2D_F32_F32
2893   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2894                     Float32Regs:$v2, Float32Regs:$v3),
2895               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2896               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2897               "[$t, $s, \\{$x, $y\\}];",
2898               []>;
// TLD4_A_2D_F32_F32: emits `tld4.a.2d.v4.f32.f32` with four Float32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.a` selector in the mnemonic matches the `_A_` in the def name; pattern list is empty.
2899 def TLD4_A_2D_F32_F32
2900   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2901                     Float32Regs:$v2, Float32Regs:$v3),
2902               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2903               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2904               "[$t, $s, \\{$x, $y\\}];",
2905               []>;
// TLD4_R_2D_S32_F32: emits `tld4.r.2d.v4.s32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.r` selector in the mnemonic matches the `_R_` in the def name; pattern list is empty.
2906 def TLD4_R_2D_S32_F32
2907   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2908                     Int32Regs:$v2, Int32Regs:$v3),
2909               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2910               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2911               "[$t, $s, \\{$x, $y\\}];",
2912               []>;
// TLD4_G_2D_S32_F32: emits `tld4.g.2d.v4.s32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.g` selector in the mnemonic matches the `_G_` in the def name; pattern list is empty.
2913 def TLD4_G_2D_S32_F32
2914   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2915                     Int32Regs:$v2, Int32Regs:$v3),
2916               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2917               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2918               "[$t, $s, \\{$x, $y\\}];",
2919               []>;
// TLD4_B_2D_S32_F32: emits `tld4.b.2d.v4.s32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.b` selector in the mnemonic matches the `_B_` in the def name; pattern list is empty.
2920 def TLD4_B_2D_S32_F32
2921   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2922                     Int32Regs:$v2, Int32Regs:$v3),
2923               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2924               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2925               "[$t, $s, \\{$x, $y\\}];",
2926               []>;
// TLD4_A_2D_S32_F32: emits `tld4.a.2d.v4.s32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.a` selector in the mnemonic matches the `_A_` in the def name; pattern list is empty.
2927 def TLD4_A_2D_S32_F32
2928   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2929                     Int32Regs:$v2, Int32Regs:$v3),
2930               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2931               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2932               "[$t, $s, \\{$x, $y\\}];",
2933               []>;
// TLD4_R_2D_U32_F32: emits `tld4.r.2d.v4.u32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.r` selector in the mnemonic matches the `_R_` in the def name; pattern list is empty.
2934 def TLD4_R_2D_U32_F32
2935   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2936                     Int32Regs:$v2, Int32Regs:$v3),
2937               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2938               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2939               "[$t, $s, \\{$x, $y\\}];",
2940               []>;
// TLD4_G_2D_U32_F32: emits `tld4.g.2d.v4.u32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.g` selector in the mnemonic matches the `_G_` in the def name; pattern list is empty.
2941 def TLD4_G_2D_U32_F32
2942   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2943                     Int32Regs:$v2, Int32Regs:$v3),
2944               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2945               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2946               "[$t, $s, \\{$x, $y\\}];",
2947               []>;
// TLD4_B_2D_U32_F32: emits `tld4.b.2d.v4.u32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.b` selector in the mnemonic matches the `_B_` in the def name; pattern list is empty.
2948 def TLD4_B_2D_U32_F32
2949   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2950                     Int32Regs:$v2, Int32Regs:$v3),
2951               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2952               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2953               "[$t, $s, \\{$x, $y\\}];",
2954               []>;
// TLD4_A_2D_U32_F32: emits `tld4.a.2d.v4.u32.f32` with four Int32Regs results ($v0..$v3).
// Operands: Int64Regs $t and $s, then Float32Regs coordinates printed as {$x, $y}.
// The `.a` selector in the mnemonic matches the `_A_` in the def name; pattern list is empty.
2955 def TLD4_A_2D_U32_F32
2956   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2957                     Int32Regs:$v2, Int32Regs:$v3),
2958               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2959               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2960               "[$t, $s, \\{$x, $y\\}];",
2961               []>;
2962 }
2963
2964
2965 // texmode_unified
2966 let IsTex = 1, IsTexModeUnified = 1 in {
2967 // Texture fetch instructions using handles
// TEX_UNIFIED_1D_F32_S32: unified-mode record emitting `tex.1d.v4.f32.s32` with four
// Float32Regs results. Only one Int64Regs operand ($t) precedes the coordinate; there is no
// separate $s operand. The Int32Regs $x is printed as {$x}. The pattern list is empty.
2968 def TEX_UNIFIED_1D_F32_S32
2969   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2970                     Float32Regs:$b, Float32Regs:$a),
2971               (ins Int64Regs:$t, Int32Regs:$x),
2972               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2973               []>;
// TEX_UNIFIED_1D_F32_F32: unified-mode record emitting `tex.1d.v4.f32.f32` with four
// Float32Regs results. Only one Int64Regs operand ($t) precedes the coordinate; there is no
// separate $s operand. The Float32Regs $x is printed as {$x}. The pattern list is empty.
2974 def TEX_UNIFIED_1D_F32_F32
2975   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2976                     Float32Regs:$b, Float32Regs:$a),
2977               (ins Int64Regs:$t, Float32Regs:$x),
2978               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2979               []>;
// TEX_UNIFIED_1D_F32_F32_LEVEL: unified-mode record emitting `tex.level.1d.v4.f32.f32` with
// four Float32Regs results. Address is [$t, {$x}] (no $s operand), followed by the
// Float32Regs $lod. The trailing `[]` leaves the selection-pattern list empty.
2980 def TEX_UNIFIED_1D_F32_F32_LEVEL
2981   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2982                     Float32Regs:$b, Float32Regs:$a),
2983               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2984               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2985               "[$t, \\{$x\\}], $lod;",
2986               []>;
// TEX_UNIFIED_1D_F32_F32_GRAD: unified-mode record emitting `tex.grad.1d.v4.f32.f32` with
// four Float32Regs results. Address is [$t, {$x}] (no $s operand); $gradx and $grady are
// each printed as a one-element vector. The pattern list (`[]`) is empty.
2987 def TEX_UNIFIED_1D_F32_F32_GRAD
2988   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2989                     Float32Regs:$b, Float32Regs:$a),
2990               (ins Int64Regs:$t, Float32Regs:$x,
2991                    Float32Regs:$gradx, Float32Regs:$grady),
2992               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2993               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2994               []>;
// TEX_UNIFIED_1D_S32_S32: unified-mode record emitting `tex.1d.v4.s32.s32` with four
// Int32Regs results. Only one Int64Regs operand ($t) precedes the coordinate; there is no
// separate $s operand. The Int32Regs $x is printed as {$x}. The pattern list is empty.
2995 def TEX_UNIFIED_1D_S32_S32
2996   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2997                     Int32Regs:$b, Int32Regs:$a),
2998               (ins Int64Regs:$t, Int32Regs:$x),
2999               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3000               []>;
// TEX_UNIFIED_1D_S32_F32: unified-mode record emitting `tex.1d.v4.s32.f32` with four
// Int32Regs results. Only one Int64Regs operand ($t) precedes the coordinate; there is no
// separate $s operand. The Float32Regs $x is printed as {$x}. The pattern list is empty.
3001 def TEX_UNIFIED_1D_S32_F32
3002   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3003                     Int32Regs:$b, Int32Regs:$a),
3004               (ins Int64Regs:$t, Float32Regs:$x),
3005               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3006               []>;
// TEX_UNIFIED_1D_S32_F32_LEVEL: unified-mode record emitting `tex.level.1d.v4.s32.f32` with
// four Int32Regs results. Address is [$t, {$x}] (no $s operand), followed by the
// Float32Regs $lod. The trailing `[]` leaves the selection-pattern list empty.
3007 def TEX_UNIFIED_1D_S32_F32_LEVEL
3008   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3009                     Int32Regs:$b, Int32Regs:$a),
3010               (ins Int64Regs:$t, Float32Regs:$x,
3011                    Float32Regs:$lod),
3012               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3013               "[$t, \\{$x\\}], $lod;",
3014               []>;
// TEX_UNIFIED_1D_S32_F32_GRAD: unified-mode record emitting `tex.grad.1d.v4.s32.f32` with
// four Int32Regs results. Address is [$t, {$x}] (no $s operand); $gradx and $grady are
// each printed as a one-element vector. The pattern list (`[]`) is empty.
3015 def TEX_UNIFIED_1D_S32_F32_GRAD
3016   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3017                     Int32Regs:$b, Int32Regs:$a),
3018               (ins Int64Regs:$t, Float32Regs:$x,
3019                    Float32Regs:$gradx, Float32Regs:$grady),
3020               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3021               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3022               []>;
// TEX_UNIFIED_1D_U32_S32: unified-mode record emitting `tex.1d.v4.u32.s32` with four
// Int32Regs results. Only one Int64Regs operand ($t) precedes the coordinate; there is no
// separate $s operand. The Int32Regs $x is printed as {$x}. The pattern list is empty.
3023 def TEX_UNIFIED_1D_U32_S32
3024   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3025                     Int32Regs:$b, Int32Regs:$a),
3026               (ins Int64Regs:$t, Int32Regs:$x),
3027               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3028               []>;
// TEX_UNIFIED_1D_U32_F32: unified-mode record emitting `tex.1d.v4.u32.f32` with four
// Int32Regs results. Only one Int64Regs operand ($t) precedes the coordinate; there is no
// separate $s operand. The Float32Regs $x is printed as {$x}. The pattern list is empty.
3029 def TEX_UNIFIED_1D_U32_F32
3030   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3031                     Int32Regs:$b, Int32Regs:$a),
3032               (ins Int64Regs:$t, Float32Regs:$x),
3033               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3034               []>;
// TEX_UNIFIED_1D_U32_F32_LEVEL: unified-mode record emitting `tex.level.1d.v4.u32.f32` with
// four Int32Regs results. Address is [$t, {$x}] (no $s operand), followed by the
// Float32Regs $lod. The trailing `[]` leaves the selection-pattern list empty.
3035 def TEX_UNIFIED_1D_U32_F32_LEVEL
3036   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3037                     Int32Regs:$b, Int32Regs:$a),
3038               (ins Int64Regs:$t, Float32Regs:$x,
3039                    Float32Regs:$lod),
3040               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3041               "[$t, \\{$x\\}], $lod;",
3042               []>;
// TEX_UNIFIED_1D_U32_F32_GRAD: unified-mode record emitting `tex.grad.1d.v4.u32.f32` with
// four Int32Regs results. Address is [$t, {$x}] (no $s operand); $gradx and $grady are
// each printed as a one-element vector. The pattern list (`[]`) is empty.
3043 def TEX_UNIFIED_1D_U32_F32_GRAD
3044   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3045                     Int32Regs:$b, Int32Regs:$a),
3046               (ins Int64Regs:$t, Float32Regs:$x,
3047                    Float32Regs:$gradx, Float32Regs:$grady),
3048               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3049               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3050               []>;
3051
// TEX_UNIFIED_1D_ARRAY_F32_S32: unified-mode record emitting `tex.a1d.v4.f32.s32` with four
// Float32Regs results. Operands: Int64Regs $t (no $s operand), then Int32Regs $l and $x
// printed as {$l, $x}. The trailing `[]` leaves the selection-pattern list empty.
3052 def TEX_UNIFIED_1D_ARRAY_F32_S32
3053   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3054                     Float32Regs:$b, Float32Regs:$a),
3055               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3056               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3057               "[$t, \\{$l, $x\\}];",
3058               []>;
// TEX_UNIFIED_1D_ARRAY_F32_F32: unified-mode record emitting `tex.a1d.v4.f32.f32` with four
// Float32Regs results. Operands: Int64Regs $t (no $s operand), Int32Regs $l and
// Float32Regs $x printed as {$l, $x}. The pattern list (`[]`) is empty.
3059 def TEX_UNIFIED_1D_ARRAY_F32_F32
3060   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3061                     Float32Regs:$b, Float32Regs:$a),
3062               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3063               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3064               "[$t, \\{$l, $x\\}];",
3065               []>;
// TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL: unified-mode record emitting `tex.level.a1d.v4.f32.f32`
// with four Float32Regs results. Address is [$t, {$l, $x}] (no $s operand), followed by
// the Float32Regs $lod. The trailing `[]` leaves the selection-pattern list empty.
3066 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3067   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3068                     Float32Regs:$b, Float32Regs:$a),
3069               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3070                    Float32Regs:$lod),
3071               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3072               "[$t, \\{$l, $x\\}], $lod;",
3073               []>;
// TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD: unified-mode record emitting `tex.grad.a1d.v4.f32.f32`
// with four Float32Regs results. Address is [$t, {$l, $x}] (no $s operand); $gradx and
// $grady are each printed as a one-element vector. The pattern list (`[]`) is empty.
3074 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3075   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3076                     Float32Regs:$b, Float32Regs:$a),
3077               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3078                    Float32Regs:$gradx, Float32Regs:$grady),
3079               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3080               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3081               []>;
// TEX_UNIFIED_1D_ARRAY_S32_S32: unified-mode record emitting `tex.a1d.v4.s32.s32` with four
// Int32Regs results. Operands: Int64Regs $t (no $s operand), then Int32Regs $l and $x
// printed as {$l, $x}. The trailing `[]` leaves the selection-pattern list empty.
3082 def TEX_UNIFIED_1D_ARRAY_S32_S32
3083   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3084                     Int32Regs:$b, Int32Regs:$a),
3085               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3086               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3087               "[$t, \\{$l, $x\\}];",
3088               []>;
// Prints `tex.a1d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Float32Regs), printed as
// [$t, {$l, $x}]. The pattern list is empty.
3089 def TEX_UNIFIED_1D_ARRAY_S32_F32
3090   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3091                     Int32Regs:$b, Int32Regs:$a),
3092               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3093               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3094               "[$t, \\{$l, $x\\}];",
3095               []>;
// Prints `tex.level.a1d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
3096 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3097   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3098                     Int32Regs:$b, Int32Regs:$a),
3099               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3100                    Float32Regs:$lod),
3101               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3102               "[$t, \\{$l, $x\\}], $lod;",
3103               []>;
// Prints `tex.grad.a1d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Float32Regs), plus $gradx and
// $grady (Float32Regs), each printed as its own brace-wrapped operand.
// The pattern list is empty.
3104 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3105   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3106                     Int32Regs:$b, Int32Regs:$a),
3107               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3108                    Float32Regs:$gradx, Float32Regs:$grady),
3109               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3110               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3111               []>;
// Prints `tex.a1d.v4.u32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Int32Regs), printed as
// [$t, {$l, $x}]. The pattern list is empty.
3112 def TEX_UNIFIED_1D_ARRAY_U32_S32
3113   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3114                     Int32Regs:$b, Int32Regs:$a),
3115               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3116               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3117               "[$t, \\{$l, $x\\}];",
3118               []>;
// Prints `tex.a1d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Float32Regs), printed as
// [$t, {$l, $x}]. The pattern list is empty.
3119 def TEX_UNIFIED_1D_ARRAY_U32_F32
3120   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3121                     Int32Regs:$b, Int32Regs:$a),
3122               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3123               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3124               "[$t, \\{$l, $x\\}];",
3125               []>;
// Prints `tex.level.a1d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
3126 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3127   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3128                     Int32Regs:$b, Int32Regs:$a),
3129               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3130                    Float32Regs:$lod),
3131               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3132               "[$t, \\{$l, $x\\}], $lod;",
3133               []>;
// Prints `tex.grad.a1d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x (Float32Regs), plus $gradx and
// $grady (Float32Regs), each printed as its own brace-wrapped operand.
// The pattern list is empty.
3134 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3135   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3136                     Int32Regs:$b, Int32Regs:$a),
3137               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3138                    Float32Regs:$gradx, Float32Regs:$grady),
3139               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3140               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3141               []>;
3142
// Prints `tex.2d.v4.f32.s32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Int32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3143 def TEX_UNIFIED_2D_F32_S32
3144   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3145                     Float32Regs:$b, Float32Regs:$a),
3146               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3147               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3148               "[$t, \\{$x, $y\\}];",
3149               []>;
// Prints `tex.2d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3150 def TEX_UNIFIED_2D_F32_F32
3151   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3152                     Float32Regs:$b, Float32Regs:$a),
3153               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3154               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3155               "[$t, \\{$x, $y\\}];",
3156               []>;
// Prints `tex.level.2d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
3157 def TEX_UNIFIED_2D_F32_F32_LEVEL
3158   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3159                     Float32Regs:$b, Float32Regs:$a),
3160               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3161                    Float32Regs:$lod),
3162               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3163               "[$t, \\{$x, $y\\}], $lod;",
3164               []>;
// Prints `tex.grad.2d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), then four Float32Regs
// printed as two pairs: {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
3165 def TEX_UNIFIED_2D_F32_F32_GRAD
3166   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3167                     Float32Regs:$b, Float32Regs:$a),
3168               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3169                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3170                    Float32Regs:$grady0, Float32Regs:$grady1),
3171               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3172               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3173               "\\{$grady0, $grady1\\};",
3174               []>;
// Prints `tex.2d.v4.s32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Int32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3175 def TEX_UNIFIED_2D_S32_S32
3176   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3177                     Int32Regs:$b, Int32Regs:$a),
3178               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3179               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3180               "[$t, \\{$x, $y\\}];",
3181               []>;
// Prints `tex.2d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3182 def TEX_UNIFIED_2D_S32_F32
3183   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3184                     Int32Regs:$b, Int32Regs:$a),
3185               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3186               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3187               "[$t, \\{$x, $y\\}];",
3188               []>;
// Prints `tex.level.2d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
3189 def TEX_UNIFIED_2D_S32_F32_LEVEL
3190   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3191                     Int32Regs:$b, Int32Regs:$a),
3192               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3193                    Float32Regs:$lod),
3194               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3195               "[$t, \\{$x, $y\\}], $lod;",
3196               []>;
// Prints `tex.grad.2d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), then four Float32Regs
// printed as two pairs: {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
3197 def TEX_UNIFIED_2D_S32_F32_GRAD
3198   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3199                     Int32Regs:$b, Int32Regs:$a),
3200               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3201                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3202                    Float32Regs:$grady0, Float32Regs:$grady1),
3203               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3204               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3205               "\\{$grady0, $grady1\\};",
3206               []>;
// Prints `tex.2d.v4.u32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Int32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3207 def TEX_UNIFIED_2D_U32_S32
3208   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3209                     Int32Regs:$b, Int32Regs:$a),
3210               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3211               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3212               "[$t, \\{$x, $y\\}];",
3213               []>;
// Prints `tex.2d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3214 def TEX_UNIFIED_2D_U32_F32
3215   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3216                     Int32Regs:$b, Int32Regs:$a),
3217               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3218               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3219               "[$t, \\{$x, $y\\}];",
3220               []>;
// Prints `tex.level.2d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
3221 def TEX_UNIFIED_2D_U32_F32_LEVEL
3222   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3223                     Int32Regs:$b, Int32Regs:$a),
3224               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3225                    Float32Regs:$lod),
3226               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3227               "[$t, \\{$x, $y\\}], $lod;",
3228               []>;
// Prints `tex.grad.2d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), then four Float32Regs
// printed as two pairs: {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
3229 def TEX_UNIFIED_2D_U32_F32_GRAD
3230   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3231                     Int32Regs:$b, Int32Regs:$a),
3232               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3233                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3234                    Float32Regs:$grady0, Float32Regs:$grady1),
3235               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3236               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3237               "\\{$grady0, $grady1\\};",
3238               []>;
3239
// Prints `tex.a2d.v4.f32.s32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $l, $x, $y (Int32Regs). The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3240 def TEX_UNIFIED_2D_ARRAY_F32_S32
3241   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3242                     Float32Regs:$b, Float32Regs:$a),
3243               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3244                    Int32Regs:$y),
3245               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3246               "[$t, \\{$l, $x, $y, $y\\}];",
3247               []>;
// Prints `tex.a2d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs).
// The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3248 def TEX_UNIFIED_2D_ARRAY_F32_F32
3249   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3250                     Float32Regs:$b, Float32Regs:$a),
3251               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3252                    Float32Regs:$y),
3253               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3254               "[$t, \\{$l, $x, $y, $y\\}];",
3255               []>;
// Prints `tex.level.a2d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3256 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3257   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3258                     Float32Regs:$b, Float32Regs:$a),
3259               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3260                    Float32Regs:$y, Float32Regs:$lod),
3261               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3262               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3263               []>;
// Prints `tex.grad.a2d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs), then four
// Float32Regs printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3264 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3265   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3266                     Float32Regs:$b, Float32Regs:$a),
3267               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3268                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3269                    Float32Regs:$grady0, Float32Regs:$grady1),
3270               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3271               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3272               "\\{$grady0, $grady1\\};",
3273               []>;
// Prints `tex.a2d.v4.s32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l, $x, $y (Int32Regs). The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3274 def TEX_UNIFIED_2D_ARRAY_S32_S32
3275   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3276                     Int32Regs:$b, Int32Regs:$a),
3277               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3278                    Int32Regs:$y),
3279               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3280               "[$t, \\{$l, $x, $y, $y\\}];",
3281               []>;
// Prints `tex.a2d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs).
// The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3282 def TEX_UNIFIED_2D_ARRAY_S32_F32
3283   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3284                     Int32Regs:$b, Int32Regs:$a),
3285               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3286                    Float32Regs:$y),
3287               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3288               "[$t, \\{$l, $x, $y, $y\\}];",
3289               []>;
// Prints `tex.level.a2d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3290 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3291   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3292                     Int32Regs:$b, Int32Regs:$a),
3293               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3294                    Float32Regs:$y, Float32Regs:$lod),
3295               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3296               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3297               []>;
// Prints `tex.grad.a2d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs), then four
// Float32Regs printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3298 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3299   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3300                     Int32Regs:$b, Int32Regs:$a),
3301               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3302                    Float32Regs:$y,
3303                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3304                    Float32Regs:$grady0, Float32Regs:$grady1),
3305               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3306               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3307               "\\{$grady0, $grady1\\};",
3308               []>;
// Prints `tex.a2d.v4.u32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l, $x, $y (Int32Regs). The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3309 def TEX_UNIFIED_2D_ARRAY_U32_S32
3310   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3311                     Int32Regs:$b, Int32Regs:$a),
3312               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3313                    Int32Regs:$y),
3314               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3315               "[$t, \\{$l, $x, $y, $y\\}];",
3316               []>;
// Prints `tex.a2d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs).
// The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3317 def TEX_UNIFIED_2D_ARRAY_U32_F32
3318   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3319                     Int32Regs:$b, Int32Regs:$a),
3320               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3321                    Float32Regs:$y),
3322               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3323               "[$t, \\{$l, $x, $y, $y\\}];",
3324               []>;
// Prints `tex.level.a2d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3325 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3326   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3327                     Int32Regs:$b, Int32Regs:$a),
3328               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3329                    Float32Regs:$y, Float32Regs:$lod),
3330               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3331               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3332               []>;
// Prints `tex.grad.a2d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x and $y (Float32Regs), then four
// Float32Regs printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// The pattern list is empty.
// NOTE(review): $y is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3333 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3334   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3335                     Int32Regs:$b, Int32Regs:$a),
3336               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3337                    Float32Regs:$y,
3338                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3339                    Float32Regs:$grady0, Float32Regs:$grady1),
3340               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3341               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3342               "\\{$grady0, $grady1\\};",
3343               []>;
3344
// Prints `tex.3d.v4.f32.s32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Int32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3345 def TEX_UNIFIED_3D_F32_S32
3346   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3347                     Float32Regs:$b, Float32Regs:$a),
3348               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3349                    Int32Regs:$z),
3350               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3351               "[$t, \\{$x, $y, $z, $z\\}];",
3352               []>;
// Prints `tex.3d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3353 def TEX_UNIFIED_3D_F32_F32
3354   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3355                     Float32Regs:$b, Float32Regs:$a),
3356               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3357                    Float32Regs:$z),
3358               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3359               "[$t, \\{$x, $y, $z, $z\\}];",
3360               []>;
// Prints `tex.level.3d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3361 def TEX_UNIFIED_3D_F32_F32_LEVEL
3362   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3363                     Float32Regs:$b, Float32Regs:$a),
3364               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3365                    Float32Regs:$z, Float32Regs:$lod),
3366               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3367               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3368               []>;
// Prints `tex.grad.3d.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), then $gradx0..$gradx2 and
// $grady0..$grady2 (Float32Regs). The pattern list is empty.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
// brace-wrapped list has four entries; presumably padding required by PTX -
// confirm in the PTX ISA.
3369 def TEX_UNIFIED_3D_F32_F32_GRAD
3370   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3371                     Float32Regs:$b, Float32Regs:$a),
3372               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3373                    Float32Regs:$z,
3374                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3375                    Float32Regs:$gradx2, Float32Regs:$grady0,
3376                    Float32Regs:$grady1, Float32Regs:$grady2),
3377               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3378               "[$t, \\{$x, $y, $z, $z\\}], "
3379               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3380               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3381               []>;
// Prints `tex.3d.v4.s32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Int32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3382 def TEX_UNIFIED_3D_S32_S32
3383   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3384                     Int32Regs:$b, Int32Regs:$a),
3385               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3386                    Int32Regs:$z),
3387               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3388               "[$t, \\{$x, $y, $z, $z\\}];",
3389               []>;
// Prints `tex.3d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3390 def TEX_UNIFIED_3D_S32_F32
3391   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3392                     Int32Regs:$b, Int32Regs:$a),
3393               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3394                    Float32Regs:$z),
3395               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3396               "[$t, \\{$x, $y, $z, $z\\}];",
3397               []>;
// Prints `tex.level.3d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3398 def TEX_UNIFIED_3D_S32_F32_LEVEL
3399   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3400                     Int32Regs:$b, Int32Regs:$a),
3401               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3402                    Float32Regs:$z, Float32Regs:$lod),
3403               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3404               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3405               []>;
// Prints `tex.grad.3d.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), then $gradx0..$gradx2 and
// $grady0..$grady2 (Float32Regs). The pattern list is empty.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
// brace-wrapped list has four entries; presumably padding required by PTX -
// confirm in the PTX ISA.
3406 def TEX_UNIFIED_3D_S32_F32_GRAD
3407   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3408                     Int32Regs:$b, Int32Regs:$a),
3409               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3410                    Float32Regs:$z,
3411                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3412                    Float32Regs:$gradx2, Float32Regs:$grady0,
3413                    Float32Regs:$grady1, Float32Regs:$grady2),
3414               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3415               "[$t, \\{$x, $y, $z, $z\\}], "
3416               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3417               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3418               []>;
// Prints `tex.3d.v4.u32.s32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Int32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3419 def TEX_UNIFIED_3D_U32_S32
3420   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3421                     Int32Regs:$b, Int32Regs:$a),
3422               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3423                    Int32Regs:$z),
3424               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3425               "[$t, \\{$x, $y, $z, $z\\}];",
3426               []>;
// Prints `tex.3d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3427 def TEX_UNIFIED_3D_U32_F32
3428   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3429                     Int32Regs:$b, Int32Regs:$a),
3430               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3431                    Float32Regs:$z),
3432               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3433               "[$t, \\{$x, $y, $z, $z\\}];",
3434               []>;
// Prints `tex.level.3d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3435 def TEX_UNIFIED_3D_U32_F32_LEVEL
3436   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3437                     Int32Regs:$b, Int32Regs:$a),
3438               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3439                    Float32Regs:$z, Float32Regs:$lod),
3440               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3441               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3442               []>;
// Prints `tex.grad.3d.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), then $gradx0..$gradx2 and
// $grady0..$grady2 (Float32Regs). The pattern list is empty.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so every
// brace-wrapped list has four entries; presumably padding required by PTX -
// confirm in the PTX ISA.
3443 def TEX_UNIFIED_3D_U32_F32_GRAD
3444   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3445                     Int32Regs:$b, Int32Regs:$a),
3446               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3447                    Float32Regs:$z,
3448                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3449                    Float32Regs:$gradx2, Float32Regs:$grady0,
3450                    Float32Regs:$grady1, Float32Regs:$grady2),
3451               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3452               "[$t, \\{$x, $y, $z, $z\\}], "
3453               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3454               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3455               []>;
3456
// Prints `tex.cube.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3457 def TEX_UNIFIED_CUBE_F32_F32
3458   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3459                     Float32Regs:$b, Float32Regs:$a),
3460               (ins Int64Regs:$t,
3461                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3462               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3463               "[$t, \\{$x, $y, $z, $z\\}];",
3464               []>;
// Prints `tex.level.cube.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3465 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3466   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3467                     Float32Regs:$b, Float32Regs:$a),
3468               (ins Int64Regs:$t,
3469                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3470                    Float32Regs:$lod),
3471               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3472               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3473               []>;
// Prints `tex.cube.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3474 def TEX_UNIFIED_CUBE_S32_F32
3475   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3476                     Int32Regs:$b, Int32Regs:$a),
3477               (ins Int64Regs:$t,
3478                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3479               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3480               "[$t, \\{$x, $y, $z, $z\\}];",
3481               []>;
// Prints `tex.level.cube.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3482 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3483   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3484                     Int32Regs:$b, Int32Regs:$a),
3485               (ins Int64Regs:$t,
3486                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3487                    Float32Regs:$lod),
3488               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3489               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3490               []>;
// Prints `tex.cube.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs). The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3491 def TEX_UNIFIED_CUBE_U32_F32
3492   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3493                     Int32Regs:$b, Int32Regs:$a),
3494               (ins Int64Regs:$t,
3495                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3496               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3497               "[$t, \\{$x, $y, $z, $z\\}];",
3498               []>;
// Prints `tex.level.cube.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $x, $y, $z (Float32Regs), plus $lod (Float32Regs)
// printed after the address operand. The pattern list is empty.
// NOTE(review): $z is printed twice, giving a four-entry coordinate list;
// presumably the last slot is padding required by PTX - confirm in the PTX ISA.
3499 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3500   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3501                     Int32Regs:$b, Int32Regs:$a),
3502               (ins Int64Regs:$t,
3503                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3504                    Float32Regs:$lod),
3505               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3506               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3507               []>;
3508
// Prints `tex.acube.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs), printed
// as [$t, {$l, $x, $y, $z}]. The pattern list is empty.
3509 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3510   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3511                     Float32Regs:$b, Float32Regs:$a),
3512               (ins Int64Regs:$t, Int32Regs:$l,
3513                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3514               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3515               "[$t, \\{$l, $x, $y, $z\\}];",
3516               []>;
// Prints `tex.level.acube.v4.f32.f32`. Results: $r, $g, $b, $a (Float32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
3517 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3518   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3519                     Float32Regs:$b, Float32Regs:$a),
3520               (ins Int64Regs:$t, Int32Regs:$l,
3521                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3522                    Float32Regs:$lod),
3523               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3524               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3525               []>;
// Prints `tex.acube.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs), printed
// as [$t, {$l, $x, $y, $z}]. The pattern list is empty.
3526 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3527   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3528                     Int32Regs:$b, Int32Regs:$a),
3529               (ins Int64Regs:$t, Int32Regs:$l,
3530                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3531               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3532               "[$t, \\{$l, $x, $y, $z\\}];",
3533               []>;
// Prints `tex.level.acube.v4.s32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
3534 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3535   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3536                     Int32Regs:$b, Int32Regs:$a),
3537               (ins Int64Regs:$t, Int32Regs:$l,
3538                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3539                    Float32Regs:$lod),
3540               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3541               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3542               []>;
// Prints `tex.acube.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs), printed
// as [$t, {$l, $x, $y, $z}]. The pattern list is empty.
3543 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3544   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3545                     Int32Regs:$b, Int32Regs:$a),
3546               (ins Int64Regs:$t, Int32Regs:$l,
3547                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3548               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3549               "[$t, \\{$l, $x, $y, $z\\}];",
3550               []>;
// Prints `tex.level.acube.v4.u32.f32`. Results: $r, $g, $b, $a (Int32Regs).
// Inputs: $t (Int64Regs), $l (Int32Regs), $x, $y, $z (Float32Regs), plus $lod
// (Float32Regs) printed after the address operand. The pattern list is empty.
3551 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3552   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3553                     Int32Regs:$b, Int32Regs:$a),
3554               (ins Int64Regs:$t, Int32Regs:$l,
3555                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3556                    Float32Regs:$lod),
3557               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3558               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3559               []>;
3560
// Prints `tld4.r.2d.v4.f32.f32`. Results: $v0, $v1, $v2, $v3 (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3561 def TLD4_UNIFIED_R_2D_F32_F32
3562   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3563                     Float32Regs:$v2, Float32Regs:$v3),
3564               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3565               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3566               "[$t, \\{$x, $y\\}];",
3567               []>;
// Prints `tld4.g.2d.v4.f32.f32`. Results: $v0, $v1, $v2, $v3 (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3568 def TLD4_UNIFIED_G_2D_F32_F32
3569   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3570                     Float32Regs:$v2, Float32Regs:$v3),
3571               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3572               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3573               "[$t, \\{$x, $y\\}];",
3574               []>;
// Prints `tld4.b.2d.v4.f32.f32`. Results: $v0, $v1, $v2, $v3 (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3575 def TLD4_UNIFIED_B_2D_F32_F32
3576   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3577                     Float32Regs:$v2, Float32Regs:$v3),
3578               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3579               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3580               "[$t, \\{$x, $y\\}];",
3581               []>;
// Prints `tld4.a.2d.v4.f32.f32`. Results: $v0, $v1, $v2, $v3 (Float32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3582 def TLD4_UNIFIED_A_2D_F32_F32
3583   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3584                     Float32Regs:$v2, Float32Regs:$v3),
3585               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3586               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3587               "[$t, \\{$x, $y\\}];",
3588               []>;
// Prints `tld4.r.2d.v4.s32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3589 def TLD4_UNIFIED_R_2D_S32_F32
3590   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3591                     Int32Regs:$v2, Int32Regs:$v3),
3592               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3593               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3594               "[$t, \\{$x, $y\\}];",
3595               []>;
// Prints `tld4.g.2d.v4.s32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3596 def TLD4_UNIFIED_G_2D_S32_F32
3597   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3598                     Int32Regs:$v2, Int32Regs:$v3),
3599               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3600               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3601               "[$t, \\{$x, $y\\}];",
3602               []>;
// Prints `tld4.b.2d.v4.s32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3603 def TLD4_UNIFIED_B_2D_S32_F32
3604   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3605                     Int32Regs:$v2, Int32Regs:$v3),
3606               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3607               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3608               "[$t, \\{$x, $y\\}];",
3609               []>;
// Prints `tld4.a.2d.v4.s32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3610 def TLD4_UNIFIED_A_2D_S32_F32
3611   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3612                     Int32Regs:$v2, Int32Regs:$v3),
3613               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3614               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3615               "[$t, \\{$x, $y\\}];",
3616               []>;
// Prints `tld4.r.2d.v4.u32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3617 def TLD4_UNIFIED_R_2D_U32_F32
3618   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3619                     Int32Regs:$v2, Int32Regs:$v3),
3620               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3621               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3622               "[$t, \\{$x, $y\\}];",
3623               []>;
// Prints `tld4.g.2d.v4.u32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3624 def TLD4_UNIFIED_G_2D_U32_F32
3625   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3626                     Int32Regs:$v2, Int32Regs:$v3),
3627               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3628               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3629               "[$t, \\{$x, $y\\}];",
3630               []>;
// Prints `tld4.b.2d.v4.u32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3631 def TLD4_UNIFIED_B_2D_U32_F32
3632   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3633                     Int32Regs:$v2, Int32Regs:$v3),
3634               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3635               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3636               "[$t, \\{$x, $y\\}];",
3637               []>;
// Prints `tld4.a.2d.v4.u32.f32`. Results: $v0, $v1, $v2, $v3 (Int32Regs).
// Inputs: $t (Int64Regs), $x and $y (Float32Regs), printed as [$t, {$x, $y}].
// The pattern list is empty.
3638 def TLD4_UNIFIED_A_2D_U32_F32
3639   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3640                     Int32Regs:$v2, Int32Regs:$v3),
3641               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3642               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3643               "[$t, \\{$x, $y\\}];",
3644               []>;
3645 }
3646
3647
3648
3649 //=== Surface load instructions
3650 // .clamp variant
3651 let IsSuld = 1 in {
3652 def SULD_1D_I8_CLAMP
3653   : NVPTXInst<(outs Int16Regs:$r),
3654               (ins Int64Regs:$s, Int32Regs:$x),
3655               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3656               []>;
3657 def SULD_1D_I16_CLAMP
3658   : NVPTXInst<(outs Int16Regs:$r),
3659               (ins Int64Regs:$s, Int32Regs:$x),
3660               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3661               []>;
3662 def SULD_1D_I32_CLAMP
3663   : NVPTXInst<(outs Int32Regs:$r),
3664               (ins Int64Regs:$s, Int32Regs:$x),
3665               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3666               []>;
3667 def SULD_1D_I64_CLAMP
3668   : NVPTXInst<(outs Int64Regs:$r),
3669               (ins Int64Regs:$s, Int32Regs:$x),
3670               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3671               []>;
3672
3673 def SULD_1D_ARRAY_I8_CLAMP
3674   : NVPTXInst<(outs Int16Regs:$r),
3675               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3676               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3677               []>;
3678 def SULD_1D_ARRAY_I16_CLAMP
3679   : NVPTXInst<(outs Int16Regs:$r),
3680               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3681               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3682               []>;
3683 def SULD_1D_ARRAY_I32_CLAMP
3684   : NVPTXInst<(outs Int32Regs:$r),
3685               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3686               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3687               []>;
3688 def SULD_1D_ARRAY_I64_CLAMP
3689   : NVPTXInst<(outs Int64Regs:$r),
3690               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3691               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3692               []>;
3693
3694 def SULD_2D_I8_CLAMP
3695   : NVPTXInst<(outs Int16Regs:$r),
3696               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3697               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3698               []>;
3699 def SULD_2D_I16_CLAMP
3700   : NVPTXInst<(outs Int16Regs:$r),
3701               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3702               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3703               []>;
3704 def SULD_2D_I32_CLAMP
3705   : NVPTXInst<(outs Int32Regs:$r),
3706               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3707               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3708               []>;
3709 def SULD_2D_I64_CLAMP
3710   : NVPTXInst<(outs Int64Regs:$r),
3711               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3712               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3713               []>;
3714
3715 def SULD_2D_ARRAY_I8_CLAMP
3716   : NVPTXInst<(outs Int16Regs:$r),
3717               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3718               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3719               []>;
3720 def SULD_2D_ARRAY_I16_CLAMP
3721   : NVPTXInst<(outs Int16Regs:$r),
3722               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3723               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3724               []>;
// Emits PTX `suld.b.a2d.b32.clamp` with one Int32Regs result ($r).
// The coordinate vector in the asm string has four entries and repeats $y
// ({$l, $x, $y, $y}); presumably the last slot is padding -- confirm against
// the PTX ISA. No selection pattern is attached (empty list).
3725 def SULD_2D_ARRAY_I32_CLAMP
3726   : NVPTXInst<(outs Int32Regs:$r),
3727               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3728               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3729               []>;
// Emits PTX `suld.b.a2d.b64.clamp` with one Int64Regs result ($r).
// The coordinate vector in the asm string has four entries and repeats $y
// ({$l, $x, $y, $y}); presumably the last slot is padding -- confirm against
// the PTX ISA. No selection pattern is attached (empty list).
3730 def SULD_2D_ARRAY_I64_CLAMP
3731   : NVPTXInst<(outs Int64Regs:$r),
3732               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3733               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3734               []>;
3735
// Emits PTX `suld.b.3d.b8.clamp`; the b8 element is returned in Int16Regs.
// Three coordinate operands ($x, $y, $z) are printed as the four-entry vector
// {$x, $y, $z, $z}, i.e. $z appears twice.
// NOTE(review): presumably the 4th slot is padding that PTX ignores -- confirm
// against the PTX ISA.
3736 def SULD_3D_I8_CLAMP
3737   : NVPTXInst<(outs Int16Regs:$r),
3738               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3739               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3740               []>;
// Emits PTX `suld.b.3d.b16.clamp` with one Int16Regs result ($r).
// The asm string prints the coordinates as {$x, $y, $z, $z}; the repeated $z
// is presumably padding for a four-entry vector -- confirm against the PTX ISA.
// No selection pattern is attached (empty list).
3741 def SULD_3D_I16_CLAMP
3742   : NVPTXInst<(outs Int16Regs:$r),
3743               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3744               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3745               []>;
// Emits PTX `suld.b.3d.b32.clamp` with one Int32Regs result ($r).
// The asm string prints the coordinates as {$x, $y, $z, $z}; the repeated $z
// is presumably padding for a four-entry vector -- confirm against the PTX ISA.
// No selection pattern is attached (empty list).
3746 def SULD_3D_I32_CLAMP
3747   : NVPTXInst<(outs Int32Regs:$r),
3748               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3749               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3750               []>;
// Emits PTX `suld.b.3d.b64.clamp` with one Int64Regs result ($r).
// The asm string prints the coordinates as {$x, $y, $z, $z}; the repeated $z
// is presumably padding for a four-entry vector -- confirm against the PTX ISA.
// No selection pattern is attached (empty list).
3751 def SULD_3D_I64_CLAMP
3752   : NVPTXInst<(outs Int64Regs:$r),
3753               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3754               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3755               []>;
3756 }
3757
// Two-result (.v2) `suld.b` surface-load instructions with the `.clamp` suffix.
// Every def in this block:
//   * is tagged with IsSuld = 2 by the enclosing `let`;
//   * returns two registers, $r and $g, of the same register class;
//   * takes the Int64Regs operand $s followed by 32-bit coordinate operands;
//   * has an empty pattern list, so no selection pattern is attached here.
// b8 and b16 elements both use Int16Regs results; b32 uses Int32Regs and b64
// uses Int64Regs.
// NOTE(review): the meaning of the IsSuld value is defined by whatever reads
// that field, which is outside this view -- confirm before changing it.
3758 let IsSuld = 2 in {
// 1d geometry: the coordinate vector is just {$x}.
3759 def SULD_1D_V2I8_CLAMP
3760   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3761               (ins Int64Regs:$s, Int32Regs:$x),
3762               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3763               []>;
3764 def SULD_1D_V2I16_CLAMP
3765   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3766               (ins Int64Regs:$s, Int32Regs:$x),
3767               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3768               []>;
3769 def SULD_1D_V2I32_CLAMP
3770   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3771               (ins Int64Regs:$s, Int32Regs:$x),
3772               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3773               []>;
3774 def SULD_1D_V2I64_CLAMP
3775   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3776               (ins Int64Regs:$s, Int32Regs:$x),
3777               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3778               []>;
3779
// a1d geometry: the coordinate vector is {$l, $x}.
// NOTE(review): $l is presumably the array-layer index -- confirm.
3780 def SULD_1D_ARRAY_V2I8_CLAMP
3781   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3782               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3783               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3784               []>;
3785 def SULD_1D_ARRAY_V2I16_CLAMP
3786   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3787               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3788               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3789               []>;
3790 def SULD_1D_ARRAY_V2I32_CLAMP
3791   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3792               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3793               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3794               []>;
3795 def SULD_1D_ARRAY_V2I64_CLAMP
3796   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3797               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3798               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3799               []>;
3800
// 2d geometry: the coordinate vector is {$x, $y}.
3801 def SULD_2D_V2I8_CLAMP
3802   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3803               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3804               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3805               []>;
3806 def SULD_2D_V2I16_CLAMP
3807   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3808               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3809               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3810               []>;
3811 def SULD_2D_V2I32_CLAMP
3812   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3813               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3814               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3815               []>;
3816 def SULD_2D_V2I64_CLAMP
3817   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3818               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3819               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3820               []>;
3821
// a2d geometry: three coordinate operands are printed as the four-entry vector
// {$l, $x, $y, $y}, with $y repeated. Presumably the 4th slot is padding that
// PTX ignores -- confirm against the PTX ISA. The asm string is written as two
// adjacent string literals.
3822 def SULD_2D_ARRAY_V2I8_CLAMP
3823   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3824               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3825               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3826               "[$s, \\{$l, $x, $y, $y\\}];",
3827               []>;
3828 def SULD_2D_ARRAY_V2I16_CLAMP
3829   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3830               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3831               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3832               "[$s, \\{$l, $x, $y, $y\\}];",
3833               []>;
3834 def SULD_2D_ARRAY_V2I32_CLAMP
3835   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3836               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3837               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3838               "[$s, \\{$l, $x, $y, $y\\}];",
3839               []>;
3840 def SULD_2D_ARRAY_V2I64_CLAMP
3841   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3842               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3843               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3844               "[$s, \\{$l, $x, $y, $y\\}];",
3845               []>;
3846
// 3d geometry: three coordinate operands are printed as the four-entry vector
// {$x, $y, $z, $z}, with $z repeated (presumably padding -- confirm).
3847 def SULD_3D_V2I8_CLAMP
3848   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3849               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3850               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3851               []>;
3852 def SULD_3D_V2I16_CLAMP
3853   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3854               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3855               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3856               []>;
3857 def SULD_3D_V2I32_CLAMP
3858   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3859               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3860               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3861               []>;
3862 def SULD_3D_V2I64_CLAMP
3863   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3864               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3865               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3866               []>;
3867 }
3868
// Four-result (.v4) `suld.b` surface-load instructions with the `.clamp`
// suffix. Every def in this block:
//   * is tagged with IsSuld = 3 by the enclosing `let`;
//   * returns four registers, $r, $g, $b and $a, of the same register class;
//   * takes the Int64Regs operand $s followed by 32-bit coordinate operands;
//   * has an empty pattern list, so no selection pattern is attached here.
// Only b8, b16 and b32 element sizes are defined; this block has no
// 64-bit .v4 def. b8 and b16 both use Int16Regs results.
// NOTE(review): the meaning of the IsSuld value is defined by whatever reads
// that field, which is outside this view -- confirm before changing it.
3869 let IsSuld = 3 in {
// 1d geometry: the coordinate vector is just {$x}.
3870 def SULD_1D_V4I8_CLAMP
3871   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3872               (ins Int64Regs:$s, Int32Regs:$x),
3873               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3874               []>;
3875 def SULD_1D_V4I16_CLAMP
3876   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3877               (ins Int64Regs:$s, Int32Regs:$x),
3878               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3879               []>;
3880 def SULD_1D_V4I32_CLAMP
3881   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3882               (ins Int64Regs:$s, Int32Regs:$x),
3883               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3884               []>;
3885
// a1d geometry: the coordinate vector is {$l, $x}.
// NOTE(review): $l is presumably the array-layer index -- confirm.
3886 def SULD_1D_ARRAY_V4I8_CLAMP
3887   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3888               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3889               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3890               "[$s, \\{$l, $x\\}];",
3891               []>;
3892 def SULD_1D_ARRAY_V4I16_CLAMP
3893   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3894               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3895               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3896               "[$s, \\{$l, $x\\}];",
3897               []>;
3898 def SULD_1D_ARRAY_V4I32_CLAMP
3899   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3900               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3901               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3902               "[$s, \\{$l, $x\\}];",
3903               []>;
3904
// 2d geometry: the coordinate vector is {$x, $y}.
3905 def SULD_2D_V4I8_CLAMP
3906   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3907               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3908               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3909               []>;
3910 def SULD_2D_V4I16_CLAMP
3911   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3912               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3913               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3914               []>;
3915 def SULD_2D_V4I32_CLAMP
3916   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3917               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3918               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3919               []>;
3920
// a2d geometry: three coordinate operands are printed as the four-entry vector
// {$l, $x, $y, $y}, with $y repeated. Presumably the 4th slot is padding that
// PTX ignores -- confirm against the PTX ISA.
3921 def SULD_2D_ARRAY_V4I8_CLAMP
3922   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3923               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3924               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3925               "[$s, \\{$l, $x, $y, $y\\}];",
3926               []>;
3927 def SULD_2D_ARRAY_V4I16_CLAMP
3928   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3929               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3930               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3931               "[$s, \\{$l, $x, $y, $y\\}];",
3932               []>;
3933 def SULD_2D_ARRAY_V4I32_CLAMP
3934   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3935               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3936               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3937               "[$s, \\{$l, $x, $y, $y\\}];",
3938               []>;
3939
3940
// 3d geometry: three coordinate operands are printed as the four-entry vector
// {$x, $y, $z, $z}, with $z repeated (presumably padding -- confirm).
3941 def SULD_3D_V4I8_CLAMP
3942   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3943               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3944               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3945               "[$s, \\{$x, $y, $z, $z\\}];",
3946               []>;
3947 def SULD_3D_V4I16_CLAMP
3948   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3949               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3950               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3951               "[$s, \\{$x, $y, $z, $z\\}];",
3952               []>;
3953 def SULD_3D_V4I32_CLAMP
3954   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3955               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3956               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3957               "[$s, \\{$x, $y, $z, $z\\}];",
3958               []>;
3959 }
3960
3961
3962 // .trap variant
// Single-result `suld.b` surface-load instructions with the `.trap` suffix.
// Every def in this block:
//   * is tagged with IsSuld = 1 by the enclosing `let`;
//   * returns one register, $r;
//   * takes the Int64Regs operand $s followed by 32-bit coordinate operands;
//   * has an empty pattern list, so no selection pattern is attached here.
// b8 and b16 elements both use an Int16Regs result; b32 uses Int32Regs and
// b64 uses Int64Regs.
// NOTE(review): the suffix presumably selects PTX's out-of-bounds handling
// mode, and the IsSuld value is interpreted by code outside this view --
// confirm both before changing anything here.
3963 let IsSuld = 1 in {
// 1d geometry: the coordinate vector is just {$x}.
3964 def SULD_1D_I8_TRAP
3965   : NVPTXInst<(outs Int16Regs:$r),
3966               (ins Int64Regs:$s, Int32Regs:$x),
3967               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3968               []>;
3969 def SULD_1D_I16_TRAP
3970   : NVPTXInst<(outs Int16Regs:$r),
3971               (ins Int64Regs:$s, Int32Regs:$x),
3972               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3973               []>;
3974 def SULD_1D_I32_TRAP
3975   : NVPTXInst<(outs Int32Regs:$r),
3976               (ins Int64Regs:$s, Int32Regs:$x),
3977               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3978               []>;
3979 def SULD_1D_I64_TRAP
3980   : NVPTXInst<(outs Int64Regs:$r),
3981               (ins Int64Regs:$s, Int32Regs:$x),
3982               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3983               []>;
3984
// a1d geometry: the coordinate vector is {$l, $x}.
// NOTE(review): $l is presumably the array-layer index -- confirm.
3985 def SULD_1D_ARRAY_I8_TRAP
3986   : NVPTXInst<(outs Int16Regs:$r),
3987               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3988               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3989               []>;
3990 def SULD_1D_ARRAY_I16_TRAP
3991   : NVPTXInst<(outs Int16Regs:$r),
3992               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3993               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3994               []>;
3995 def SULD_1D_ARRAY_I32_TRAP
3996   : NVPTXInst<(outs Int32Regs:$r),
3997               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3998               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3999               []>;
4000 def SULD_1D_ARRAY_I64_TRAP
4001   : NVPTXInst<(outs Int64Regs:$r),
4002               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4003               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4004               []>;
4005
// 2d geometry: the coordinate vector is {$x, $y}.
4006 def SULD_2D_I8_TRAP
4007   : NVPTXInst<(outs Int16Regs:$r),
4008               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4009               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4010               []>;
4011 def SULD_2D_I16_TRAP
4012   : NVPTXInst<(outs Int16Regs:$r),
4013               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4014               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4015               []>;
4016 def SULD_2D_I32_TRAP
4017   : NVPTXInst<(outs Int32Regs:$r),
4018               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4019               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4020               []>;
4021 def SULD_2D_I64_TRAP
4022   : NVPTXInst<(outs Int64Regs:$r),
4023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4024               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4025               []>;
4026
// a2d geometry: three coordinate operands are printed as the four-entry vector
// {$l, $x, $y, $y}, with $y repeated. Presumably the 4th slot is padding that
// PTX ignores -- confirm against the PTX ISA.
4027 def SULD_2D_ARRAY_I8_TRAP
4028   : NVPTXInst<(outs Int16Regs:$r),
4029               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4030               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4031               []>;
4032 def SULD_2D_ARRAY_I16_TRAP
4033   : NVPTXInst<(outs Int16Regs:$r),
4034               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4035               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4036               []>;
4037 def SULD_2D_ARRAY_I32_TRAP
4038   : NVPTXInst<(outs Int32Regs:$r),
4039               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4040               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4041               []>;
4042 def SULD_2D_ARRAY_I64_TRAP
4043   : NVPTXInst<(outs Int64Regs:$r),
4044               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4045               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4046               []>;
4047
// 3d geometry: three coordinate operands are printed as the four-entry vector
// {$x, $y, $z, $z}, with $z repeated (presumably padding -- confirm).
4048 def SULD_3D_I8_TRAP
4049   : NVPTXInst<(outs Int16Regs:$r),
4050               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4051               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4052               []>;
4053 def SULD_3D_I16_TRAP
4054   : NVPTXInst<(outs Int16Regs:$r),
4055               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4056               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4057               []>;
4058 def SULD_3D_I32_TRAP
4059   : NVPTXInst<(outs Int32Regs:$r),
4060               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4061               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4062               []>;
4063 def SULD_3D_I64_TRAP
4064   : NVPTXInst<(outs Int64Regs:$r),
4065               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4066               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4067               []>;
4068 }
4069
// Two-result (.v2) `suld.b` surface-load instructions with the `.trap` suffix.
// Every def in this block:
//   * is tagged with IsSuld = 2 by the enclosing `let`;
//   * returns two registers, $r and $g, of the same register class;
//   * takes the Int64Regs operand $s followed by 32-bit coordinate operands;
//   * has an empty pattern list, so no selection pattern is attached here.
// b8 and b16 elements both use Int16Regs results; b32 uses Int32Regs and b64
// uses Int64Regs.
// NOTE(review): the meaning of the IsSuld value is defined by whatever reads
// that field, which is outside this view -- confirm before changing it.
4070 let IsSuld = 2 in {
// 1d geometry: the coordinate vector is just {$x}.
4071 def SULD_1D_V2I8_TRAP
4072   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4073               (ins Int64Regs:$s, Int32Regs:$x),
4074               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4075               []>;
4076 def SULD_1D_V2I16_TRAP
4077   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4078               (ins Int64Regs:$s, Int32Regs:$x),
4079               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4080               []>;
4081 def SULD_1D_V2I32_TRAP
4082   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4083               (ins Int64Regs:$s, Int32Regs:$x),
4084               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4085               []>;
4086 def SULD_1D_V2I64_TRAP
4087   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4088               (ins Int64Regs:$s, Int32Regs:$x),
4089               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4090               []>;
4091
// a1d geometry: the coordinate vector is {$l, $x}.
// NOTE(review): $l is presumably the array-layer index -- confirm.
4092 def SULD_1D_ARRAY_V2I8_TRAP
4093   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4094               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4095               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4096               []>;
4097 def SULD_1D_ARRAY_V2I16_TRAP
4098   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4099               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4100               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4101               []>;
4102 def SULD_1D_ARRAY_V2I32_TRAP
4103   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4104               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4105               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4106               []>;
4107 def SULD_1D_ARRAY_V2I64_TRAP
4108   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4109               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4110               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4111               []>;
4112
// 2d geometry: the coordinate vector is {$x, $y}.
4113 def SULD_2D_V2I8_TRAP
4114   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4115               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4116               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4117               []>;
4118 def SULD_2D_V2I16_TRAP
4119   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4120               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4121               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4122               []>;
4123 def SULD_2D_V2I32_TRAP
4124   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4125               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4126               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4127               []>;
4128 def SULD_2D_V2I64_TRAP
4129   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4130               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4131               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4132               []>;
4133
// a2d geometry: three coordinate operands are printed as the four-entry vector
// {$l, $x, $y, $y}, with $y repeated. Presumably the 4th slot is padding that
// PTX ignores -- confirm against the PTX ISA. The asm string is written as two
// adjacent string literals.
4134 def SULD_2D_ARRAY_V2I8_TRAP
4135   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4136               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4137               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4138               "[$s, \\{$l, $x, $y, $y\\}];",
4139               []>;
4140 def SULD_2D_ARRAY_V2I16_TRAP
4141   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4142               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4143               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4144               "[$s, \\{$l, $x, $y, $y\\}];",
4145               []>;
4146 def SULD_2D_ARRAY_V2I32_TRAP
4147   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4148               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4149               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4150               "[$s, \\{$l, $x, $y, $y\\}];",
4151               []>;
4152 def SULD_2D_ARRAY_V2I64_TRAP
4153   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4154               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4155               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4156               "[$s, \\{$l, $x, $y, $y\\}];",
4157               []>;
4158
// 3d geometry: three coordinate operands are printed as the four-entry vector
// {$x, $y, $z, $z}, with $z repeated (presumably padding -- confirm).
4159 def SULD_3D_V2I8_TRAP
4160   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4161               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4162               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4163               []>;
4164 def SULD_3D_V2I16_TRAP
4165   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4166               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4167               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4168               []>;
4169 def SULD_3D_V2I32_TRAP
4170   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4171               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4172               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4173               []>;
4174 def SULD_3D_V2I64_TRAP
4175   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4176               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4177               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4178               []>;
4179 }
4180
// Four-result (.v4) `suld.b` surface-load instructions with the `.trap` suffix.
// Every def in this block:
//   * is tagged with IsSuld = 3 by the enclosing `let`;
//   * returns four registers, $r, $g, $b and $a, of the same register class;
//   * takes the Int64Regs operand $s followed by 32-bit coordinate operands;
//   * has an empty pattern list, so no selection pattern is attached here.
// Only b8, b16 and b32 element sizes are defined; this block has no
// 64-bit .v4 def. b8 and b16 both use Int16Regs results.
// NOTE(review): the meaning of the IsSuld value is defined by whatever reads
// that field, which is outside this view -- confirm before changing it.
4181 let IsSuld = 3 in {
// 1d geometry: the coordinate vector is just {$x}.
4182 def SULD_1D_V4I8_TRAP
4183   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4184               (ins Int64Regs:$s, Int32Regs:$x),
4185               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4186               []>;
4187 def SULD_1D_V4I16_TRAP
4188   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4189               (ins Int64Regs:$s, Int32Regs:$x),
4190               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4191               []>;
4192 def SULD_1D_V4I32_TRAP
4193   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4194               (ins Int64Regs:$s, Int32Regs:$x),
4195               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4196               []>;
4197
// a1d geometry: the coordinate vector is {$l, $x}.
// NOTE(review): $l is presumably the array-layer index -- confirm.
4198 def SULD_1D_ARRAY_V4I8_TRAP
4199   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4200               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4201               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4202               "[$s, \\{$l, $x\\}];",
4203               []>;
4204 def SULD_1D_ARRAY_V4I16_TRAP
4205   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4206               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4207               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4208               "[$s, \\{$l, $x\\}];",
4209               []>;
4210 def SULD_1D_ARRAY_V4I32_TRAP
4211   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4212               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4213               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4214               "[$s, \\{$l, $x\\}];",
4215               []>;
4216
// 2d geometry: the coordinate vector is {$x, $y}.
4217 def SULD_2D_V4I8_TRAP
4218   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4219               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4220               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4221               []>;
4222 def SULD_2D_V4I16_TRAP
4223   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4224               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4225               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4226               []>;
4227 def SULD_2D_V4I32_TRAP
4228   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4229               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4230               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4231               []>;
4232
// a2d geometry: three coordinate operands are printed as the four-entry vector
// {$l, $x, $y, $y}, with $y repeated. Presumably the 4th slot is padding that
// PTX ignores -- confirm against the PTX ISA.
4233 def SULD_2D_ARRAY_V4I8_TRAP
4234   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4235               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4236               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4237               "[$s, \\{$l, $x, $y, $y\\}];",
4238               []>;
4239 def SULD_2D_ARRAY_V4I16_TRAP
4240   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4241               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4242               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4243               "[$s, \\{$l, $x, $y, $y\\}];",
4244               []>;
4245 def SULD_2D_ARRAY_V4I32_TRAP
4246   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4247               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4248               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4249               "[$s, \\{$l, $x, $y, $y\\}];",
4250               []>;
4251
4252
// 3d geometry: three coordinate operands are printed as the four-entry vector
// {$x, $y, $z, $z}, with $z repeated (presumably padding -- confirm).
4253 def SULD_3D_V4I8_TRAP
4254   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4255               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4256               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4257               "[$s, \\{$x, $y, $z, $z\\}];",
4258               []>;
4259 def SULD_3D_V4I16_TRAP
4260   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4261               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4262               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4263               "[$s, \\{$x, $y, $z, $z\\}];",
4264               []>;
4265 def SULD_3D_V4I32_TRAP
4266   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4267               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4268               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4269               "[$s, \\{$x, $y, $z, $z\\}];",
4270               []>;
4271 }
4272
4273 // .zero variant
// Single-result `suld.b` surface-load instructions with the `.zero` suffix.
// Every def in this block:
//   * is tagged with IsSuld = 1 by the enclosing `let`;
//   * returns one register, $r;
//   * takes the Int64Regs operand $s followed by 32-bit coordinate operands;
//   * has an empty pattern list, so no selection pattern is attached here.
// b8 and b16 elements both use an Int16Regs result; b32 uses Int32Regs and
// b64 uses Int64Regs.
// NOTE(review): the suffix presumably selects PTX's out-of-bounds handling
// mode, and the IsSuld value is interpreted by code outside this view --
// confirm both before changing anything here.
4274 let IsSuld = 1 in {
// 1d geometry: the coordinate vector is just {$x}.
4275 def SULD_1D_I8_ZERO
4276   : NVPTXInst<(outs Int16Regs:$r),
4277               (ins Int64Regs:$s, Int32Regs:$x),
4278               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4279               []>;
4280 def SULD_1D_I16_ZERO
4281   : NVPTXInst<(outs Int16Regs:$r),
4282               (ins Int64Regs:$s, Int32Regs:$x),
4283               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4284               []>;
4285 def SULD_1D_I32_ZERO
4286   : NVPTXInst<(outs Int32Regs:$r),
4287               (ins Int64Regs:$s, Int32Regs:$x),
4288               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4289               []>;
4290 def SULD_1D_I64_ZERO
4291   : NVPTXInst<(outs Int64Regs:$r),
4292               (ins Int64Regs:$s, Int32Regs:$x),
4293               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4294               []>;
4295
// a1d geometry: the coordinate vector is {$l, $x}.
// NOTE(review): $l is presumably the array-layer index -- confirm.
4296 def SULD_1D_ARRAY_I8_ZERO
4297   : NVPTXInst<(outs Int16Regs:$r),
4298               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4299               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4300               []>;
4301 def SULD_1D_ARRAY_I16_ZERO
4302   : NVPTXInst<(outs Int16Regs:$r),
4303               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4304               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4305               []>;
4306 def SULD_1D_ARRAY_I32_ZERO
4307   : NVPTXInst<(outs Int32Regs:$r),
4308               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4309               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4310               []>;
4311 def SULD_1D_ARRAY_I64_ZERO
4312   : NVPTXInst<(outs Int64Regs:$r),
4313               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4314               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4315               []>;
4316
// 2d geometry: the coordinate vector is {$x, $y}.
4317 def SULD_2D_I8_ZERO
4318   : NVPTXInst<(outs Int16Regs:$r),
4319               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4320               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4321               []>;
4322 def SULD_2D_I16_ZERO
4323   : NVPTXInst<(outs Int16Regs:$r),
4324               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4325               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4326               []>;
4327 def SULD_2D_I32_ZERO
4328   : NVPTXInst<(outs Int32Regs:$r),
4329               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4330               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4331               []>;
4332 def SULD_2D_I64_ZERO
4333   : NVPTXInst<(outs Int64Regs:$r),
4334               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4335               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4336               []>;
4337
// a2d geometry: three coordinate operands are printed as the four-entry vector
// {$l, $x, $y, $y}, with $y repeated. Presumably the 4th slot is padding that
// PTX ignores -- confirm against the PTX ISA.
4338 def SULD_2D_ARRAY_I8_ZERO
4339   : NVPTXInst<(outs Int16Regs:$r),
4340               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4341               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4342               []>;
4343 def SULD_2D_ARRAY_I16_ZERO
4344   : NVPTXInst<(outs Int16Regs:$r),
4345               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4346               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4347               []>;
4348 def SULD_2D_ARRAY_I32_ZERO
4349   : NVPTXInst<(outs Int32Regs:$r),
4350               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4351               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4352               []>;
4353 def SULD_2D_ARRAY_I64_ZERO
4354   : NVPTXInst<(outs Int64Regs:$r),
4355               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4356               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4357               []>;
4358
// 3d geometry: three coordinate operands are printed as the four-entry vector
// {$x, $y, $z, $z}, with $z repeated (presumably padding -- confirm).
4359 def SULD_3D_I8_ZERO
4360   : NVPTXInst<(outs Int16Regs:$r),
4361               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4362               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4363               []>;
4364 def SULD_3D_I16_ZERO
4365   : NVPTXInst<(outs Int16Regs:$r),
4366               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4367               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4368               []>;
4369 def SULD_3D_I32_ZERO
4370   : NVPTXInst<(outs Int32Regs:$r),
4371               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4372               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4373               []>;
4374 def SULD_3D_I64_ZERO
4375   : NVPTXInst<(outs Int64Regs:$r),
4376               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4377               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4378               []>;
4379 }
4380
4381 let IsSuld = 2 in {
// Emits PTX `suld.b.1d.v2.b8.zero`: two results ($r, $g) read through operand
// $s at the single coordinate {$x}. The b8 elements are returned in Int16Regs.
// The pattern list is empty, so no selection pattern is attached here.
4382 def SULD_1D_V2I8_ZERO
4383   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4384               (ins Int64Regs:$s, Int32Regs:$x),
4385               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4386               []>;
// Emits PTX `suld.b.1d.v2.b16.zero`: two Int16Regs results ($r, $g) read
// through operand $s at the single coordinate {$x}. No selection pattern.
4387 def SULD_1D_V2I16_ZERO
4388   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4389               (ins Int64Regs:$s, Int32Regs:$x),
4390               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4391               []>;
// Emits PTX `suld.b.1d.v2.b32.zero`: two Int32Regs results ($r, $g) read
// through operand $s at the single coordinate {$x}. No selection pattern.
4392 def SULD_1D_V2I32_ZERO
4393   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4394               (ins Int64Regs:$s, Int32Regs:$x),
4395               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4396               []>;
// Emits PTX `suld.b.1d.v2.b64.zero`: two Int64Regs results ($r, $g) read
// through operand $s at the single coordinate {$x}. No selection pattern.
4397 def SULD_1D_V2I64_ZERO
4398   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4399               (ins Int64Regs:$s, Int32Regs:$x),
4400               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4401               []>;
4402
// Emits PTX `suld.b.a1d.v2.b8.zero`: two results ($r, $g) with coordinate
// vector {$l, $x}. The b8 elements are returned in Int16Regs.
// NOTE(review): $l is presumably the array-layer index -- confirm against the
// PTX ISA. No selection pattern is attached (empty list).
4403 def SULD_1D_ARRAY_V2I8_ZERO
4404   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4405               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4406               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4407               []>;
4408 def SULD_1D_ARRAY_V2I16_ZERO
4409   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4410               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4411               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4412               []>;
4413 def SULD_1D_ARRAY_V2I32_ZERO
4414   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4415               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4416               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4417               []>;
4418 def SULD_1D_ARRAY_V2I64_ZERO
4419   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4420               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4421               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4422               []>;
4423
4424 def SULD_2D_V2I8_ZERO
4425   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4426               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4427               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4428               []>;
4429 def SULD_2D_V2I16_ZERO
4430   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4431               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4432               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4433               []>;
4434 def SULD_2D_V2I32_ZERO
4435   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4436               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4437               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4438               []>;
4439 def SULD_2D_V2I64_ZERO
4440   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4441               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4442               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4443               []>;
4444
4445 def SULD_2D_ARRAY_V2I8_ZERO
4446   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4447               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4448               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4449               "[$s, \\{$l, $x, $y, $y\\}];",
4450               []>;
4451 def SULD_2D_ARRAY_V2I16_ZERO
4452   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4453               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4454               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4455               "[$s, \\{$l, $x, $y, $y\\}];",
4456               []>;
4457 def SULD_2D_ARRAY_V2I32_ZERO
4458   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4459               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4460               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4461               "[$s, \\{$l, $x, $y, $y\\}];",
4462               []>;
4463 def SULD_2D_ARRAY_V2I64_ZERO
4464   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4465               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4466               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4467               "[$s, \\{$l, $x, $y, $y\\}];",
4468               []>;
4469
4470 def SULD_3D_V2I8_ZERO
4471   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4472               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4473               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4474               []>;
4475 def SULD_3D_V2I16_ZERO
4476   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4477               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4478               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4479               []>;
4480 def SULD_3D_V2I32_ZERO
4481   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4482               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4483               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4484               []>;
4485 def SULD_3D_V2I64_ZERO
4486   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4487               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4488               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4489               []>;
4490 }
4491
// Four-element (.v4) surface loads, ".zero" variant.  Every def in this group
// gets IsSuld = 3 from the enclosing `let`.
// NOTE(review): the value 3 presumably tags the ".v4" result shape (this
// group contains only .v4 forms) -- confirm where IsSuld is consumed.
//
// Common shape of each def:
//   outs : four result registers $r, $g, $b, $a (Int16Regs for b8/b16,
//          Int32Regs for b32)
//   ins  : 64-bit operand $s followed by the 32-bit coordinates
//   asm  : "suld.b.<geom>.v4.<width>.zero {$r, $g, $b, $a}, [$s, {coords}];"
//   pats : empty -- no selection pattern is attached to the def itself.
// Only b8, b16 and b32 widths are defined in this group; there is no b64 .v4
// form here.
4492 let IsSuld = 3 in {
// 1d geometry: single coordinate $x.
4493 def SULD_1D_V4I8_ZERO
4494   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4495               (ins Int64Regs:$s, Int32Regs:$x),
4496               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4497               []>;
4498 def SULD_1D_V4I16_ZERO
4499   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4500               (ins Int64Regs:$s, Int32Regs:$x),
4501               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4502               []>;
4503 def SULD_1D_V4I32_ZERO
4504   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4505               (ins Int64Regs:$s, Int32Regs:$x),
4506               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4507               []>;
4508
// a1d geometry: $l is printed first, then $x.
// NOTE(review): $l is presumably the array layer index -- confirm.
4509 def SULD_1D_ARRAY_V4I8_ZERO
4510   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4511               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4512               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4513               "[$s, \\{$l, $x\\}];",
4514               []>;
4515 def SULD_1D_ARRAY_V4I16_ZERO
4516   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4517               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4518               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4519               "[$s, \\{$l, $x\\}];",
4520               []>;
4521 def SULD_1D_ARRAY_V4I32_ZERO
4522   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4523               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4524               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4525               "[$s, \\{$l, $x\\}];",
4526               []>;
4527
// 2d geometry: coordinates $x, $y.
4528 def SULD_2D_V4I8_ZERO
4529   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4530               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4531               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4532               []>;
4533 def SULD_2D_V4I16_ZERO
4534   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4535               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4536               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4537               []>;
4538 def SULD_2D_V4I32_ZERO
4539   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4540               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4541               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4542               []>;
4543
// a2d geometry: the printed vector is {$l, $x, $y, $y}; $y is repeated to
// fill a fourth slot.  NOTE(review): presumably PTX wants a four-element
// vector here and ignores the last element -- confirm against the PTX ISA.
4544 def SULD_2D_ARRAY_V4I8_ZERO
4545   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4546               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4547               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4548               "[$s, \\{$l, $x, $y, $y\\}];",
4549               []>;
4550 def SULD_2D_ARRAY_V4I16_ZERO
4551   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4552               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4553               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4554               "[$s, \\{$l, $x, $y, $y\\}];",
4555               []>;
4556 def SULD_2D_ARRAY_V4I32_ZERO
4557   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4558               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4559               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4560               "[$s, \\{$l, $x, $y, $y\\}];",
4561               []>;
4562
4563
// 3d geometry: the printed vector is {$x, $y, $z, $z}; $z is repeated to fill
// the fourth slot (same padding idea as the a2d forms above).
4564 def SULD_3D_V4I8_ZERO
4565   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4566               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4567               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4568               "[$s, \\{$x, $y, $z, $z\\}];",
4569               []>;
4570 def SULD_3D_V4I16_ZERO
4571   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4572               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4573               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4574               "[$s, \\{$x, $y, $z, $z\\}];",
4575               []>;
4576 def SULD_3D_V4I32_ZERO
4577   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4578               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4579               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4580               "[$s, \\{$x, $y, $z, $z\\}];",
4581               []>;
4582 }
4583
4584 //-----------------------------------
4585 // Texture Query Intrinsics
4586 //-----------------------------------
4587
// Texture query instructions.  Every def in this group gets
// IsSurfTexQuery = 1 from the enclosing `let`.
// Each instruction reads one 64-bit register $a and writes one 32-bit
// register $d, printing "txq.<attribute>.b32 \t$d, [$a];".
// NOTE(review): $a is presumably the texture handle -- confirm.
// The pattern lists are empty; the intrinsic-to-instruction mapping is
// expressed by the separate `def : Pat<...>` records that follow this group.
4588 let IsSurfTexQuery = 1 in {
4589 def TXQ_CHANNEL_ORDER
4590   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4591               "txq.channel_order.b32 \t$d, [$a];",
4592               []>;
4593 def TXQ_CHANNEL_DATA_TYPE
4594   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4595               "txq.channel_data_type.b32 \t$d, [$a];",
4596               []>;
4597 def TXQ_WIDTH
4598   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4599               "txq.width.b32 \t$d, [$a];",
4600               []>;
4601 def TXQ_HEIGHT
4602   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4603               "txq.height.b32 \t$d, [$a];",
4604               []>;
4605 def TXQ_DEPTH
4606   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4607               "txq.depth.b32 \t$d, [$a];",
4608               []>;
4609 def TXQ_ARRAY_SIZE
4610   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4611               "txq.array_size.b32 \t$d, [$a];",
4612               []>;
4613 def TXQ_NUM_SAMPLES
4614   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4615               "txq.num_samples.b32 \t$d, [$a];",
4616               []>;
4617 def TXQ_NUM_MIPMAP_LEVELS
4618   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4619               "txq.num_mipmap_levels.b32 \t$d, [$a];",
4620               []>;
4621 }
4622
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to a 64-bit register operand $a is rewritten to the TXQ_*
// instruction of the same name, passing $a through unchanged.
4623 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4624           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4625 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4626           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4627 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4628           (TXQ_WIDTH Int64Regs:$a)>;
4629 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4630           (TXQ_HEIGHT Int64Regs:$a)>;
4631 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4632           (TXQ_DEPTH Int64Regs:$a)>;
4633 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4634           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4635 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4636           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4637 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4638           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4639
4640
4641 //-----------------------------------
4642 // Surface Query Intrinsics
4643 //-----------------------------------
4644
// Surface query instructions.  Every def in this group gets
// IsSurfTexQuery = 1 from the enclosing `let`.
// Each instruction reads one 64-bit register $a and writes one 32-bit
// register $d, printing "suq.<attribute>.b32 \t$d, [$a];".
// NOTE(review): $a is presumably the surface handle -- confirm.
// Six attributes are defined in this group: channel_order, channel_data_type,
// width, height, depth and array_size.
// The pattern lists are empty; the intrinsic-to-instruction mapping is
// expressed by the separate `def : Pat<...>` records that follow this group.
4645 let IsSurfTexQuery = 1 in {
4646 def SUQ_CHANNEL_ORDER
4647   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4648               "suq.channel_order.b32 \t$d, [$a];",
4649               []>;
4650 def SUQ_CHANNEL_DATA_TYPE
4651   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4652               "suq.channel_data_type.b32 \t$d, [$a];",
4653               []>;
4654 def SUQ_WIDTH
4655   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4656               "suq.width.b32 \t$d, [$a];",
4657               []>;
4658 def SUQ_HEIGHT
4659   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4660               "suq.height.b32 \t$d, [$a];",
4661               []>;
4662 def SUQ_DEPTH
4663   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4664               "suq.depth.b32 \t$d, [$a];",
4665               []>;
4666 def SUQ_ARRAY_SIZE
4667   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4668               "suq.array_size.b32 \t$d, [$a];",
4669               []>;
4670 }
4671
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to a 64-bit register operand $a is rewritten to the SUQ_*
// instruction of the same name, passing $a through unchanged.
4672 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4673           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4674 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4675           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4676 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4677           (SUQ_WIDTH Int64Regs:$a)>;
4678 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4679           (SUQ_HEIGHT Int64Regs:$a)>;
4680 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4681           (SUQ_DEPTH Int64Regs:$a)>;
4682 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4683           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4684
4685
4686 //===- Handle Query -------------------------------------------------------===//
4687
4688 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type test for samplers: prints "istypep.samplerref" with a 1-bit
// result register $d and the 64-bit operand $a (printed bare, without
// brackets).  The attached pattern selects this instruction directly for
// int_nvvm_istypep_sampler.
4689 def ISTYPEP_SAMPLER
4690   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4691               "istypep.samplerref \t$d, $a;",
4692               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
// Handle-type test for surfaces: prints "istypep.surfref" with a 1-bit
// result register $d and the 64-bit operand $a (printed bare, without
// brackets).  The attached pattern selects this instruction directly for
// int_nvvm_istypep_surface.
4693 def ISTYPEP_SURFACE
4694   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4695               "istypep.surfref \t$d, $a;",
4696               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
// Handle-type test for textures: prints "istypep.texref" with a 1-bit
// result register $d and the 64-bit operand $a (printed bare, without
// brackets).  The attached pattern selects this instruction directly for
// int_nvvm_istypep_texture.
4697 def ISTYPEP_TEXTURE
4698   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4699               "istypep.texref \t$d, $a;",
4700               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4701
4702 //===- Surface Stores -----------------------------------------------------===//
4703
4704 let IsSust = 1 in {
4705 // Unformatted
4706 // .clamp variant
// Unformatted (sust.b) 1d surface stores, ".clamp" variant.
// Shape shared by every def in this family:
//   outs : none
//   ins  : 64-bit operand $s, 32-bit coordinate $x, then the data registers
//          ($r for scalar, $r/$g for .v2, $r/$g/$b/$a for .v4)
//   asm  : "sust.b.1d[.vN].<width>.clamp \t[$s, {$x}], {data};"
//   pats : empty -- no selection pattern is attached to the def itself.
// b8 and b16 data both travel in Int16Regs; b32 uses Int32Regs and b64 uses
// Int64Regs.  The .v4 forms stop at b32 (no .v4.b64 def in this family).
// NOTE(review): ".clamp" presumably selects the PTX out-of-bounds handling
// mode -- confirm against the PTX ISA.
4707 def SUST_B_1D_B8_CLAMP
4708   : NVPTXInst<(outs),
4709               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4710               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4711               []>;
4712 def SUST_B_1D_B16_CLAMP
4713   : NVPTXInst<(outs),
4714               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4715               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4716               []>;
4717 def SUST_B_1D_B32_CLAMP
4718   : NVPTXInst<(outs),
4719               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4720               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4721               []>;
4722 def SUST_B_1D_B64_CLAMP
4723   : NVPTXInst<(outs),
4724               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4725               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4726               []>;
4727 def SUST_B_1D_V2B8_CLAMP
4728   : NVPTXInst<(outs),
4729               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4730               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4731               []>;
4732 def SUST_B_1D_V2B16_CLAMP
4733   : NVPTXInst<(outs),
4734               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4735               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4736               []>;
4737 def SUST_B_1D_V2B32_CLAMP
4738   : NVPTXInst<(outs),
4739               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4740               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4741               []>;
4742 def SUST_B_1D_V2B64_CLAMP
4743   : NVPTXInst<(outs),
4744               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4745               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4746               []>;
4747 def SUST_B_1D_V4B8_CLAMP
4748   : NVPTXInst<(outs),
4749               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4750                    Int16Regs:$b, Int16Regs:$a),
4751               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4752               []>;
4753 def SUST_B_1D_V4B16_CLAMP
4754   : NVPTXInst<(outs),
4755               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4756                    Int16Regs:$b, Int16Regs:$a),
4757               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4758               []>;
4759 def SUST_B_1D_V4B32_CLAMP
4760   : NVPTXInst<(outs),
4761               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4762                    Int32Regs:$b, Int32Regs:$a),
4763               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4764               []>;
4765
4766
// Unformatted (sust.b) a1d surface stores, ".clamp" variant.
// Shape shared by every def in this family:
//   outs : none
//   ins  : 64-bit operand $s, 32-bit $idx, 32-bit coordinate $x, then the
//          data registers ($r / $r,$g / $r,$g,$b,$a)
//   asm  : "sust.b.a1d[.vN].<width>.clamp \t[$s, {$idx, $x}], {data};"
//   pats : empty -- no selection pattern is attached to the def itself.
// NOTE(review): $idx is presumably the array layer index -- confirm.
// b8 and b16 data both travel in Int16Regs; the .v4 forms stop at b32.
4767 def SUST_B_1D_ARRAY_B8_CLAMP
4768   : NVPTXInst<(outs),
4769               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4770               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4771               []>;
4772 def SUST_B_1D_ARRAY_B16_CLAMP
4773   : NVPTXInst<(outs),
4774               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4775               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4776               []>;
4777 def SUST_B_1D_ARRAY_B32_CLAMP
4778   : NVPTXInst<(outs),
4779               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4780               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4781               []>;
4782 def SUST_B_1D_ARRAY_B64_CLAMP
4783   : NVPTXInst<(outs),
4784               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4785               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4786               []>;
4787 def SUST_B_1D_ARRAY_V2B8_CLAMP
4788   : NVPTXInst<(outs),
4789               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4790                    Int16Regs:$g),
4791               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4792               []>;
4793 def SUST_B_1D_ARRAY_V2B16_CLAMP
4794   : NVPTXInst<(outs),
4795               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4796                    Int16Regs:$g),
4797               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4798               []>;
4799 def SUST_B_1D_ARRAY_V2B32_CLAMP
4800   : NVPTXInst<(outs),
4801               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4802                    Int32Regs:$g),
4803               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4804               []>;
4805 def SUST_B_1D_ARRAY_V2B64_CLAMP
4806   : NVPTXInst<(outs),
4807               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4808                    Int64Regs:$g),
4809               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4810               []>;
// The .v4 asm strings below are split across two adjacent string literals.
4811 def SUST_B_1D_ARRAY_V4B8_CLAMP
4812   : NVPTXInst<(outs),
4813               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4814                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4815               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4816               "\\{$r, $g, $b, $a\\};",
4817               []>;
4818 def SUST_B_1D_ARRAY_V4B16_CLAMP
4819   : NVPTXInst<(outs),
4820               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4821                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4822              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4823              "\\{$r, $g, $b, $a\\};",
4824               []>;
4825 def SUST_B_1D_ARRAY_V4B32_CLAMP
4826   : NVPTXInst<(outs),
4827               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4828                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4829              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4830              "\\{$r, $g, $b, $a\\};",
4831               []>;
4832
4833
// Unformatted (sust.b) 2d surface stores, ".clamp" variant.
// Shape shared by every def in this family:
//   outs : none
//   ins  : 64-bit operand $s, 32-bit coordinates $x and $y, then the data
//          registers ($r / $r,$g / $r,$g,$b,$a)
//   asm  : "sust.b.2d[.vN].<width>.clamp \t[$s, {$x, $y}], {data};"
//   pats : empty -- no selection pattern is attached to the def itself.
// b8 and b16 data both travel in Int16Regs; the .v4 forms stop at b32.
4834 def SUST_B_2D_B8_CLAMP
4835   : NVPTXInst<(outs),
4836               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4837               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4838               []>;
4839 def SUST_B_2D_B16_CLAMP
4840   : NVPTXInst<(outs),
4841               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4842               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4843               []>;
4844 def SUST_B_2D_B32_CLAMP
4845   : NVPTXInst<(outs),
4846               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4847               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4848               []>;
4849 def SUST_B_2D_B64_CLAMP
4850   : NVPTXInst<(outs),
4851               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4852               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4853               []>;
4854 def SUST_B_2D_V2B8_CLAMP
4855   : NVPTXInst<(outs),
4856               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4857                    Int16Regs:$g),
4858               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4859               []>;
4860 def SUST_B_2D_V2B16_CLAMP
4861   : NVPTXInst<(outs),
4862               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4863                    Int16Regs:$g),
4864               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4865               []>;
4866 def SUST_B_2D_V2B32_CLAMP
4867   : NVPTXInst<(outs),
4868               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4869                    Int32Regs:$g),
4870               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4871               []>;
4872 def SUST_B_2D_V2B64_CLAMP
4873   : NVPTXInst<(outs),
4874               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4875                    Int64Regs:$g),
4876               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4877               []>;
// The .v4 asm strings below are split across two adjacent string literals.
4878 def SUST_B_2D_V4B8_CLAMP
4879   : NVPTXInst<(outs),
4880               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4881                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4882               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4883               "\\{$r, $g, $b, $a\\};",
4884               []>;
4885 def SUST_B_2D_V4B16_CLAMP
4886   : NVPTXInst<(outs),
4887               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4888                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4889              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4890              "\\{$r, $g, $b, $a\\};",
4891               []>;
4892 def SUST_B_2D_V4B32_CLAMP
4893   : NVPTXInst<(outs),
4894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4895                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4896              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4897              "\\{$r, $g, $b, $a\\};",
4898               []>;
4899
4900
// Unformatted (sust.b) a2d surface stores, ".clamp" variant.
// Shape shared by every def in this family:
//   outs : none
//   ins  : 64-bit operand $s, 32-bit $idx, 32-bit coordinates $x and $y,
//          then the data registers ($r / $r,$g / $r,$g,$b,$a)
//   asm  : "sust.b.a2d[.vN].<width>.clamp \t[$s, {$idx, $x, $y, $y}], {data};"
//   pats : empty -- no selection pattern is attached to the def itself.
// The printed coordinate vector repeats $y to fill a fourth slot.
// NOTE(review): presumably PTX wants a four-element vector for a2d and
// ignores the last element; $idx is presumably the array layer index --
// confirm both against the PTX ISA.
// b8 and b16 data both travel in Int16Regs; the .v4 forms stop at b32.
4901 def SUST_B_2D_ARRAY_B8_CLAMP
4902   : NVPTXInst<(outs),
4903               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4904                    Int16Regs:$r),
4905               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4906               []>;
4907 def SUST_B_2D_ARRAY_B16_CLAMP
4908   : NVPTXInst<(outs),
4909               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4910                    Int16Regs:$r),
4911               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4912               []>;
4913 def SUST_B_2D_ARRAY_B32_CLAMP
4914   : NVPTXInst<(outs),
4915               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4916                    Int32Regs:$r),
4917               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4918               []>;
4919 def SUST_B_2D_ARRAY_B64_CLAMP
4920   : NVPTXInst<(outs),
4921               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4922                    Int64Regs:$r),
4923               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4924               []>;
4925 def SUST_B_2D_ARRAY_V2B8_CLAMP
4926   : NVPTXInst<(outs),
4927               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4928                    Int16Regs:$r, Int16Regs:$g),
4929               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4930               "\\{$r, $g\\};",
4931               []>;
4932 def SUST_B_2D_ARRAY_V2B16_CLAMP
4933   : NVPTXInst<(outs),
4934               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4935                    Int16Regs:$r, Int16Regs:$g),
4936              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4937              "\\{$r, $g\\};",
4938               []>;
4939 def SUST_B_2D_ARRAY_V2B32_CLAMP
4940   : NVPTXInst<(outs),
4941               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4942                    Int32Regs:$r, Int32Regs:$g),
4943              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4944              "\\{$r, $g\\};",
4945               []>;
4946 def SUST_B_2D_ARRAY_V2B64_CLAMP
4947   : NVPTXInst<(outs),
4948               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4949                    Int64Regs:$r, Int64Regs:$g),
4950              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4951              "\\{$r, $g\\};",
4952               []>;
4953 def SUST_B_2D_ARRAY_V4B8_CLAMP
4954   : NVPTXInst<(outs),
4955               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4956                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4957       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4958       "\\{$r, $g, $b, $a\\};",
4959               []>;
4960 def SUST_B_2D_ARRAY_V4B16_CLAMP
4961   : NVPTXInst<(outs),
4962               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4963                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4964      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4965      "\\{$r, $g, $b, $a\\};",
4966               []>;
4967 def SUST_B_2D_ARRAY_V4B32_CLAMP
4968   : NVPTXInst<(outs),
4969               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4970                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4971      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4972      "\\{$r, $g, $b, $a\\};",
4973               []>;
4974
4975
// Unformatted (sust.b) 3d surface stores, ".clamp" variant.
// Shape shared by every def in this family:
//   outs : none
//   ins  : 64-bit operand $s, 32-bit coordinates $x, $y, $z, then the data
//          registers ($r / $r,$g / $r,$g,$b,$a)
//   asm  : "sust.b.3d[.vN].<width>.clamp \t[$s, {$x, $y, $z, $z}], {data};"
//   pats : empty -- no selection pattern is attached to the def itself.
// The printed coordinate vector repeats $z to fill a fourth slot.
// NOTE(review): presumably PTX wants a four-element vector for 3d and
// ignores the last element -- confirm against the PTX ISA.
// b8 and b16 data both travel in Int16Regs; the .v4 forms stop at b32.
4976 def SUST_B_3D_B8_CLAMP
4977   : NVPTXInst<(outs),
4978               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4979                    Int16Regs:$r),
4980               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4981               []>;
4982 def SUST_B_3D_B16_CLAMP
4983   : NVPTXInst<(outs),
4984               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4985                    Int16Regs:$r),
4986               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4987               []>;
4988 def SUST_B_3D_B32_CLAMP
4989   : NVPTXInst<(outs),
4990               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4991                    Int32Regs:$r),
4992               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4993               []>;
4994 def SUST_B_3D_B64_CLAMP
4995   : NVPTXInst<(outs),
4996               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4997                    Int64Regs:$r),
4998               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4999               []>;
5000 def SUST_B_3D_V2B8_CLAMP
5001   : NVPTXInst<(outs),
5002               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5003                    Int16Regs:$r, Int16Regs:$g),
5004               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5005               "\\{$r, $g\\};",
5006               []>;
5007 def SUST_B_3D_V2B16_CLAMP
5008   : NVPTXInst<(outs),
5009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5010                    Int16Regs:$r, Int16Regs:$g),
5011               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5012               "\\{$r, $g\\};",
5013               []>;
5014 def SUST_B_3D_V2B32_CLAMP
5015   : NVPTXInst<(outs),
5016               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5017                    Int32Regs:$r, Int32Regs:$g),
5018               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5019               "\\{$r, $g\\};",
5020               []>;
5021 def SUST_B_3D_V2B64_CLAMP
5022   : NVPTXInst<(outs),
5023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5024                    Int64Regs:$r, Int64Regs:$g),
5025               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5026               "\\{$r, $g\\};",
5027               []>;
5028 def SUST_B_3D_V4B8_CLAMP
5029   : NVPTXInst<(outs),
5030               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5031                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5032          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5033          "\\{$r, $g, $b, $a\\};",
5034               []>;
5035 def SUST_B_3D_V4B16_CLAMP
5036   : NVPTXInst<(outs),
5037               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5038                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5039         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5040         "\\{$r, $g, $b, $a\\};",
5041               []>;
5042 def SUST_B_3D_V4B32_CLAMP
5043   : NVPTXInst<(outs),
5044               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5045                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5046         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5047         "\\{$r, $g, $b, $a\\};",
5048               []>;
5049
5050
5051 // .trap variant
// Unformatted (sust.b) 1d surface stores, ".trap" variant.  Operand lists
// mirror the SUST_B_1D_*_CLAMP defs; only the mnemonic suffix differs.
// Shape shared by every def in this family:
//   outs : none
//   ins  : 64-bit operand $s, 32-bit coordinate $x, then the data registers
//          ($r / $r,$g / $r,$g,$b,$a)
//   asm  : "sust.b.1d[.vN].<width>.trap \t[$s, {$x}], {data};"
//   pats : empty -- no selection pattern is attached to the def itself.
// b8 and b16 data both travel in Int16Regs; the .v4 forms stop at b32.
// NOTE(review): ".trap" presumably selects the PTX out-of-bounds handling
// mode -- confirm against the PTX ISA.
5052 def SUST_B_1D_B8_TRAP
5053   : NVPTXInst<(outs),
5054               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5055               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5056               []>;
5057 def SUST_B_1D_B16_TRAP
5058   : NVPTXInst<(outs),
5059               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5060               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5061               []>;
5062 def SUST_B_1D_B32_TRAP
5063   : NVPTXInst<(outs),
5064               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5065               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5066               []>;
5067 def SUST_B_1D_B64_TRAP
5068   : NVPTXInst<(outs),
5069               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5070               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
5071               []>;
5072 def SUST_B_1D_V2B8_TRAP
5073   : NVPTXInst<(outs),
5074               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5075               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5076               []>;
5077 def SUST_B_1D_V2B16_TRAP
5078   : NVPTXInst<(outs),
5079               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5080               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5081               []>;
5082 def SUST_B_1D_V2B32_TRAP
5083   : NVPTXInst<(outs),
5084               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5085               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5086               []>;
5087 def SUST_B_1D_V2B64_TRAP
5088   : NVPTXInst<(outs),
5089               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5090               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5091               []>;
5092 def SUST_B_1D_V4B8_TRAP
5093   : NVPTXInst<(outs),
5094               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5095                    Int16Regs:$b, Int16Regs:$a),
5096               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5097               []>;
5098 def SUST_B_1D_V4B16_TRAP
5099   : NVPTXInst<(outs),
5100               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5101                    Int16Regs:$b, Int16Regs:$a),
5102               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5103               []>;
5104 def SUST_B_1D_V4B32_TRAP
5105   : NVPTXInst<(outs),
5106               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5107                    Int32Regs:$b, Int32Regs:$a),
5108               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5109               []>;
5110
5111
// sust.b.a1d.*.trap records (the _1D_ARRAY_ names map to the "a1d" mnemonic).
// Each one is an NVPTXInst with no outputs and an empty pattern list.
// Operands, in order:
//   $s    Int64Regs, printed first inside the brackets
//   $idx  Int32Regs, printed as the first coordinate component
//   $x    Int32Regs, printed as the second coordinate component
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
// The .v4 templates are written as two adjacent string literals, which
// TableGen joins into a single asm string.
5112 def SUST_B_1D_ARRAY_B8_TRAP
5113   : NVPTXInst<(outs),
5114               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5115               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5116               []>;
5117 def SUST_B_1D_ARRAY_B16_TRAP
5118   : NVPTXInst<(outs),
5119               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5120               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5121               []>;
5122 def SUST_B_1D_ARRAY_B32_TRAP
5123   : NVPTXInst<(outs),
5124               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5125               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5126               []>;
5127 def SUST_B_1D_ARRAY_B64_TRAP
5128   : NVPTXInst<(outs),
5129               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5130               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5131               []>;
5132 def SUST_B_1D_ARRAY_V2B8_TRAP
5133   : NVPTXInst<(outs),
5134               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5135                    Int16Regs:$g),
5136               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5137               []>;
5138 def SUST_B_1D_ARRAY_V2B16_TRAP
5139   : NVPTXInst<(outs),
5140               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5141                    Int16Regs:$g),
5142               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5143               []>;
5144 def SUST_B_1D_ARRAY_V2B32_TRAP
5145   : NVPTXInst<(outs),
5146               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5147                    Int32Regs:$g),
5148               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5149               []>;
5150 def SUST_B_1D_ARRAY_V2B64_TRAP
5151   : NVPTXInst<(outs),
5152               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5153                    Int64Regs:$g),
5154               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5155               []>;
5156 def SUST_B_1D_ARRAY_V4B8_TRAP
5157   : NVPTXInst<(outs),
5158               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5159                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5160               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5161               "\\{$r, $g, $b, $a\\};",
5162               []>;
5163 def SUST_B_1D_ARRAY_V4B16_TRAP
5164   : NVPTXInst<(outs),
5165               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5166                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5167              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5168              "\\{$r, $g, $b, $a\\};",
5169               []>;
5170 def SUST_B_1D_ARRAY_V4B32_TRAP
5171   : NVPTXInst<(outs),
5172               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5173                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5174              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5175              "\\{$r, $g, $b, $a\\};",
5176               []>;
5177
5178
// sust.b.2d.*.trap records. Each one is an NVPTXInst with no outputs and an
// empty pattern list. Operands, in order:
//   $s      Int64Regs, printed first inside the brackets
//   $x, $y  Int32Regs, printed as the two-component coordinate vector
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
// The .v4 templates are written as two adjacent string literals, which
// TableGen joins into a single asm string.
5179 def SUST_B_2D_B8_TRAP
5180   : NVPTXInst<(outs),
5181               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5182               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5183               []>;
5184 def SUST_B_2D_B16_TRAP
5185   : NVPTXInst<(outs),
5186               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5187               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5188               []>;
5189 def SUST_B_2D_B32_TRAP
5190   : NVPTXInst<(outs),
5191               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5192               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5193               []>;
5194 def SUST_B_2D_B64_TRAP
5195   : NVPTXInst<(outs),
5196               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5197               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5198               []>;
5199 def SUST_B_2D_V2B8_TRAP
5200   : NVPTXInst<(outs),
5201               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5202                    Int16Regs:$g),
5203               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5204               []>;
5205 def SUST_B_2D_V2B16_TRAP
5206   : NVPTXInst<(outs),
5207               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5208                    Int16Regs:$g),
5209               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5210               []>;
5211 def SUST_B_2D_V2B32_TRAP
5212   : NVPTXInst<(outs),
5213               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5214                    Int32Regs:$g),
5215               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5216               []>;
5217 def SUST_B_2D_V2B64_TRAP
5218   : NVPTXInst<(outs),
5219               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5220                    Int64Regs:$g),
5221               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5222               []>;
5223 def SUST_B_2D_V4B8_TRAP
5224   : NVPTXInst<(outs),
5225               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5226                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5227               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5228               "\\{$r, $g, $b, $a\\};",
5229               []>;
5230 def SUST_B_2D_V4B16_TRAP
5231   : NVPTXInst<(outs),
5232               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5233                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5234              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5235              "\\{$r, $g, $b, $a\\};",
5236               []>;
5237 def SUST_B_2D_V4B32_TRAP
5238   : NVPTXInst<(outs),
5239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5240                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5241              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5242              "\\{$r, $g, $b, $a\\};",
5243               []>;
5244
5245
// sust.b.a2d.*.trap records (the _2D_ARRAY_ names map to the "a2d" mnemonic).
// Each one is an NVPTXInst with no outputs and an empty pattern list.
// Operands, in order:
//   $s            Int64Regs, printed first inside the brackets
//   $idx, $x, $y  Int32Regs coordinate operands
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// The coordinate vector is printed as {$idx, $x, $y, $y}: $y is emitted twice
// in every record of this group, so three operands yield four components.
// NOTE(review): presumably PTX wants a four-element coordinate vector for
// a2d and the repeat is padding -- confirm against the PTX ISA before
// treating it as a typo.
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
5246 def SUST_B_2D_ARRAY_B8_TRAP
5247   : NVPTXInst<(outs),
5248               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5249                    Int16Regs:$r),
5250               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5251               []>;
5252 def SUST_B_2D_ARRAY_B16_TRAP
5253   : NVPTXInst<(outs),
5254               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5255                    Int16Regs:$r),
5256               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5257               []>;
5258 def SUST_B_2D_ARRAY_B32_TRAP
5259   : NVPTXInst<(outs),
5260               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5261                    Int32Regs:$r),
5262               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5263               []>;
5264 def SUST_B_2D_ARRAY_B64_TRAP
5265   : NVPTXInst<(outs),
5266               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5267                    Int64Regs:$r),
5268               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5269               []>;
5270 def SUST_B_2D_ARRAY_V2B8_TRAP
5271   : NVPTXInst<(outs),
5272               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5273                    Int16Regs:$r, Int16Regs:$g),
5274               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5275               "\\{$r, $g\\};",
5276               []>;
5277 def SUST_B_2D_ARRAY_V2B16_TRAP
5278   : NVPTXInst<(outs),
5279               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5280                    Int16Regs:$r, Int16Regs:$g),
5281              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5282              "\\{$r, $g\\};",
5283               []>;
5284 def SUST_B_2D_ARRAY_V2B32_TRAP
5285   : NVPTXInst<(outs),
5286               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5287                    Int32Regs:$r, Int32Regs:$g),
5288              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5289              "\\{$r, $g\\};",
5290               []>;
5291 def SUST_B_2D_ARRAY_V2B64_TRAP
5292   : NVPTXInst<(outs),
5293               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5294                    Int64Regs:$r, Int64Regs:$g),
5295              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5296              "\\{$r, $g\\};",
5297               []>;
5298 def SUST_B_2D_ARRAY_V4B8_TRAP
5299   : NVPTXInst<(outs),
5300               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5301                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5302       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5303       "\\{$r, $g, $b, $a\\};",
5304               []>;
5305 def SUST_B_2D_ARRAY_V4B16_TRAP
5306   : NVPTXInst<(outs),
5307               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5308                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5309      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5310      "\\{$r, $g, $b, $a\\};",
5311               []>;
5312 def SUST_B_2D_ARRAY_V4B32_TRAP
5313   : NVPTXInst<(outs),
5314               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5315                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5316      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5317      "\\{$r, $g, $b, $a\\};",
5318               []>;
5319
5320
// sust.b.3d.*.trap records. Each one is an NVPTXInst with no outputs and an
// empty pattern list. Operands, in order:
//   $s          Int64Regs, printed first inside the brackets
//   $x, $y, $z  Int32Regs coordinate operands
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// The coordinate vector is printed as {$x, $y, $z, $z}: $z is emitted twice
// in every record of this group, so three operands yield four components.
// NOTE(review): presumably PTX wants a four-element coordinate vector for 3d
// and the repeat is padding -- confirm against the PTX ISA before treating
// it as a typo.
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
5321 def SUST_B_3D_B8_TRAP
5322   : NVPTXInst<(outs),
5323               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5324                    Int16Regs:$r),
5325               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5326               []>;
5327 def SUST_B_3D_B16_TRAP
5328   : NVPTXInst<(outs),
5329               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5330                    Int16Regs:$r),
5331               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5332               []>;
5333 def SUST_B_3D_B32_TRAP
5334   : NVPTXInst<(outs),
5335               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5336                    Int32Regs:$r),
5337               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5338               []>;
5339 def SUST_B_3D_B64_TRAP
5340   : NVPTXInst<(outs),
5341               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5342                    Int64Regs:$r),
5343               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5344               []>;
5345 def SUST_B_3D_V2B8_TRAP
5346   : NVPTXInst<(outs),
5347               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5348                    Int16Regs:$r, Int16Regs:$g),
5349               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5350               "\\{$r, $g\\};",
5351               []>;
5352 def SUST_B_3D_V2B16_TRAP
5353   : NVPTXInst<(outs),
5354               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5355                    Int16Regs:$r, Int16Regs:$g),
5356               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5357               "\\{$r, $g\\};",
5358               []>;
5359 def SUST_B_3D_V2B32_TRAP
5360   : NVPTXInst<(outs),
5361               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5362                    Int32Regs:$r, Int32Regs:$g),
5363               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5364               "\\{$r, $g\\};",
5365               []>;
5366 def SUST_B_3D_V2B64_TRAP
5367   : NVPTXInst<(outs),
5368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5369                    Int64Regs:$r, Int64Regs:$g),
5370               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5371               "\\{$r, $g\\};",
5372               []>;
5373 def SUST_B_3D_V4B8_TRAP
5374   : NVPTXInst<(outs),
5375               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5376                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5377          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5378          "\\{$r, $g, $b, $a\\};",
5379               []>;
5380 def SUST_B_3D_V4B16_TRAP
5381   : NVPTXInst<(outs),
5382               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5383                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5384         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5385         "\\{$r, $g, $b, $a\\};",
5386               []>;
5387 def SUST_B_3D_V4B32_TRAP
5388   : NVPTXInst<(outs),
5389               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5390                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5391         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5392         "\\{$r, $g, $b, $a\\};",
5393               []>;
5394
5395
5396 // .zero variant
// sust.b.1d.*.zero records. Each one is an NVPTXInst with no outputs and an
// empty pattern list. Operands, in order:
//   $s  Int64Regs, printed first inside the brackets
//       (presumably the surface handle -- confirm against the lowering code)
//   $x  Int32Regs, the single coordinate
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// The data register class follows the element width: Int16Regs for both b8
// and b16, Int32Regs for b32, Int64Regs for b64. Scalar and .v2 forms exist
// for all four widths; .v4 stops at b32 (no .v4.b64 record in this group).
5397 def SUST_B_1D_B8_ZERO
5398   : NVPTXInst<(outs),
5399               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5400               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5401               []>;
5402 def SUST_B_1D_B16_ZERO
5403   : NVPTXInst<(outs),
5404               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5405               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5406               []>;
5407 def SUST_B_1D_B32_ZERO
5408   : NVPTXInst<(outs),
5409               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5410               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5411               []>;
5412 def SUST_B_1D_B64_ZERO
5413   : NVPTXInst<(outs),
5414               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5415               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5416               []>;
5417 def SUST_B_1D_V2B8_ZERO
5418   : NVPTXInst<(outs),
5419               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5420               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5421               []>;
5422 def SUST_B_1D_V2B16_ZERO
5423   : NVPTXInst<(outs),
5424               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5425               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5426               []>;
5427 def SUST_B_1D_V2B32_ZERO
5428   : NVPTXInst<(outs),
5429               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5430               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5431               []>;
5432 def SUST_B_1D_V2B64_ZERO
5433   : NVPTXInst<(outs),
5434               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5435               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5436               []>;
5437 def SUST_B_1D_V4B8_ZERO
5438   : NVPTXInst<(outs),
5439               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5440                    Int16Regs:$b, Int16Regs:$a),
5441               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5442               []>;
5443 def SUST_B_1D_V4B16_ZERO
5444   : NVPTXInst<(outs),
5445               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5446                    Int16Regs:$b, Int16Regs:$a),
5447               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5448               []>;
5449 def SUST_B_1D_V4B32_ZERO
5450   : NVPTXInst<(outs),
5451               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5452                    Int32Regs:$b, Int32Regs:$a),
5453               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5454               []>;
5455
5456
// sust.b.a1d.*.zero records (the _1D_ARRAY_ names map to the "a1d" mnemonic).
// Each one is an NVPTXInst with no outputs and an empty pattern list.
// Operands, in order:
//   $s    Int64Regs, printed first inside the brackets
//   $idx  Int32Regs, printed as the first coordinate component
//   $x    Int32Regs, printed as the second coordinate component
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
// The .v4 templates are written as two adjacent string literals, which
// TableGen joins into a single asm string.
5457 def SUST_B_1D_ARRAY_B8_ZERO
5458   : NVPTXInst<(outs),
5459               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5460               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5461               []>;
5462 def SUST_B_1D_ARRAY_B16_ZERO
5463   : NVPTXInst<(outs),
5464               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5465               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5466               []>;
5467 def SUST_B_1D_ARRAY_B32_ZERO
5468   : NVPTXInst<(outs),
5469               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5470               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5471               []>;
5472 def SUST_B_1D_ARRAY_B64_ZERO
5473   : NVPTXInst<(outs),
5474               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5475               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5476               []>;
5477 def SUST_B_1D_ARRAY_V2B8_ZERO
5478   : NVPTXInst<(outs),
5479               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5480                    Int16Regs:$g),
5481               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5482               []>;
5483 def SUST_B_1D_ARRAY_V2B16_ZERO
5484   : NVPTXInst<(outs),
5485               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5486                    Int16Regs:$g),
5487               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5488               []>;
5489 def SUST_B_1D_ARRAY_V2B32_ZERO
5490   : NVPTXInst<(outs),
5491               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5492                    Int32Regs:$g),
5493               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5494               []>;
5495 def SUST_B_1D_ARRAY_V2B64_ZERO
5496   : NVPTXInst<(outs),
5497               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5498                    Int64Regs:$g),
5499               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5500               []>;
5501 def SUST_B_1D_ARRAY_V4B8_ZERO
5502   : NVPTXInst<(outs),
5503               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5504                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5505               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5506               "\\{$r, $g, $b, $a\\};",
5507               []>;
5508 def SUST_B_1D_ARRAY_V4B16_ZERO
5509   : NVPTXInst<(outs),
5510               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5511                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5512              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5513              "\\{$r, $g, $b, $a\\};",
5514               []>;
5515 def SUST_B_1D_ARRAY_V4B32_ZERO
5516   : NVPTXInst<(outs),
5517               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5518                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5519              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5520              "\\{$r, $g, $b, $a\\};",
5521               []>;
5522
5523
// sust.b.2d.*.zero records. Each one is an NVPTXInst with no outputs and an
// empty pattern list. Operands, in order:
//   $s      Int64Regs, printed first inside the brackets
//   $x, $y  Int32Regs, printed as the two-component coordinate vector
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
// The .v4 templates are written as two adjacent string literals, which
// TableGen joins into a single asm string.
5524 def SUST_B_2D_B8_ZERO
5525   : NVPTXInst<(outs),
5526               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5527               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5528               []>;
5529 def SUST_B_2D_B16_ZERO
5530   : NVPTXInst<(outs),
5531               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5532               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5533               []>;
5534 def SUST_B_2D_B32_ZERO
5535   : NVPTXInst<(outs),
5536               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5537               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5538               []>;
5539 def SUST_B_2D_B64_ZERO
5540   : NVPTXInst<(outs),
5541               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5542               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5543               []>;
5544 def SUST_B_2D_V2B8_ZERO
5545   : NVPTXInst<(outs),
5546               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5547                    Int16Regs:$g),
5548               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5549               []>;
5550 def SUST_B_2D_V2B16_ZERO
5551   : NVPTXInst<(outs),
5552               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5553                    Int16Regs:$g),
5554               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5555               []>;
5556 def SUST_B_2D_V2B32_ZERO
5557   : NVPTXInst<(outs),
5558               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5559                    Int32Regs:$g),
5560               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5561               []>;
5562 def SUST_B_2D_V2B64_ZERO
5563   : NVPTXInst<(outs),
5564               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5565                    Int64Regs:$g),
5566               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5567               []>;
5568 def SUST_B_2D_V4B8_ZERO
5569   : NVPTXInst<(outs),
5570               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5571                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5572               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5573               "\\{$r, $g, $b, $a\\};",
5574               []>;
5575 def SUST_B_2D_V4B16_ZERO
5576   : NVPTXInst<(outs),
5577               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5578                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5579              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5580              "\\{$r, $g, $b, $a\\};",
5581               []>;
5582 def SUST_B_2D_V4B32_ZERO
5583   : NVPTXInst<(outs),
5584               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5585                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5586              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5587              "\\{$r, $g, $b, $a\\};",
5588               []>;
5589
5590
// sust.b.a2d.*.zero records (the _2D_ARRAY_ names map to the "a2d" mnemonic).
// Each one is an NVPTXInst with no outputs and an empty pattern list.
// Operands, in order:
//   $s            Int64Regs, printed first inside the brackets
//   $idx, $x, $y  Int32Regs coordinate operands
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// The coordinate vector is printed as {$idx, $x, $y, $y}: $y is emitted twice
// in every record of this group, so three operands yield four components.
// NOTE(review): presumably PTX wants a four-element coordinate vector for
// a2d and the repeat is padding -- confirm against the PTX ISA before
// treating it as a typo.
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
5591 def SUST_B_2D_ARRAY_B8_ZERO
5592   : NVPTXInst<(outs),
5593               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5594                    Int16Regs:$r),
5595               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5596               []>;
5597 def SUST_B_2D_ARRAY_B16_ZERO
5598   : NVPTXInst<(outs),
5599               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5600                    Int16Regs:$r),
5601               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5602               []>;
5603 def SUST_B_2D_ARRAY_B32_ZERO
5604   : NVPTXInst<(outs),
5605               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5606                    Int32Regs:$r),
5607               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5608               []>;
5609 def SUST_B_2D_ARRAY_B64_ZERO
5610   : NVPTXInst<(outs),
5611               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5612                    Int64Regs:$r),
5613               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5614               []>;
5615 def SUST_B_2D_ARRAY_V2B8_ZERO
5616   : NVPTXInst<(outs),
5617               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5618                    Int16Regs:$r, Int16Regs:$g),
5619               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5620               "\\{$r, $g\\};",
5621               []>;
5622 def SUST_B_2D_ARRAY_V2B16_ZERO
5623   : NVPTXInst<(outs),
5624               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5625                    Int16Regs:$r, Int16Regs:$g),
5626              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5627              "\\{$r, $g\\};",
5628               []>;
5629 def SUST_B_2D_ARRAY_V2B32_ZERO
5630   : NVPTXInst<(outs),
5631               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5632                    Int32Regs:$r, Int32Regs:$g),
5633              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5634              "\\{$r, $g\\};",
5635               []>;
5636 def SUST_B_2D_ARRAY_V2B64_ZERO
5637   : NVPTXInst<(outs),
5638               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5639                    Int64Regs:$r, Int64Regs:$g),
5640              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5641              "\\{$r, $g\\};",
5642               []>;
5643 def SUST_B_2D_ARRAY_V4B8_ZERO
5644   : NVPTXInst<(outs),
5645               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5646                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5647       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5648       "\\{$r, $g, $b, $a\\};",
5649               []>;
5650 def SUST_B_2D_ARRAY_V4B16_ZERO
5651   : NVPTXInst<(outs),
5652               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5653                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5654      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5655      "\\{$r, $g, $b, $a\\};",
5656               []>;
5657 def SUST_B_2D_ARRAY_V4B32_ZERO
5658   : NVPTXInst<(outs),
5659               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5660                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5661      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5662      "\\{$r, $g, $b, $a\\};",
5663               []>;
5664
5665
// sust.b.3d.*.zero records. Each one is an NVPTXInst with no outputs and an
// empty pattern list. Operands, in order:
//   $s          Int64Regs, printed first inside the brackets
//   $x, $y, $z  Int32Regs coordinate operands
//   $r[, $g[, $b, $a]]  data registers printed in the trailing brace group
// The coordinate vector is printed as {$x, $y, $z, $z}: $z is emitted twice
// in every record of this group, so three operands yield four components.
// NOTE(review): presumably PTX wants a four-element coordinate vector for 3d
// and the repeat is padding -- confirm against the PTX ISA before treating
// it as a typo.
// Data register class: Int16Regs for b8 and b16, Int32Regs for b32,
// Int64Regs for b64; .v4 stops at b32.
5666 def SUST_B_3D_B8_ZERO
5667   : NVPTXInst<(outs),
5668               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5669                    Int16Regs:$r),
5670               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5671               []>;
5672 def SUST_B_3D_B16_ZERO
5673   : NVPTXInst<(outs),
5674               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5675                    Int16Regs:$r),
5676               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5677               []>;
5678 def SUST_B_3D_B32_ZERO
5679   : NVPTXInst<(outs),
5680               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5681                    Int32Regs:$r),
5682               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5683               []>;
5684 def SUST_B_3D_B64_ZERO
5685   : NVPTXInst<(outs),
5686               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5687                    Int64Regs:$r),
5688               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5689               []>;
5690 def SUST_B_3D_V2B8_ZERO
5691   : NVPTXInst<(outs),
5692               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5693                    Int16Regs:$r, Int16Regs:$g),
5694               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5695               "\\{$r, $g\\};",
5696               []>;
5697 def SUST_B_3D_V2B16_ZERO
5698   : NVPTXInst<(outs),
5699               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5700                    Int16Regs:$r, Int16Regs:$g),
5701               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5702               "\\{$r, $g\\};",
5703               []>;
5704 def SUST_B_3D_V2B32_ZERO
5705   : NVPTXInst<(outs),
5706               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5707                    Int32Regs:$r, Int32Regs:$g),
5708               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5709               "\\{$r, $g\\};",
5710               []>;
5711 def SUST_B_3D_V2B64_ZERO
5712   : NVPTXInst<(outs),
5713               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5714                    Int64Regs:$r, Int64Regs:$g),
5715               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5716               "\\{$r, $g\\};",
5717               []>;
5718 def SUST_B_3D_V4B8_ZERO
5719   : NVPTXInst<(outs),
5720               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5721                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5722          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5723          "\\{$r, $g, $b, $a\\};",
5724               []>;
5725 def SUST_B_3D_V4B16_ZERO
5726   : NVPTXInst<(outs),
5727               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5728                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5729         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5730         "\\{$r, $g, $b, $a\\};",
5731               []>;
5732 def SUST_B_3D_V4B32_ZERO
5733   : NVPTXInst<(outs),
5734               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5735                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5736         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5737         "\\{$r, $g, $b, $a\\};",
5738               []>;
5739
5740
5741
5742 // Formatted stores (sust.p mnemonics), .trap variant
5743
5744 def SUST_P_1D_B8_TRAP
5745   : NVPTXInst<(outs),
5746               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5747               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5748               []>;
5749 def SUST_P_1D_B16_TRAP
5750   : NVPTXInst<(outs),
5751               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5752               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5753               []>;
5754 def SUST_P_1D_B32_TRAP
5755   : NVPTXInst<(outs),
5756               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5757               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5758               []>;
5759 def SUST_P_1D_V2B8_TRAP
5760   : NVPTXInst<(outs),
5761               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5762               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5763               []>;
5764 def SUST_P_1D_V2B16_TRAP
5765   : NVPTXInst<(outs),
5766               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5767               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5768               []>;
5769 def SUST_P_1D_V2B32_TRAP
5770   : NVPTXInst<(outs),
5771               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5772               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5773               []>;
5774 def SUST_P_1D_V4B8_TRAP
5775   : NVPTXInst<(outs),
5776               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5777                    Int16Regs:$b, Int16Regs:$a),
5778               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5779               []>;
5780 def SUST_P_1D_V4B16_TRAP
5781   : NVPTXInst<(outs),
5782               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5783                    Int16Regs:$b, Int16Regs:$a),
5784               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5785               []>;
// Formatted 1-D four-element store: prints `sust.p.1d.v4.b32.trap` with
// surface operand $s, coordinate {$x} and values {$r, $g, $b, $a} (Int32Regs).
// No outputs; empty pattern list.
5786 def SUST_P_1D_V4B32_TRAP
5787   : NVPTXInst<(outs),
5788               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5789                    Int32Regs:$b, Int32Regs:$a),
5790               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5791               []>;
5792
5793
// Formatted 1-D array store: prints `sust.p.a1d.b8.trap` with surface operand
// $s, coordinate vector {$idx, $x} ($idx is printed first) and value {$r}
// (Int16Regs). No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5794 def SUST_P_1D_ARRAY_B8_TRAP
5795   : NVPTXInst<(outs),
5796               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5797               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5798               []>;
// Formatted 1-D array store: prints `sust.p.a1d.b16.trap` with surface operand
// $s, coordinate vector {$idx, $x} ($idx is printed first) and value {$r}
// (Int16Regs). No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5799 def SUST_P_1D_ARRAY_B16_TRAP
5800   : NVPTXInst<(outs),
5801               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5802               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5803               []>;
// Formatted 1-D array store: prints `sust.p.a1d.b32.trap` with surface operand
// $s, coordinate vector {$idx, $x} ($idx is printed first) and value {$r}
// (Int32Regs). No outputs; empty pattern list.
5804 def SUST_P_1D_ARRAY_B32_TRAP
5805   : NVPTXInst<(outs),
5806               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5807               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5808               []>;
// Formatted 1-D array two-element store: prints `sust.p.a1d.v2.b8.trap` with
// surface operand $s, coordinate vector {$idx, $x} and values {$r, $g}
// (Int16Regs). No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5809 def SUST_P_1D_ARRAY_V2B8_TRAP
5810   : NVPTXInst<(outs),
5811               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5812                    Int16Regs:$g),
5813               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5814               []>;
// Formatted 1-D array two-element store: prints `sust.p.a1d.v2.b16.trap` with
// surface operand $s, coordinate vector {$idx, $x} and values {$r, $g}
// (Int16Regs). No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5815 def SUST_P_1D_ARRAY_V2B16_TRAP
5816   : NVPTXInst<(outs),
5817               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5818                    Int16Regs:$g),
5819               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5820               []>;
// Formatted 1-D array two-element store: prints `sust.p.a1d.v2.b32.trap` with
// surface operand $s, coordinate vector {$idx, $x} and values {$r, $g}
// (Int32Regs). No outputs; empty pattern list.
5821 def SUST_P_1D_ARRAY_V2B32_TRAP
5822   : NVPTXInst<(outs),
5823               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5824                    Int32Regs:$g),
5825               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5826               []>;
// Formatted 1-D array four-element store: prints `sust.p.a1d.v4.b8.trap` with
// surface operand $s, coordinate vector {$idx, $x} and values
// {$r, $g, $b, $a} (Int16Regs). The asm string is two adjacent literals.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5827 def SUST_P_1D_ARRAY_V4B8_TRAP
5828   : NVPTXInst<(outs),
5829               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5830                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5831               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5832               "\\{$r, $g, $b, $a\\};",
5833               []>;
// Formatted 1-D array four-element store: prints `sust.p.a1d.v4.b16.trap` with
// surface operand $s, coordinate vector {$idx, $x} and values
// {$r, $g, $b, $a} (Int16Regs). The asm string is two adjacent literals.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5834 def SUST_P_1D_ARRAY_V4B16_TRAP
5835   : NVPTXInst<(outs),
5836               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5837                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5838              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5839              "\\{$r, $g, $b, $a\\};",
5840               []>;
// Formatted 1-D array four-element store: prints `sust.p.a1d.v4.b32.trap` with
// surface operand $s, coordinate vector {$idx, $x} and values
// {$r, $g, $b, $a} (Int32Regs). The asm string is two adjacent literals.
// No outputs; empty pattern list.
5841 def SUST_P_1D_ARRAY_V4B32_TRAP
5842   : NVPTXInst<(outs),
5843               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5844                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5845              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5846              "\\{$r, $g, $b, $a\\};",
5847               []>;
5848
5849
// Formatted 2-D store: prints `sust.p.2d.b8.trap` with surface operand $s,
// coordinate vector {$x, $y} and value {$r} (Int16Regs). No outputs; empty
// pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5850 def SUST_P_2D_B8_TRAP
5851   : NVPTXInst<(outs),
5852               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5853               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5854               []>;
// Formatted 2-D store: prints `sust.p.2d.b16.trap` with surface operand $s,
// coordinate vector {$x, $y} and value {$r} (Int16Regs). No outputs; empty
// pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5855 def SUST_P_2D_B16_TRAP
5856   : NVPTXInst<(outs),
5857               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5858               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5859               []>;
// Formatted 2-D store: prints `sust.p.2d.b32.trap` with surface operand $s,
// coordinate vector {$x, $y} and value {$r} (Int32Regs). No outputs; empty
// pattern list.
5860 def SUST_P_2D_B32_TRAP
5861   : NVPTXInst<(outs),
5862               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5863               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5864               []>;
// Formatted 2-D two-element store: prints `sust.p.2d.v2.b8.trap` with surface
// operand $s, coordinate vector {$x, $y} and values {$r, $g} (Int16Regs).
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5865 def SUST_P_2D_V2B8_TRAP
5866   : NVPTXInst<(outs),
5867               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5868                    Int16Regs:$g),
5869               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5870               []>;
// Formatted 2-D two-element store: prints `sust.p.2d.v2.b16.trap` with surface
// operand $s, coordinate vector {$x, $y} and values {$r, $g} (Int16Regs).
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5871 def SUST_P_2D_V2B16_TRAP
5872   : NVPTXInst<(outs),
5873               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5874                    Int16Regs:$g),
5875               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5876               []>;
// Formatted 2-D two-element store: prints `sust.p.2d.v2.b32.trap` with surface
// operand $s, coordinate vector {$x, $y} and values {$r, $g} (Int32Regs).
// No outputs; empty pattern list.
5877 def SUST_P_2D_V2B32_TRAP
5878   : NVPTXInst<(outs),
5879               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5880                    Int32Regs:$g),
5881               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5882               []>;
// Formatted 2-D four-element store: prints `sust.p.2d.v4.b8.trap` with surface
// operand $s, coordinate vector {$x, $y} and values {$r, $g, $b, $a}
// (Int16Regs). The asm string is two adjacent literals. No outputs; empty
// pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5883 def SUST_P_2D_V4B8_TRAP
5884   : NVPTXInst<(outs),
5885               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5886                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5887               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5888               "\\{$r, $g, $b, $a\\};",
5889               []>;
// Formatted 2-D four-element store: prints `sust.p.2d.v4.b16.trap` with
// surface operand $s, coordinate vector {$x, $y} and values {$r, $g, $b, $a}
// (Int16Regs). The asm string is two adjacent literals. No outputs; empty
// pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5890 def SUST_P_2D_V4B16_TRAP
5891   : NVPTXInst<(outs),
5892               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5893                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5894              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5895              "\\{$r, $g, $b, $a\\};",
5896               []>;
// Formatted 2-D four-element store: prints `sust.p.2d.v4.b32.trap` with
// surface operand $s, coordinate vector {$x, $y} and values {$r, $g, $b, $a}
// (Int32Regs). The asm string is two adjacent literals. No outputs; empty
// pattern list.
5897 def SUST_P_2D_V4B32_TRAP
5898   : NVPTXInst<(outs),
5899               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5900                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5901              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5902              "\\{$r, $g, $b, $a\\};",
5903               []>;
5904
5905
// Formatted 2-D array store: prints `sust.p.a2d.b8.trap` with surface operand
// $s, coordinate vector {$idx, $x, $y, $y} and value {$r} (Int16Regs).
// $y is printed twice, giving a four-element vector -- presumably padding
// because PTX expects four elements here; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5906 def SUST_P_2D_ARRAY_B8_TRAP
5907   : NVPTXInst<(outs),
5908               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5909                    Int16Regs:$r),
5910               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5911               []>;
// Formatted 2-D array store: prints `sust.p.a2d.b16.trap` with surface operand
// $s, coordinate vector {$idx, $x, $y, $y} and value {$r} (Int16Regs).
// $y is printed twice, giving a four-element vector -- presumably padding
// because PTX expects four elements here; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5912 def SUST_P_2D_ARRAY_B16_TRAP
5913   : NVPTXInst<(outs),
5914               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5915                    Int16Regs:$r),
5916               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5917               []>;
// Formatted 2-D array store: prints `sust.p.a2d.b32.trap` with surface operand
// $s, coordinate vector {$idx, $x, $y, $y} and value {$r} (Int32Regs).
// $y is printed twice, giving a four-element vector -- presumably padding
// because PTX expects four elements here; confirm against the PTX ISA.
// No outputs; empty pattern list.
5918 def SUST_P_2D_ARRAY_B32_TRAP
5919   : NVPTXInst<(outs),
5920               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5921                    Int32Regs:$r),
5922               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5923               []>;
// Formatted 2-D array two-element store: prints `sust.p.a2d.v2.b8.trap` with
// surface operand $s, coordinate vector {$idx, $x, $y, $y} and values
// {$r, $g} (Int16Regs). $y is printed twice -- presumably padding to the
// four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5924 def SUST_P_2D_ARRAY_V2B8_TRAP
5925   : NVPTXInst<(outs),
5926               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5927                    Int16Regs:$r, Int16Regs:$g),
5928               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5929               "\\{$r, $g\\};",
5930               []>;
// Formatted 2-D array two-element store: prints `sust.p.a2d.v2.b16.trap` with
// surface operand $s, coordinate vector {$idx, $x, $y, $y} and values
// {$r, $g} (Int16Regs). $y is printed twice -- presumably padding to the
// four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5931 def SUST_P_2D_ARRAY_V2B16_TRAP
5932   : NVPTXInst<(outs),
5933               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5934                    Int16Regs:$r, Int16Regs:$g),
5935              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5936              "\\{$r, $g\\};",
5937               []>;
// Formatted 2-D array two-element store: prints `sust.p.a2d.v2.b32.trap` with
// surface operand $s, coordinate vector {$idx, $x, $y, $y} and values
// {$r, $g} (Int32Regs). $y is printed twice -- presumably padding to the
// four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
5938 def SUST_P_2D_ARRAY_V2B32_TRAP
5939   : NVPTXInst<(outs),
5940               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5941                    Int32Regs:$r, Int32Regs:$g),
5942              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5943              "\\{$r, $g\\};",
5944               []>;
// Formatted 2-D array four-element store: prints `sust.p.a2d.v4.b8.trap` with
// surface operand $s, coordinate vector {$idx, $x, $y, $y} and values
// {$r, $g, $b, $a} (Int16Regs). $y is printed twice -- presumably padding to
// the four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5945 def SUST_P_2D_ARRAY_V4B8_TRAP
5946   : NVPTXInst<(outs),
5947               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5948                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5949       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5950       "\\{$r, $g, $b, $a\\};",
5951               []>;
// Formatted 2-D array four-element store: prints `sust.p.a2d.v4.b16.trap` with
// surface operand $s, coordinate vector {$idx, $x, $y, $y} and values
// {$r, $g, $b, $a} (Int16Regs). $y is printed twice -- presumably padding to
// the four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5952 def SUST_P_2D_ARRAY_V4B16_TRAP
5953   : NVPTXInst<(outs),
5954               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5955                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5956      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5957      "\\{$r, $g, $b, $a\\};",
5958               []>;
// Formatted 2-D array four-element store: prints `sust.p.a2d.v4.b32.trap` with
// surface operand $s, coordinate vector {$idx, $x, $y, $y} and values
// {$r, $g, $b, $a} (Int32Regs). $y is printed twice -- presumably padding to
// the four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
5959 def SUST_P_2D_ARRAY_V4B32_TRAP
5960   : NVPTXInst<(outs),
5961               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5962                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5963      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5964      "\\{$r, $g, $b, $a\\};",
5965               []>;
5966
5967
// Formatted 3-D store: prints `sust.p.3d.b8.trap` with surface operand $s,
// coordinate vector {$x, $y, $z, $z} and value {$r} (Int16Regs).
// $z is printed twice, giving a four-element vector -- presumably padding
// because PTX expects four elements here; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5968 def SUST_P_3D_B8_TRAP
5969   : NVPTXInst<(outs),
5970               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5971                    Int16Regs:$r),
5972               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5973               []>;
// Formatted 3-D store: prints `sust.p.3d.b16.trap` with surface operand $s,
// coordinate vector {$x, $y, $z, $z} and value {$r} (Int16Regs).
// $z is printed twice, giving a four-element vector -- presumably padding
// because PTX expects four elements here; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5974 def SUST_P_3D_B16_TRAP
5975   : NVPTXInst<(outs),
5976               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5977                    Int16Regs:$r),
5978               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5979               []>;
// Formatted 3-D store: prints `sust.p.3d.b32.trap` with surface operand $s,
// coordinate vector {$x, $y, $z, $z} and value {$r} (Int32Regs).
// $z is printed twice, giving a four-element vector -- presumably padding
// because PTX expects four elements here; confirm against the PTX ISA.
// No outputs; empty pattern list.
5980 def SUST_P_3D_B32_TRAP
5981   : NVPTXInst<(outs),
5982               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5983                    Int32Regs:$r),
5984               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5985               []>;
// Formatted 3-D two-element store: prints `sust.p.3d.v2.b8.trap` with surface
// operand $s, coordinate vector {$x, $y, $z, $z} and values {$r, $g}
// (Int16Regs). $z is printed twice -- presumably padding to the four-element
// vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
5986 def SUST_P_3D_V2B8_TRAP
5987   : NVPTXInst<(outs),
5988               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5989                    Int16Regs:$r, Int16Regs:$g),
5990               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5991               "\\{$r, $g\\};",
5992               []>;
// Formatted 3-D two-element store: prints `sust.p.3d.v2.b16.trap` with surface
// operand $s, coordinate vector {$x, $y, $z, $z} and values {$r, $g}
// (Int16Regs). $z is printed twice -- presumably padding to the four-element
// vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
5993 def SUST_P_3D_V2B16_TRAP
5994   : NVPTXInst<(outs),
5995               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5996                    Int16Regs:$r, Int16Regs:$g),
5997               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5998               "\\{$r, $g\\};",
5999               []>;
// Formatted 3-D two-element store: prints `sust.p.3d.v2.b32.trap` with surface
// operand $s, coordinate vector {$x, $y, $z, $z} and values {$r, $g}
// (Int32Regs). $z is printed twice -- presumably padding to the four-element
// vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
6000 def SUST_P_3D_V2B32_TRAP
6001   : NVPTXInst<(outs),
6002               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6003                    Int32Regs:$r, Int32Regs:$g),
6004               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6005               "\\{$r, $g\\};",
6006               []>;
// Formatted 3-D four-element store: prints `sust.p.3d.v4.b8.trap` with surface
// operand $s, coordinate vector {$x, $y, $z, $z} and values {$r, $g, $b, $a}
// (Int16Regs). $z is printed twice -- presumably padding to the four-element
// vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b8 is valid.
6007 def SUST_P_3D_V4B8_TRAP
6008   : NVPTXInst<(outs),
6009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6010                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6011          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6012          "\\{$r, $g, $b, $a\\};",
6013               []>;
// Formatted 3-D four-element store: prints `sust.p.3d.v4.b16.trap` with
// surface operand $s, coordinate vector {$x, $y, $z, $z} and values
// {$r, $g, $b, $a} (Int16Regs). $z is printed twice -- presumably padding to
// the four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
// NOTE(review): the PTX ISA lists only .b32 for sust.p -- confirm .b16 is valid.
6014 def SUST_P_3D_V4B16_TRAP
6015   : NVPTXInst<(outs),
6016               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6017                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6018         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6019         "\\{$r, $g, $b, $a\\};",
6020               []>;
// Formatted 3-D four-element store: prints `sust.p.3d.v4.b32.trap` with
// surface operand $s, coordinate vector {$x, $y, $z, $z} and values
// {$r, $g, $b, $a} (Int32Regs). $z is printed twice -- presumably padding to
// the four-element vector PTX expects; confirm against the PTX ISA.
// No outputs; empty pattern list.
6021 def SUST_P_3D_V4B32_TRAP
6022   : NVPTXInst<(outs),
6023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6024                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6025         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6026         "\\{$r, $g, $b, $a\\};",
6027               []>;
6028 }
6029
6030 // Surface store instruction patterns
6031 // I'm not sure why we can't just include these in the instruction definitions,
6032 // but TableGen complains of type errors :(
6033
6034 // .clamp variant
// 1-D surface stores, .clamp: each int_nvvm_sust_b_1d_<ty>_clamp intrinsic is
// rewritten to the SUST_B_1D_<TY>_CLAMP record of the same element type, with
// the operands forwarded unchanged and in the same order: $s, $x, then one
// ($r), two ($r, $g) or four ($r, $g, $b, $a) values.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6035 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
6036            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6037           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6038
6039 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6040            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6041           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6042
6043 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6044            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6045           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6046
6047 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6048            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6049           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6050
6051 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6052            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6053           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6054            Int16Regs:$r, Int16Regs:$g)>;
6055
6056 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6057            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6058           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6059            Int16Regs:$r, Int16Regs:$g)>;
6060
6061 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6062            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6063           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6064            Int32Regs:$r, Int32Regs:$g)>;
6065
6066 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6067            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6068           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6069            Int64Regs:$r, Int64Regs:$g)>;
6070
6071 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6072            Int64Regs:$s, Int32Regs:$x,
6073            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6074           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6075            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6076
6077 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6078            Int64Regs:$s, Int32Regs:$x,
6079            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6080           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6081            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6082
6083 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6084            Int64Regs:$s, Int32Regs:$x,
6085            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6086           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6087            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6088
6089
6090
// 1-D array surface stores, .clamp: each int_nvvm_sust_b_1d_array_<ty>_clamp
// intrinsic is rewritten to the matching SUST_B_1D_ARRAY_<TY>_CLAMP record,
// forwarding the operands in the same order: $s, $l, $x, then one, two or
// four values. $l is the extra Int32Regs operand that precedes $x in the
// array forms.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6091 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6092            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6093           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6094            Int16Regs:$r)>;
6095
6096 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6097            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6098           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6099            Int16Regs:$r)>;
6100
6101 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6102            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6103           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6104            Int32Regs:$r)>;
6105
6106 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6107            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6108           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109            Int64Regs:$r)>;
6110
6111 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6112           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6113           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6114            Int16Regs:$r, Int16Regs:$g)>;
6115
6116 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6117           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6118           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6119            Int16Regs:$r, Int16Regs:$g)>;
6120
6121 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6122           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6123           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6124            Int32Regs:$r, Int32Regs:$g)>;
6125
6126 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6127           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6128           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6129            Int64Regs:$r, Int64Regs:$g)>;
6130
6131 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6132            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6133            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6134           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6135            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6136
6137 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6138            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6139            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6140           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6141            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6142
6143 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6144            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6145            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6146           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6147            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6148
6149
6150
// 2-D surface stores, .clamp: each int_nvvm_sust_b_2d_<ty>_clamp intrinsic is
// rewritten to the matching SUST_B_2D_<TY>_CLAMP record, forwarding the
// operands in the same order: $s, $x, $y, then one, two or four values.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6151 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6152            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6153           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6154            Int16Regs:$r)>;
6155
6156 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6157            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6158           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6159            Int16Regs:$r)>;
6160
6161 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6162            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6163           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6164            Int32Regs:$r)>;
6165
6166 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6167            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6168           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6169            Int64Regs:$r)>;
6170
6171 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6172           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6173           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6174            Int16Regs:$r, Int16Regs:$g)>;
6175
6176 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6177           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6178           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6179            Int16Regs:$r, Int16Regs:$g)>;
6180
6181 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6182           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6183           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6184            Int32Regs:$r, Int32Regs:$g)>;
6185
6186 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6187           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6188           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6189            Int64Regs:$r, Int64Regs:$g)>;
6190
6191 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6192            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6193            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6194           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6195            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6196
6197 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6198            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6199            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6200           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6201            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6202
6203 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6204            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6205            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6206           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6207            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6208
6209
6210
// 2-D array surface stores, .clamp: each int_nvvm_sust_b_2d_array_<ty>_clamp
// intrinsic is rewritten to the matching SUST_B_2D_ARRAY_<TY>_CLAMP record,
// forwarding the operands in the same order: $s, $l, $x, $y, then one, two or
// four values. $l is the extra Int32Regs operand that precedes $x in the
// array forms.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6211 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6212           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6213           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6214            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6215            Int16Regs:$r)>;
6216
6217 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6218           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6219           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6220            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6221            Int16Regs:$r)>;
6222
6223 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6224           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6225           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6226            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6227            Int32Regs:$r)>;
6228
6229 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6230           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6231           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6232            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6233            Int64Regs:$r)>;
6234
6235 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6236            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6237            Int16Regs:$r, Int16Regs:$g),
6238           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6239            Int32Regs:$x, Int32Regs:$y,
6240            Int16Regs:$r, Int16Regs:$g)>;
6241
6242 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6243            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6244            Int16Regs:$r, Int16Regs:$g),
6245           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6246            Int32Regs:$x, Int32Regs:$y,
6247            Int16Regs:$r, Int16Regs:$g)>;
6248
6249 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6250            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6251            Int32Regs:$g),
6252           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6253            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6254
6255 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6256            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6257            Int64Regs:$g),
6258           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6259            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6260
6261 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6262            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6263            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6264           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6265            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6266            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6267
6268 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6269            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6270            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6271           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6272            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6273            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6274
6275 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6276            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6277            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6278           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6279            Int32Regs:$x, Int32Regs:$y,
6280            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6281
6282
6283
// 3-D surface stores, .clamp: each int_nvvm_sust_b_3d_<ty>_clamp intrinsic is
// rewritten to the matching SUST_B_3D_<TY>_CLAMP record, forwarding the
// operands in the same order: $s, $x, $y, $z, then one, two or four values.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6284 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6285            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6286            Int16Regs:$r),
6287           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6288            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6289            Int16Regs:$r)>;
6290
6291 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6292            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6293            Int16Regs:$r),
6294           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6295            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6296            Int16Regs:$r)>;
6297
6298 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6299            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6300            Int32Regs:$r),
6301           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6302            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6303            Int32Regs:$r)>;
6304
6305 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6306            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6307            Int64Regs:$r),
6308           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6309            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6310            Int64Regs:$r)>;
6311
6312 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6313            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6314            Int16Regs:$r, Int16Regs:$g),
6315           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6316            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6317            Int16Regs:$r, Int16Regs:$g)>;
6318
6319 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6320            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6321            Int16Regs:$r, Int16Regs:$g),
6322           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6323            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6324            Int16Regs:$r, Int16Regs:$g)>;
6325
6326 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6327            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6328            Int32Regs:$r, Int32Regs:$g),
6329           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6330            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6331            Int32Regs:$r, Int32Regs:$g)>;
6332
6333 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6334            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6335            Int64Regs:$r, Int64Regs:$g),
6336           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6337            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6338            Int64Regs:$r, Int64Regs:$g)>;
6339
6340 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6341            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6342            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6343           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6344            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6345            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6346
6347 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6348            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6349            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6350           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6351            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6352            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6353
6354 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6355            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6356            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6357           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6358            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6359            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6360
6361
6362 // .trap variant
// 1-D surface stores, .trap: each int_nvvm_sust_b_1d_<ty>_trap intrinsic is
// rewritten to the SUST_B_1D_<TY>_TRAP record of the same element type, with
// the operands forwarded unchanged and in the same order: $s, $x, then one
// ($r), two ($r, $g) or four ($r, $g, $b, $a) values.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6363 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6364            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6365           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6366
6367 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6368            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6369           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6370
6371 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6372            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6373           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6374
6375 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6376            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6377           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6378
6379 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6380            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6381           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6382            Int16Regs:$r, Int16Regs:$g)>;
6383
6384 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6385            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6386           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6387            Int16Regs:$r, Int16Regs:$g)>;
6388
6389 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6390            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6391           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6392            Int32Regs:$r, Int32Regs:$g)>;
6393
6394 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6395            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6396           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6397            Int64Regs:$r, Int64Regs:$g)>;
6398
6399 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6400            Int64Regs:$s, Int32Regs:$x,
6401            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6402           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6403            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6404
6405 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6406            Int64Regs:$s, Int32Regs:$x,
6407            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6408           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6409            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6410
6411 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6412            Int64Regs:$s, Int32Regs:$x,
6413            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6414           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6415            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6416
6417
6418
// 1-D array surface stores, .trap: each int_nvvm_sust_b_1d_array_<ty>_trap
// intrinsic is rewritten to the matching SUST_B_1D_ARRAY_<TY>_TRAP record,
// forwarding the operands in the same order: $s, $l, $x, then one, two or
// four values. $l is the extra Int32Regs operand that precedes $x in the
// array forms.
// i8 and i16 values are both matched in Int16Regs, i32 in Int32Regs and i64
// in Int64Regs. The four-element forms cover i8/i16/i32 only.
6419 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6420            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6421           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6422            Int16Regs:$r)>;
6423
6424 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6425            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6426           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6427            Int16Regs:$r)>;
6428
6429 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6430            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6431           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6432            Int32Regs:$r)>;
6433
6434 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6435            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6436           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6437            Int64Regs:$r)>;
6438
6439 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6440           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6441           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6442            Int16Regs:$r, Int16Regs:$g)>;
6443
6444 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6445           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6446           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6447            Int16Regs:$r, Int16Regs:$g)>;
6448
6449 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6450           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6451           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6452            Int32Regs:$r, Int32Regs:$g)>;
6453
6454 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6455           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6456           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6457            Int64Regs:$r, Int64Regs:$g)>;
6458
6459 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6460            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6461            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6462           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6463            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6464
6465 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6466            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6467            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6468           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6469            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6470
6471 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6472            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6473            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6474           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6475            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6476
6477
6478
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_2D_*_TRAP instruction of the
// same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $x and $y (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_* instruction definitions.
6479 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6480            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6481           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6482            Int16Regs:$r)>;
6483
6484 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6485            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6486           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6487            Int16Regs:$r)>;
6488
6489 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6490            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6491           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6492            Int32Regs:$r)>;
6493
6494 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6495            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6496           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6497            Int64Regs:$r)>;
6498
6499 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6500           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6501           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6502            Int16Regs:$r, Int16Regs:$g)>;
6503
6504 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6505           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6506           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6507            Int16Regs:$r, Int16Regs:$g)>;
6508
6509 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6510           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6511           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6512            Int32Regs:$r, Int32Regs:$g)>;
6513
6514 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6515           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6516           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517            Int64Regs:$r, Int64Regs:$g)>;
6518
6519 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6520            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6521            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6522           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6523            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6524
6525 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6526            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6527            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6528           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6529            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6530
6531 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6532            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6533            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6534           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6535            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6536
6537
6538
// Selection patterns for the int_nvvm_sust_b_2d_array_*_trap intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_2D_ARRAY_*_TRAP instruction of
// the same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $l, $x and $y (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6539 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6540           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6541           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6542            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6543            Int16Regs:$r)>;
6544
6545 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6546           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6547           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6548            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6549            Int16Regs:$r)>;
6550
6551 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6552           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6553           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6554            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6555            Int32Regs:$r)>;
6556
6557 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6558           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6559           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6560            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6561            Int64Regs:$r)>;
6562
6563 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6564            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6565            Int16Regs:$r, Int16Regs:$g),
6566           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6567            Int32Regs:$x, Int32Regs:$y,
6568            Int16Regs:$r, Int16Regs:$g)>;
6569
6570 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6571            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6572            Int16Regs:$r, Int16Regs:$g),
6573           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6574            Int32Regs:$x, Int32Regs:$y,
6575            Int16Regs:$r, Int16Regs:$g)>;
6576
6577 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6578            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6579            Int32Regs:$g),
6580           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6581            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6582
6583 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6584            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6585            Int64Regs:$g),
6586           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6587            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6588
6589 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6590            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6591            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6592           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6593            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6594            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6595
6596 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6597            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6598            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6599           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6600            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6601            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6602
6603 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6604            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6605            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6606           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6607            Int32Regs:$x, Int32Regs:$y,
6608            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6609
6610
6611
// Selection patterns for the int_nvvm_sust_b_3d_*_trap intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_3D_*_TRAP instruction of the
// same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_* instruction definitions.
6612 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6613            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6614            Int16Regs:$r),
6615           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6616            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6617            Int16Regs:$r)>;
6618
6619 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6620            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6621            Int16Regs:$r),
6622           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6623            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6624            Int16Regs:$r)>;
6625
6626 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6627            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6628            Int32Regs:$r),
6629           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6630            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6631            Int32Regs:$r)>;
6632
6633 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6634            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6635            Int64Regs:$r),
6636           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6637            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6638            Int64Regs:$r)>;
6639
6640 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6641            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642            Int16Regs:$r, Int16Regs:$g),
6643           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6644            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6645            Int16Regs:$r, Int16Regs:$g)>;
6646
6647 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6648            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649            Int16Regs:$r, Int16Regs:$g),
6650           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6651            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6652            Int16Regs:$r, Int16Regs:$g)>;
6653
6654 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6655            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6656            Int32Regs:$r, Int32Regs:$g),
6657           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6658            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6659            Int32Regs:$r, Int32Regs:$g)>;
6660
6661 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6662            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6663            Int64Regs:$r, Int64Regs:$g),
6664           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6665            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6666            Int64Regs:$r, Int64Regs:$g)>;
6667
6668 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6669            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6670            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6671           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6672            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6673            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6674
6675 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6676            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6677            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6678           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6679            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6680            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6681
6682 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6683            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6684            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6685           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6686            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6687            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6688
6689
6690 // .zero variant
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_1D_*_ZERO instruction of the
// same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $x (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate
// -- confirm against the SUST_* instruction definitions.
6691 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6692            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6693           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6694
6695 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6696            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6697           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6698
6699 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6700            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6701           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6702
6703 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6704            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6705           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6706
6707 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6708            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6709           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6710            Int16Regs:$r, Int16Regs:$g)>;
6711
6712 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6713            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6714           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6715            Int16Regs:$r, Int16Regs:$g)>;
6716
6717 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6718            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6719           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6720            Int32Regs:$r, Int32Regs:$g)>;
6721
6722 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6723            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6724           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6725            Int64Regs:$r, Int64Regs:$g)>;
6726
6727 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6728            Int64Regs:$s, Int32Regs:$x,
6729            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6730           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6731            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6732
6733 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6734            Int64Regs:$s, Int32Regs:$x,
6735            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6736           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6737            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6738
6739 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6740            Int64Regs:$s, Int32Regs:$x,
6741            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6742           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6743            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6744
6745
6746
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_1D_ARRAY_*_ZERO instruction of
// the same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $l and $x (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6747 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6748            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6749           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6750            Int16Regs:$r)>;
6751
6752 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6753            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6754           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6755            Int16Regs:$r)>;
6756
6757 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6758            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6759           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6760            Int32Regs:$r)>;
6761
6762 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6763            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6764           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6765            Int64Regs:$r)>;
6766
6767 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6768           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6769           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6770            Int16Regs:$r, Int16Regs:$g)>;
6771
6772 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6773           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6774           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6775            Int16Regs:$r, Int16Regs:$g)>;
6776
6777 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6778           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6779           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6780            Int32Regs:$r, Int32Regs:$g)>;
6781
6782 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6783           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6784           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6785            Int64Regs:$r, Int64Regs:$g)>;
6786
6787 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6788            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6789            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6790           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6791            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6792
6793 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6794            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6795            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6796           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6797            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6798
6799 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6800            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6801            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6802           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6803            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6804
6805
6806
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_2D_*_ZERO instruction of the
// same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $x and $y (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_* instruction definitions.
6807 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6808            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6809           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6810            Int16Regs:$r)>;
6811
6812 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6813            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6814           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6815            Int16Regs:$r)>;
6816
6817 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6818            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6819           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6820            Int32Regs:$r)>;
6821
6822 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6823            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6824           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825            Int64Regs:$r)>;
6826
6827 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6828           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6829           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6830            Int16Regs:$r, Int16Regs:$g)>;
6831
6832 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6833           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6834           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6835            Int16Regs:$r, Int16Regs:$g)>;
6836
6837 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6838           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6839           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6840            Int32Regs:$r, Int32Regs:$g)>;
6841
6842 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6843           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6844           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6845            Int64Regs:$r, Int64Regs:$g)>;
6846
6847 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6848            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6849            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6850           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6851            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6852
6853 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6854            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6855            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6856           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6857            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6858
6859 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6860            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6861            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6862           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6863            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6864
6865
6866
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_2D_ARRAY_*_ZERO instruction of
// the same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $l, $x and $y (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
6867 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6868           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6869           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6870            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6871            Int16Regs:$r)>;
6872
6873 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6874           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6875           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6876            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6877            Int16Regs:$r)>;
6878
6879 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6880           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6881           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6882            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6883            Int32Regs:$r)>;
6884
6885 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6886           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6887           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6888            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6889            Int64Regs:$r)>;
6890
6891 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6892            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6893            Int16Regs:$r, Int16Regs:$g),
6894           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6895            Int32Regs:$x, Int32Regs:$y,
6896            Int16Regs:$r, Int16Regs:$g)>;
6897
6898 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6899            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6900            Int16Regs:$r, Int16Regs:$g),
6901           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6902            Int32Regs:$x, Int32Regs:$y,
6903            Int16Regs:$r, Int16Regs:$g)>;
6904
6905 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6906            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6907            Int32Regs:$g),
6908           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6909            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6910
6911 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6912            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6913            Int64Regs:$g),
6914           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6915            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6916
6917 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6918            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6919            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6920           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6921            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6922            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6923
6924 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6925            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6926            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6927           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6928            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6929            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6930
6931 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6932            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6933            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6934           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6935            Int32Regs:$x, Int32Regs:$y,
6936            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6937
6938
6939
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics.
// Each Pat maps one intrinsic onto the SUST_B_3D_*_ZERO instruction of the
// same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_* instruction definitions.
6940 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6941            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6942            Int16Regs:$r),
6943           (SUST_B_3D_B8_ZERO Int64Regs:$s,
6944            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6945            Int16Regs:$r)>;
6946
6947 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6948            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6949            Int16Regs:$r),
6950           (SUST_B_3D_B16_ZERO Int64Regs:$s,
6951            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6952            Int16Regs:$r)>;
6953
6954 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6955            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6956            Int32Regs:$r),
6957           (SUST_B_3D_B32_ZERO Int64Regs:$s,
6958            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6959            Int32Regs:$r)>;
6960
6961 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6962            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6963            Int64Regs:$r),
6964           (SUST_B_3D_B64_ZERO Int64Regs:$s,
6965            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6966            Int64Regs:$r)>;
6967
6968 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6969            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6970            Int16Regs:$r, Int16Regs:$g),
6971           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6972            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6973            Int16Regs:$r, Int16Regs:$g)>;
6974
6975 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6976            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6977            Int16Regs:$r, Int16Regs:$g),
6978           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6979            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6980            Int16Regs:$r, Int16Regs:$g)>;
6981
6982 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6983            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6984            Int32Regs:$r, Int32Regs:$g),
6985           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6986            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6987            Int32Regs:$r, Int32Regs:$g)>;
6988
6989 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6990            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6991            Int64Regs:$r, Int64Regs:$g),
6992           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6993            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6994            Int64Regs:$r, Int64Regs:$g)>;
6995
6996 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6997            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6998            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6999           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7000            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7001            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7002
7003 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7004            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7005            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7006           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7007            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7008            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7009
7010 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7011            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7012            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7013           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7014            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7015            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7016
7017
7018
7019
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics (the
// "sust_p" family, as opposed to the "sust_b" patterns earlier in the file).
// Each Pat maps one intrinsic onto the SUST_P_1D_*_TRAP instruction of the
// same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $x (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs.  Unlike the sust_b groups,
// this group has no i64 or v2i64 pattern.
// NOTE(review): $s is presumably the surface handle and $x the coordinate
// -- confirm against the SUST_* instruction definitions.
7020 def : Pat<(int_nvvm_sust_p_1d_i8_trap
7021            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7022           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7023
7024 def : Pat<(int_nvvm_sust_p_1d_i16_trap
7025            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7026           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7027
7028 def : Pat<(int_nvvm_sust_p_1d_i32_trap
7029            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7030           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
7031
7032 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7033            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7034           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7035            Int16Regs:$r, Int16Regs:$g)>;
7036
7037 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7038            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7039           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7040            Int16Regs:$r, Int16Regs:$g)>;
7041
7042 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7043            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7044           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7045            Int32Regs:$r, Int32Regs:$g)>;
7046
7047 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7048            Int64Regs:$s, Int32Regs:$x,
7049            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7050           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7051            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7052
7053 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7054            Int64Regs:$s, Int32Regs:$x,
7055            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7056           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7057            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7058
7059 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7060            Int64Regs:$s, Int32Regs:$x,
7061            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7062           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7063            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7064
7065
7066
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each Pat maps one intrinsic onto the SUST_P_1D_ARRAY_*_TRAP instruction of
// the same element shape, forwarding every operand unchanged and in order:
// $s (Int64Regs), $l and $x (Int32Regs), then the value operand(s).
// Value operands: one ($r) for the scalar forms, two ($r, $g) for v2 and
// four ($r, $g, $b, $a) for v4.  The i8 forms use Int16Regs for their values,
// exactly like the i16 forms; i32 uses Int32Regs.  Unlike the sust_b groups,
// this group has no i64 or v2i64 pattern.
// NOTE(review): $s is presumably the surface handle and $l the array layer
// index -- confirm against the SUST_* instruction definitions.
7067 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7068            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7069           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7070            Int16Regs:$r)>;
7071
7072 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7073            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7074           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7075            Int16Regs:$r)>;
7076
7077 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7078            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7079           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7080            Int32Regs:$r)>;
7081
7082 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7083           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7084           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7085            Int16Regs:$r, Int16Regs:$g)>;
7086
7087 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7088           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7089           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7090            Int16Regs:$r, Int16Regs:$g)>;
7091
7092 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7093           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7094           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7095            Int32Regs:$r, Int32Regs:$g)>;
7096
7097 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7098            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7099            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7100           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7101            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7102
7103 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7104            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7105            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7106           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7107            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7108
7109 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7110            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7111            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7112           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7113            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7114
7115
7116 // Each int_nvvm_sust_p_2d_*_trap intrinsic below selects the matching SUST_P_2D_*_TRAP instruction (iN -> BN); operands are $s (Int64Regs), $x/$y (Int32Regs), then the values to store, all passed through unchanged.
7117 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7118            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7119           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7120            Int16Regs:$r)>;
7121
7122 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7123            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7124           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7125            Int16Regs:$r)>;
7126
7127 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7128            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7129           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7130            Int32Regs:$r)>;
7131
7132 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7133           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7134           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7135            Int16Regs:$r, Int16Regs:$g)>;
7136
7137 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7138           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7139           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7140            Int16Regs:$r, Int16Regs:$g)>;
7141
7142 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7143           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7144           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7145            Int32Regs:$r, Int32Regs:$g)>;
7146
7147 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7148            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7149            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7150           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7151            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7152
7153 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7154            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7155            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7156           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7157            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7158
7159 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7160            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7161            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7162           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7163            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7164
7165
7166 // Each int_nvvm_sust_p_2d_array_*_trap intrinsic below selects the matching SUST_P_2D_ARRAY_*_TRAP instruction (iN -> BN); operands are $s (Int64Regs), $l/$x/$y (Int32Regs), then the values to store, all passed through unchanged.
7167 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7168           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7169           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7170            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7171            Int16Regs:$r)>;
7172
7173 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7174           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7175           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7176            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7177            Int16Regs:$r)>;
7178
7179 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7180           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7181           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7182            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7183            Int32Regs:$r)>;
7184
7185 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7186            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7187            Int16Regs:$r, Int16Regs:$g),
7188           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7189            Int32Regs:$x, Int32Regs:$y,
7190            Int16Regs:$r, Int16Regs:$g)>;
7191
7192 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7193            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7194            Int16Regs:$r, Int16Regs:$g),
7195           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7196            Int32Regs:$x, Int32Regs:$y,
7197            Int16Regs:$r, Int16Regs:$g)>;
7198
7199 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7200            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7201            Int32Regs:$g),
7202           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7203            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7204
7205 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7206            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7207            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7208           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7209            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7210            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7211
7212 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7213            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7214            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7215           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7216            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7217            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7218
7219 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7220            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7221            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7222           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7223            Int32Regs:$x, Int32Regs:$y,
7224            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7225
7226
7227 // Each int_nvvm_sust_p_3d_*_trap intrinsic below selects the matching SUST_P_3D_*_TRAP instruction (iN -> BN); operands are $s (Int64Regs), $x/$y/$z (Int32Regs), then the values to store, all passed through unchanged.
7228 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7229            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7230            Int16Regs:$r),
7231           (SUST_P_3D_B8_TRAP Int64Regs:$s,
7232            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7233            Int16Regs:$r)>;
7234
7235 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7236            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7237            Int16Regs:$r),
7238           (SUST_P_3D_B16_TRAP Int64Regs:$s,
7239            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7240            Int16Regs:$r)>;
7241
7242 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7243            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7244            Int32Regs:$r),
7245           (SUST_P_3D_B32_TRAP Int64Regs:$s,
7246            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7247            Int32Regs:$r)>;
7248
7249 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7250            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7251            Int16Regs:$r, Int16Regs:$g),
7252           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7253            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7254            Int16Regs:$r, Int16Regs:$g)>;
7255
7256 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7257            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7258            Int16Regs:$r, Int16Regs:$g),
7259           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7260            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7261            Int16Regs:$r, Int16Regs:$g)>;
7262
7263 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7264            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7265            Int32Regs:$r, Int32Regs:$g),
7266           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7267            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7268            Int32Regs:$r, Int32Regs:$g)>;
7269
7270 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7271            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7272            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7273           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7274            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7275            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7276
7277 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7278            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7279            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7280           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7281            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7282            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7283
7284 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7285            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7286            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7287           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7288            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7289            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7290
7291 //-----------------------------------
7292 // Read Special Registers
7293 //-----------------------------------
7294 // Instruction that matches intrinsic `intop` (no arguments) and emits "mov.u64 $d, %<regname>;" into an Int64Regs result.
7295 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7296   : NVPTXInst<(outs Int64Regs:$d), (ins),
7297               !strconcat("mov.u64 \t$d, %", regname, ";"),
7298               [(set Int64Regs:$d, (intop))]>;
7299 // Instruction that matches intrinsic `intop` (no arguments) and emits "mov.u32 $d, %<regname>;" into an Int32Regs result.
7300 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7301   : NVPTXInst<(outs Int32Regs:$d), (ins),
7302               !strconcat("mov.u32 \t$d, %", regname, ";"),
7303               [(set Int32Regs:$d, (intop))]>;
7304
7305 // TODO Add read vector-version of special registers
7306 // Each def below binds one int_nvvm_read_ptx_sreg_* intrinsic to a mov from the special register named in its first template argument.
7307 def INT_PTX_SREG_TID_X :
7308     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7309 def INT_PTX_SREG_TID_Y :
7310     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7311 def INT_PTX_SREG_TID_Z :
7312     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7313 def INT_PTX_SREG_TID_W :
7314     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7315
7316 def INT_PTX_SREG_NTID_X :
7317     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7318 def INT_PTX_SREG_NTID_Y :
7319     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7320 def INT_PTX_SREG_NTID_Z :
7321     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7322 def INT_PTX_SREG_NTID_W :
7323     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
7324
7325 def INT_PTX_SREG_LANEID :
7326     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7327 def INT_PTX_SREG_WARPID :
7328     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7329 def INT_PTX_SREG_NWARPID :
7330     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
7331
7332 def INT_PTX_SREG_CTAID_X :
7333     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7334 def INT_PTX_SREG_CTAID_Y :
7335     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7336 def INT_PTX_SREG_CTAID_Z :
7337     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7338 def INT_PTX_SREG_CTAID_W :
7339     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7340
7341 def INT_PTX_SREG_NCTAID_X :
7342     PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7343 def INT_PTX_SREG_NCTAID_Y :
7344     PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7345 def INT_PTX_SREG_NCTAID_Z :
7346     PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7347 def INT_PTX_SREG_NCTAID_W :
7348     PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
7349
7350 def INT_PTX_SREG_SMID :
7351     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7352 def INT_PTX_SREG_NSMID :
7353     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7354 def INT_PTX_SREG_GRIDID :
7355     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
7356
7357 def INT_PTX_SREG_LANEMASK_EQ :
7358     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7359 def INT_PTX_SREG_LANEMASK_LE :
7360     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7361 def INT_PTX_SREG_LANEMASK_LT :
7362     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7363 def INT_PTX_SREG_LANEMASK_GE :
7364     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7365 def INT_PTX_SREG_LANEMASK_GT :
7366     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7367 // %clock is read through the 32-bit class, %clock64 through the 64-bit one.
7368 def INT_PTX_SREG_CLOCK :
7369     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7370 def INT_PTX_SREG_CLOCK64 :
7371     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
7372
7373 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7374 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7375 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7376 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7377
7378 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
7379 // emits a '%' before the register name, and WARP_SZ is written without one.
7380 def INT_PTX_SREG_WARPSIZE :
7381     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7382               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7383
7384 //
7385 // wmma.load.[a|b|c].sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
7386 //
7387 // Placeholder base for the WMMA instruction classes: no operands, asm string "?" and an empty pattern; subclasses fill these in with `let`.
7388 class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
7389 // One wmma.load instruction: fragment Abc of Geometry in Layout from Space with element Type, addressed through SrcOp, with an extra $ldm operand when WithStride is set.
7390 class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
7391                         string Space, string Type, NVPTXRegClass regclass,
7392                         DAGOperand SrcOp, bit WithStride>
7393   : EmptyNVPTXInst,
7394     Requires<[!if(!eq(Geometry, "m16n16k16"),
7395                   hasPTX60,
7396                   hasPTX61),
7397               hasSM70]> {
7398   // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
7399   // for this function.
7400   PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
7401                                        # Geometry # "_load_"
7402                                        # !subst("c", "c_" # Type, Abc)
7403                                        # "_" # Layout
7404                                        # !subst(".", "_", Space)
7405                                        # !if(WithStride,"_stride", "")
7406                                        # "_Intr");
7407   dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
7408   dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
7409   dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));  // c with f16: 4 results, otherwise 8
7410
7411   dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
7412   dag Ins = !con((ins SrcOp:$src), StrideArg);
7413
7414   // Build a dag pattern that matches the intrinsic call.
7415   // We want a dag that looks like this:
7416   // (set <output args>, (intrinsic <input arguments>)) where input and
7417   // output arguments are named patterns that would match corresponding
7418   // input/output arguments of the instruction.
7419   //
7420   // First we construct (set <output arguments>) from instruction's outs dag by
7421   // replacing dag operator 'outs' with 'set'.
7422   dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
7423   // Similarly, construct (intrinsic <input arguments>) sub-dag from
7424   // instruction's input arguments, only now we also need to replace operands
7425   // with patterns that would match them and the operator 'ins' with the
7426   // intrinsic.
7427   dag PatArgs = !foreach(tmp, Ins,
7428                               !subst(imem, ADDRvar,
7429                               !subst(MEMri64, ADDRri64,
7430                               !subst(MEMri, ADDRri,
7431                               !subst(ins, IntrMatcher, tmp)))));
7432   // Finally, concatenate both parts together. !con() requires both dags to have
7433   // the same operator, so we wrap PatArgs in a (set ...) dag.
7434   let Pattern = [!con(PatOuts, (set PatArgs))];
7435   let OutOperandList = Outs;
7436   let InOperandList = Ins;
7437   let AsmString = "wmma.load."
7438                   # Abc
7439                   # ".sync"
7440                   # "." # Layout
7441                   # "." # Geometry
7442                   # Space
7443                   # "." # Type # " \t"
7444                   # !if(!eq(Abc#Type, "cf16"),
7445                         "{{$r0, $r1, $r2, $r3}}",
7446                         "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
7447                   # ", [$src]"
7448                   # !if(WithStride, ", $ldm", "")
7449                   # ";";
7450 }
7451 // PatFrag that matches the wmma load intrinsic for one variant and uses PredicateCode to check the address space selected by Space.
7452 class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
7453                             string Space, string Type, bit WithStride>
7454                            : PatFrag <(ops),(ops)> {
7455   // Intrinsic that matches this instruction.
7456   Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
7457                                     # "_" # Geometry # "_load_"
7458                                     # Abc # "_" # Type # "_" # Layout
7459                                     # !if(WithStride,"_stride", ""));
7460   code match_generic = [{
7461    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
7462   }];
7463   code match_shared = [{
7464    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
7465   }];
7466   code match_global = [{
7467    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
7468   }];
7469   // The fragment is Operands with 'ops' replaced by Intr; any Space other than ".shared" or ".global" gets the generic check.
7470   let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7471   let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
7472   let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
7473                       !if(!eq(Space, ".global"), match_global, match_generic));
7474 }
7475 // Instantiates the load once per source operand form: imem, Int32Regs, Int64Regs, MEMri and MEMri64.
7476 multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
7477                             string Space, string Type, NVPTXRegClass regclass,
7478                             bit WithStride> {
7479   def _avar:  WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7480                                 imem, WithStride>;
7481   def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7482                                 Int32Regs, WithStride>;
7483   def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7484                                 Int64Regs, WithStride>;
7485   def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7486                                 MEMri, WithStride>;
7487   def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7488                                 MEMri64, WithStride>;
7489 }
7490 // Defines the _Intr PatFrag together with the load instructions, which refer to it by name through their IntrMatcher field.
7491 multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
7492                              string Space, string Type, NVPTXRegClass regclass,
7493                              bit WithStride> {
7494   // Define a PatFrag that matches appropriate intrinsic that loads from the
7495   // given address space.
7496   def _Intr:  WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
7497                                     WithStride>;
7498   defm NAME:  WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
7499                                WithStride>;
7500 }
7501 // Emits both the strided (_stride, WithStride = 1) and the unstrided (WithStride = 0) variants.
7502 multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
7503                            string Space, string Type, NVPTXRegClass regclass> {
7504   defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
7505   defm NAME:    WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
7506 }
7507 // Emits the ".global", ".shared" and unqualified (empty Space string) variants.
7508 multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
7509                           string Type, NVPTXRegClass regclass> {
7510   defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
7511                                 Type, regclass>;
7512   defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
7513                                 Type, regclass>;
7514   defm NAME:    WMMA_LOAD_GALST<Geometry, Abc, Layout,        "",
7515                                 Type, regclass>;
7516 }
7517 // Emits the "row" and "col" layout variants.
7518 multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
7519                          string Type, NVPTXRegClass regclass> {
7520   defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
7521   defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
7522 }
7523 // Emits loads for the a and b fragments (f16 in Float16x2Regs) and for the c fragment as f16 (Float16x2Regs) and f32 (Float32Regs).
7524 multiclass WMMA_LOAD_G<string Geometry> {
7525   defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
7526   defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
7527   defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
7528   defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
7529 }
7530 // Instantiate the wmma.load instructions for each supported geometry.
7531 defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
7532 defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
7533 defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
7534
7535 //
7536 // wmma.store.d.sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
7537 // One wmma.store.d instruction for Geometry/Layout/Space/Type, addressed through DstOp (bound to the operand named $src), with an extra $ldm operand when WithStride is set.
7538 class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
7539                           string Type, NVPTXRegClass regclass,
7540                           bit WithStride, DAGOperand DstOp>
7541   : EmptyNVPTXInst,
7542     Requires<[!if(!eq(Geometry, "m16n16k16"),
7543                   hasPTX60,
7544                   hasPTX61),
7545               hasSM70]> {
7546   PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
7547                                        # "_" # Geometry # "_store_d"
7548                                        # "_" # Type
7549                                        # "_" # Layout
7550                                        # !subst(".", "_", Space)
7551                                        # !if(WithStride,"_stride", "")
7552                                        # "_Intr");
7553   dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
7554                                 regclass:$r2, regclass:$r3);
7555   dag InsR47 = (ins regclass:$r4, regclass:$r5,
7556                     regclass:$r6, regclass:$r7);
7557   dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));  // f16: 4 data registers, otherwise 8
7558   dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
7559   dag Ins = !con(InsR, StrideArg);
7560
7561   // Construct the pattern to match corresponding intrinsic call. See the
7562   // details in the comments in WMMA_LOAD_GALSTOS.
7563   dag PatArgs = !foreach(tmp, Ins,
7564                               !subst(imem, ADDRvar,
7565                               !subst(MEMri64, ADDRri64,
7566                               !subst(MEMri, ADDRri,
7567                               !subst(ins, IntrMatcher, tmp)))));
7568   let Pattern = [PatArgs];
7569   let OutOperandList = (outs);
7570   let InOperandList = Ins;
7571   let AsmString = "wmma.store.d.sync."
7572                   # Layout
7573                   # "." # Geometry
7574                   # Space
7575                   # "." # Type
7576                   # " \t[$src],"
7577                   # !if(!eq(Type,"f16"),
7578                         "{{$r0, $r1, $r2, $r3}}",
7579                         "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
7580                   # !if(WithStride, ", $ldm", "")
7581                   # ";";
7582
7583 }
7584 // PatFrag that matches the wmma store intrinsic for one variant and uses PredicateCode to check the address space selected by Space.
7585 class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
7586                              string Type, bit WithStride>
7587                             : PatFrag <(ops),(ops)> {
7588   // Intrinsic that matches this instruction.
7589   Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
7590                                     # Geometry
7591                                     # "_store_d"
7592                                     # "_" # Type
7593                                     # "_" # Layout
7594                                     # !if(WithStride, "_stride", ""));
7595   code match_generic = [{
7596    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
7597   }];
7598   code match_shared = [{
7599    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
7600   }];
7601   code match_global = [{
7602    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
7603   }];
7604   // Operands: $dst, then 4 data nodes for f16 or 8 otherwise, then $ldm when WithStride is set.
7605   dag Args = !if(!eq(Type,"f16"),
7606                  (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
7607                  (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
7608                                  node:$r4, node:$r5, node:$r6, node:$r7));
7609   dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
7610   let Operands = !con(Args, StrideArg);
7611   let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
7612   let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
7613                       !if(!eq(Space, ".global"), match_global, match_generic));
7614 }
7615 // Instantiates the store once per destination operand form: imem, Int32Regs, Int64Regs, MEMri and MEMri64.
7616 multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
7617                               string Type, NVPTXRegClass regclass,
7618                               bit WithStride> {
7619   def _avar:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7620                                    WithStride, imem>;
7621   def _areg:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7622                                    WithStride, Int32Regs>;
7623   def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7624                                    WithStride, Int64Regs>;
7625   def _ari:    WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7626                                    WithStride, MEMri>;
7627   def _ari64:  WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7628                                    WithStride, MEMri64>;
7629 }
7630 // Defines the _Intr PatFrag together with the store instructions, which refer to it by name through their IntrMatcher field.
7631 multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
7632                                string Type, NVPTXRegClass regclass,
7633                                bit WithStride> {
7634   // Define a PatFrag that matches the appropriate intrinsic that stores to the
7635   // given address space.
7636   def _Intr:    WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
7637                                        WithStride>;
7638   defm NAME:    WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
7639                                    WithStride>;
7640 }
7641 // Emits both the strided (_stride, WithStride = 1) and the unstrided (WithStride = 0) variants.
7642 multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
7643                              string Type, NVPTXRegClass regclass > {
7644   defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
7645   defm NAME:    WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
7646 }
7647 // Emits the ".global", ".shared" and unqualified (empty Space string) variants.
7648 multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
7649                            string Type, NVPTXRegClass regclass> {
7650   defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
7651   defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
7652   defm NAME:    WMMA_STORE_D_GLST<Geometry, Layout,        "", Type, regclass>;
7653 }
7654 // Emits the "row" and "col" layout variants.
7655 multiclass WMMA_STORE_D_GT<string Geometry, string Type,
7656                            NVPTXRegClass regclass> {
7657   defm _row:    WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
7658   defm _col:    WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
7659 }
7660 // Emits stores of the d fragment as f16 (Float16x2Regs) and as f32 (Float32Regs).
7661 multiclass WMMA_STORE_D_G<string Geometry> {
7662   defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
7663   defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
7664 }
7665 // Instantiate the wmma.store.d instructions for each supported geometry.
7666 defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
7667 defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
7668 defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
7669 // One wmma.mma.sync instruction for a geometry, A/B layouts, D/C element types and an optional Satfinite suffix; A and B always take 8 registers each, D and C take 4 when f16 and 8 otherwise.
7670 // WMMA.MMA
7671 class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
7672                      string DType, NVPTXRegClass d_reg,
7673                      string CType, NVPTXRegClass c_reg,
7674                      NVPTXRegClass ab_reg,
7675                      string Satfinite = "">
7676   : EmptyNVPTXInst,
7677     Requires<[!if(!eq(Geometry, "m16n16k16"),
7678                   hasPTX60,
7679                   hasPTX61),
7680               hasSM70]> {
7681   Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
7682                                     # Geometry
7683                                     # "_mma"
7684                                     # "_" # ALayout
7685                                     # "_" # BLayout
7686                                     # "_" # DType
7687                                     # "_" # CType
7688                                     # !subst(".", "_", Satfinite));
7689   dag Outs = !if(!eq(DType,"f16"),
7690                  (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
7691                  (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
7692                        d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
7693   dag InsExtraCArgs = !if(!eq(CType,"f16"),
7694                           (ins),
7695                           (ins c_reg:$c4,  c_reg:$c5,  c_reg:$c6,  c_reg:$c7));
7696   dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
7697                       ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
7698                       ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
7699                       ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
7700                       c_reg:$c0,  c_reg:$c1,  c_reg:$c2,  c_reg:$c3),
7701                   InsExtraCArgs);
7702
7703   // Construct the pattern to match corresponding intrinsic call. See the
7704   // details in the comments in WMMA_LOAD_GALSTOS.
7705   dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
7706   dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
7707   let Pattern = [!con(PatOuts, (set PatArgs))];
7708   let OutOperandList = Outs;
7709   let InOperandList  = Ins;
7710   let AsmString = "wmma.mma.sync."
7711                   # ALayout
7712                   # "." # BLayout
7713                   # "." # Geometry
7714                   # "." # DType
7715                   # "." # CType
7716                   # Satfinite # "\n\t\t"
7717                   # !if(!eq(DType,"f16"),
7718                         "{{$d0, $d1, $d2, $d3}}, \n\t\t",
7719                         "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
7720                   # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
7721                   # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
7722                   # !if(!eq(CType,"f16"),
7723                         "{{$c0, $c1, $c2, $c3}};",
7724                         "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
7725 }
7726 // Emits the ".satfinite" and plain variants; both pass Float16x2Regs as the A/B register class.
7727 multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
7728                          string DType, NVPTXRegClass d_reg,
7729                          string CType, NVPTXRegClass c_reg> {
7730   def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
7731                                  DType, d_reg, CType, c_reg,
7732                                  Float16x2Regs, ".satfinite">;
7733   def NAME:       WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
7734                                  DType, d_reg, CType, c_reg,
7735                                  Float16x2Regs>;
7736 }
7737 // Emits the variants for each C element type: f16 (Float16x2Regs) and f32 (Float32Regs).
7738 multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
7739                         string DType, NVPTXRegClass d_reg> {
7740   defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
7741                             "f16", Float16x2Regs>;
7742   defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
7743                             "f32", Float32Regs>;
7744 }
7745 // Emits the variants for each D element type: f16 (Float16x2Regs) and f32 (Float32Regs).
7746 multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
7747   defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
7748   defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
7749 }
7750 // Emits the "col" and "row" variants of the B layout for a fixed A layout.
7751 multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
7752   defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
7753   defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
7754 }
7755 // Emits the "col" and "row" variants of the A layout for a geometry.
7756 multiclass WMMA_MMA_G<string Geometry> {
7757   defm _col: WMMA_MMA_GA<Geometry, "col">;
7758   defm _row: WMMA_MMA_GA<Geometry, "row">;
7759 }
7760 // Instantiate the wmma.mma.sync instructions for each supported geometry.
7761 defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
7762 defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
7763 defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;