]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
contrib/tzdata: import tzdata 2020f
[FreeBSD/FreeBSD.git] / contrib / llvm-project / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// C++ predicate bodies that check node N against one address space each,
// via ChkMemSDNodeAddressSpace.
def AS_match {
  // N is in the generic address space.
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  // N is in the shared address space.
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  // N is in the global address space.
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Supplies a DAG node that gets replaced with the PTX version currently in use.
class PTX {
  // Transform that yields Subtarget->getPTXVersion() as an i32 immediate.
  SDNodeXForm PTXVerXform =
      SDNodeXForm<imm, [{
        return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
      }]>;
  // The (i32 0) placeholder is what PTXVerXform rewrites to the PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;
50
// Produces the list of n sequentially numbered names built from prefix.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
class RegSeq<int n, string prefix> {
  // Recurse on n-1 and append the name numbered n-1; n == 0 gives the empty
  // list.
  list<string> ret =
      !if(n,
          !listconcat(RegSeq<!add(n, -1), prefix>.ret,
                      [prefix # !add(n, -1)]),
          []);
}
58
// Values to iterate over for a threadmask_imm flag: both 0 and 1 when sync is
// set, otherwise only 0.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}
62
63 //-----------------------------------
64 // Synchronization and shuffle functions
65 //-----------------------------------
66 let isConvergent = 1 in {
// bar.sync with a literal 0, with one register operand, and with two register
// operands.
def INT_BARRIER0
  : NVPTXInst<(outs), (ins),
              "bar.sync \t0;",
              [(int_nvvm_barrier0)]>;
def INT_BARRIERN
  : NVPTXInst<(outs), (ins Int32Regs:$src1),
              "bar.sync \t$src1;",
              [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER
  : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
              "bar.sync \t$src1, $src2;",
              [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// bar.red.{popc,and,or} with a literal 0 as first source operand. Each
// expansion first turns the i32 $pred into a predicate register with
// setp.ne.u32; the and/or forms then convert the predicate result back to an
// i32 0/1 with selp.u32.
def INT_BARRIER0_POPC
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
              # "}}",
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # ".reg .pred \t%p2; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
              # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
              # "}}",
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              "{{ \n\t"
              # ".reg .pred \t%p1; \n\t"
              # ".reg .pred \t%p2; \n\t"
              # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
              # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
              # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
              # "}}",
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// bar.sync taking its operand as an immediate.
def INT_BAR_SYNC
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.sync \t$i;",
              [(int_nvvm_bar_sync imm:$i)]>;
104
// bar.warp.sync with an immediate (_I) or register (_R) operand.
def INT_BAR_WARP_SYNC_I
  : NVPTXInst<(outs), (ins i32imm:$i),
              "bar.warp.sync \t$i;",
              [(int_nvvm_bar_warp_sync imm:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
  : NVPTXInst<(outs), (ins Int32Regs:$i),
              "bar.warp.sync \t$i;",
              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
111
// barrier.sync with an immediate (_I) or register (_R) operand.
def INT_BARRIER_SYNC_I
  : NVPTXInst<(outs), (ins i32imm:$i),
              "barrier.sync \t$i;",
              [(int_nvvm_barrier_sync imm:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
  : NVPTXInst<(outs), (ins Int32Regs:$i),
              "barrier.sync \t$i;",
              [(int_nvvm_barrier_sync Int32Regs:$i)]>,
    Requires<[hasPTX60, hasSM30]>;
118
// barrier.sync with $id and $cnt operands. The two-letter suffix gives the
// kind of $id then $cnt: R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR
  : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
  : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
  : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
  : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
              "barrier.sync \t$id, $cnt;",
              [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
    Requires<[hasPTX60, hasSM30]>;
135
// One shfl / shfl.sync instruction variant.
//   sync           - emit "shfl.sync." and take a leading $threadmask input,
//                    which is printed last in the assembly.
//   mode           - text placed in the mnemonic and in the intrinsic name.
//   reg            - "i32" or "f32"; picks the register class of $src/$dst.
//   return_pred    - additionally produce an Int1Regs $pred result; the
//                    intrinsic name then gets a "p" suffix.
//   offset_imm, mask_imm, threadmask_imm
//                  - use an i32imm operand instead of Int32Regs for $offset,
//                    $mask and $threadmask respectively.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  // Register class used for both the source and the destination value.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // Name of the intrinsic record this variant matches, looked up via !cast.
  string IntrName = "int_nvvm_shfl_" # !if(sync, "sync_", "") # mode # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs in order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
      !if(sync,
          !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
          (ins)),
      (ins rc:$src),
      !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
      !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  let AsmString = "shfl." # !if(sync, "sync.", "") # mode # ".b32\t"
                  # "$dst" # !if(return_pred, "|$pred", "") # ", "
                  # "$src, $offset, $mask"
                  # !if(sync, ", $threadmask", "")
                  # ";";

  // The selection pattern is derived from the operand lists: the outputs
  // become the results of a `set`, the inputs become the arguments of Intr,
  // and every i32imm operand is matched as `imm`.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
               !subst(outs, set,
               !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, tmp)))))];
}
175
// Instantiate every SHFL_INSTR variant: sync/non-sync x mode x register type
// x optional predicate result x immediate/register forms of offset, mask and
// (for sync only, see THREADMASK_INFO) threadmask.
//
// The sync variants are gated on hasPTX60 in addition to hasSM30, matching the
// other *.sync instructions defined in this file (vote.sync, bar.warp.sync,
// barrier.sync, match.*.sync). The non-sync variants keep requiring hasSHFL.
foreach sync = [0, 1] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [0, 1] in {
        foreach offset_imm = [0, 1] in {
          foreach mask_imm = [0, 1] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasPTX60, hasSM30],
                                       [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}
193
// vote.{all,any,uni,ballot}
// Defines one anonymous instruction printing "vote.<mode>" that takes a
// predicate input and writes a result of class regclass.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest),
                  (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
206
// vote.sync.{all,any,uni,ballot}
// Two instructions per mode: `i` takes $mask as an immediate, `r` takes it in
// a register. $mask is the first intrinsic argument and the last asm operand.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
  : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
  : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
  : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
  : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
// match.any.sync.<ptxtype>
// Four instructions per type. The suffix letters describe $mask then $value:
// i = immediate, r = register. Immediate values use ImmOp, register values
// use regclass.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
  : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
  : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
248
// match.all.sync.<ptxtype> producing both $dest and a predicate $pred.
// Four instructions per type. The suffix letters describe $mask then $value:
// i = immediate, r = register. Immediate values use ImmOp, register values
// use regclass.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                           Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32
  : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
  : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
276
277 } // isConvergent = 1
278
//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// An instruction with no operands that prints StrOp and is selected for a
// call to IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
289
290
291 //-----------------------------------
292 // Math Functions
293 //-----------------------------------
294
// Map min(1.0, max(0.0, x)) to sat(x).
// The reverse nesting, max(0.0, min(x, 1.0)), cannot be mapped to sat(x):
// when x is NaN it evaluates to 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
//
// All four operand orders of the min/max pair are listed, for f32 and f64.

def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
326
327
// One-source instruction selected for IntOP. OpcStr is the complete assembly
// string (not just a mnemonic) because cases like INT_PTX_RECIP need it.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
335
// Two-source instruction selected for IntOP. OpcStr is the complete assembly
// string (not just a mnemonic) because cases like INT_PTX_NATIVE_POWR_F need
// it.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
344
// Three-source instruction selected for IntOP; OpcStr is the complete
// assembly string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>;
353
//
// MISC
//

// int_nvvm_prmt -> prmt.b32 on three i32 sources.
def INT_NVVM_PRMT
  : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
360
//
// Min Max
//

// f32 min, plain and flush-to-zero (.ftz) forms.
def INT_NVVM_FMIN_F
  : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
  : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max, plain and .ftz forms.
def INT_NVVM_FMAX_F
  : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
  : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min and max.
def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
379
380
//
// Multiplication
//

// mul.hi for signed/unsigned 32-bit operands.
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi for signed/unsigned 64-bit operands.
def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply for each rounding modifier (rn, rz, rm, rp), with and without
// .ftz.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply for each rounding modifier.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo for signed/unsigned 32-bit operands.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
425
//
// Div
//

// f32 approximate divide, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide for each rounding modifier (rn, rz, rm, rp), with and without
// .ftz.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide for each rounding modifier.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
461
//
// Sad
//

// sad for signed/unsigned 32-bit operands; three sources.
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
470
//
// Floor  Ceil
//

// floor is selected as a same-type cvt with the CvtRMI / CvtRMI_FTZ mode.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil is selected as a same-type cvt with the CvtRPI / CvtRPI_FTZ mode.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
488
//
// Abs
//

// f32 abs, with and without .ftz.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

// f64 abs.
def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;
500
//
// Round
//

// round is selected as a same-type cvt with the CvtRNI / CvtRNI_FTZ mode.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
511
//
// Trunc
//

// trunc is selected as a same-type cvt with the CvtRZI / CvtRZI_FTZ mode.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
522
//
// Saturate
//

// saturate is selected as a same-type cvt with the CvtSAT / CvtSAT_FTZ mode.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
533
//
// Exp2  Log2
//

// ex2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
551
//
// Sin  Cos
//

// sin.approx for f32, with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos.approx for f32, with and without .ftz.
def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
565
//
// Fma
//

// f32 fma for each rounding modifier (rn, rz, rm, rp), with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
  : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
  : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
  : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
  : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_f>;

// f64 fma for each rounding modifier.
def INT_NVVM_FMA_RN_D
  : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
  : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
  : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
  : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rp_d>;
599
//
// Rcp
//

// f32 rcp for each rounding modifier (rn, rz, rm, rp), with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp for each rounding modifier.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// f64 approximate rcp with .ftz.
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
632
//
// Sqrt
//

// f32 sqrt for each rounding modifier (rn, rz, rm, rp) and the approximate
// form, each with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt for each rounding modifier.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
666
// nvvm_sqrt intrinsic
// int_nvvm_sqrt_f maps to one of the f32 sqrt instructions depending on which
// of the doF32FTZ and do_SQRTF32_RN predicates hold; the last pattern has no
// predicate.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
676
677 //
678 // Rsqrt
679 //
680
// Reciprocal square root. Three variants are defined, all using the PTX
// "rsqrt.approx" opcode: ftz.f32 and f32 on Float32Regs, and f64 on
// Float64Regs. Each is tied to the int_nvvm_rsqrt_approx_* intrinsic of the
// same suffix.
681 def INT_NVVM_RSQRT_APPROX_FTZ_F
682   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
683     int_nvvm_rsqrt_approx_ftz_f>;
684 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
685   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
686 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
687   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
688
689 //
690 // Add
691 //
692
// Single-precision add with an explicit rounding mode. One instruction per
// int_nvvm_add_<rn|rz|rm|rp>[_ftz]_f intrinsic; the asm string takes two
// sources ($src0, $src1) and all three register-class arguments are
// Float32Regs.
// NOTE(review): F_MATH_2 is declared outside this excerpt; its argument order
// (asm string, dst class, src0 class, src1 class, intrinsic) is inferred.
693 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
694   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
695 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
696   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
697 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
698   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
699 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
700   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
701 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
702   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
703 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
704   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
705 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
706   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
707 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
708   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
709
// Double-precision add with an explicit rounding mode (rn, rz, rm, rp). No
// .ftz variants are defined in this group; all register classes are
// Float64Regs.
710 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
711   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
712 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
713   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
714 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
715   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
716 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
717   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
718
719 //
720 // Convert
721 //
722
// f64 -> f32 conversions. Each int_nvvm_d2f_<mode>[_ftz] intrinsic is rewritten
// to CVT_f32_f64 with the mode operand of the same name (CvtRN, CvtRZ, CvtRM,
// CvtRP, or the *_FTZ form for the ftz intrinsics).
// NOTE(review): CVT_f32_f64 and the Cvt* mode operands are defined outside
// this excerpt.
723 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
724           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
725 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
726           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
727 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
728           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
729 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
730           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
731 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
732           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
733 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
734           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
735 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
736           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
737 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
738           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
739
// f64 -> 32-bit integer conversions. int_nvvm_d2i_* selects CVT_s32_f64 and
// int_nvvm_d2ui_* selects CVT_u32_f64; the mode operand is the "I"-suffixed
// form (CvtRNI, CvtRZI, CvtRMI, CvtRPI) matching the intrinsic's mode.
// NOTE(review): the "I" suffix presumably denotes round-to-integer modes;
// the operands are defined outside this excerpt.
740 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
741           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
742 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
743           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
744 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
745           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
746 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
747           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
748
749 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
750           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
751 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
752           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
753 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
754           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
755 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
756           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
757
// 32-bit integer -> f64 conversions. int_nvvm_i2d_* selects CVT_f64_s32 and
// int_nvvm_ui2d_* selects CVT_f64_u32; the source is an Int32Regs value and the
// mode operand (CvtRN/CvtRZ/CvtRM/CvtRP) matches the intrinsic's mode.
758 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
759           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
760 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
761           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
762 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
763           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
764 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
765           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
766
767 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
768           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
769 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
770           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
771 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
772           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
773 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
774           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
775
// f32 -> 32-bit integer conversions. int_nvvm_f2i_* selects CVT_s32_f32 and
// int_nvvm_f2ui_* selects CVT_u32_f32. The mode operand is the "I"-suffixed
// form of the intrinsic's mode, with an additional _FTZ suffix for the ftz
// intrinsics (e.g. int_nvvm_f2i_rn_ftz -> CvtRNI_FTZ).
776 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
777           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
778 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
779           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
780 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
781           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
782 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
783           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
784 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
785           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
786 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
787           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
788 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
789           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
790 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
791           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
792
793 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
794           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
795 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
796           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
797 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
798           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
799 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
800           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
801 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
802           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
803 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
804           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
805 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
806           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
807 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
808           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
809
// 32-bit integer -> f32 conversions. int_nvvm_i2f_* selects CVT_f32_s32 and
// int_nvvm_ui2f_* selects CVT_f32_u32, with the mode operand
// (CvtRN/CvtRZ/CvtRM/CvtRP) matching the intrinsic's mode. No ftz variants are
// defined in this group.
810 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
811           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
812 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
813           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
814 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
815           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
816 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
817           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
818
819 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
820           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
821 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
822           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
823 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
824           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
825 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
826           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
827
// Packing/unpacking between one Float64Regs value and two Int32Regs values,
// all implemented with the PTX vector form of mov.b64.
//
// INT_NVVM_LOHI_I2D: builds the f64 destination from the pair
// {$src0, $src1}.
// NOTE(review): going by the intrinsic name, $src0 is presumably the low word
// and $src1 the high word — confirm against the PTX mov.b64 pack semantics.
828 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
829   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
830
// INT_NVVM_D2I_LO / INT_NVVM_D2I_HI: emit a small multi-line asm block that
// declares a scratch register %temp (.reg .b32), then unpacks $src0 into a
// two-element vector. LO places $dst in the first vector slot and %temp in the
// second; HI swaps them, so the unwanted half always lands in %temp.
831 def INT_NVVM_D2I_LO : F_MATH_1<
832   !strconcat("{{\n\t",
833              ".reg .b32 %temp; \n\t",
834              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
835              "}}"),
836   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
837 def INT_NVVM_D2I_HI : F_MATH_1<
838   !strconcat("{{\n\t",
839              ".reg .b32 %temp; \n\t",
840              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
841              "}}"),
842   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
843
// f32 -> 64-bit integer conversions. int_nvvm_f2ll_* selects CVT_s64_f32 and
// int_nvvm_f2ull_* selects CVT_u64_f32. As with the 32-bit forms, the mode
// operand is the "I"-suffixed mode, plus _FTZ for the ftz intrinsics.
844 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
845           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
846 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
847           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
848 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
849           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
850 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
851           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
852 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
853           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
854 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
855           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
856 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
857           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
858 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
859           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
860
861 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
862           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
863 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
864           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
865 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
866           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
867 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
868           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
869 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
870           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
871 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
872           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
873 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
874           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
875 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
876           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
877
// f64 -> 64-bit integer conversions. int_nvvm_d2ll_* selects CVT_s64_f64 and
// int_nvvm_d2ull_* selects CVT_u64_f64, using the "I"-suffixed mode operand
// (CvtRNI/CvtRZI/CvtRMI/CvtRPI) matching the intrinsic's mode.
878 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
879           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
880 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
881           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
882 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
883           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
884 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
885           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
886
887 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
888           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
889 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
890           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
891 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
892           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
893 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
894           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
895
// 64-bit integer -> f32 conversions. int_nvvm_ll2f_* selects CVT_f32_s64 and
// int_nvvm_ull2f_* selects CVT_f32_u64; the source is an Int64Regs value and
// the mode operand (CvtRN/CvtRZ/CvtRM/CvtRP) matches the intrinsic's mode.
896 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
897           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
898 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
899           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
900 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
901           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
902 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
903           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
904
905 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
906           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
907 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
908           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
909 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
910           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
911 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
912           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
913
// 64-bit integer -> f64 conversions. int_nvvm_ll2d_* selects CVT_f64_s64 and
// int_nvvm_ull2d_* selects CVT_f64_u64, with the mode operand
// (CvtRN/CvtRZ/CvtRM/CvtRP) matching the intrinsic's mode.
914 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
915           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
916 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
917           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
918 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
919           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
920 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
921           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
922
923 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
924           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
925 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
926           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
927 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
928           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
929 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
930           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
931
932
// f32 -> f16 conversions (rn and rn.ftz only). The CVT_f16_f32 result is
// wrapped in BITCONVERT_16_F2I.
// NOTE(review): the wrapper presumably exists because the intrinsic returns
// the half value as a 16-bit integer; both instructions are defined outside
// this excerpt — confirm.
933 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
934           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
935 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
936           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
937
938 //
939 // Bitcast
940 //
941
// Bit-pattern moves between integer and floating-point register classes.
// The 32-bit pair (f2i, i2f) uses "mov.b32" between Float32Regs and Int32Regs;
// the 64-bit pair (ll2d, d2ll) uses "mov.b64" between Int64Regs and
// Float64Regs. In each def the first register class is the destination and the
// second the source, as the intrinsic names indicate.
942 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
943   Float32Regs, int_nvvm_bitcast_f2i>;
944 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
945   Int32Regs, int_nvvm_bitcast_i2f>;
946
947 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
948   Int64Regs, int_nvvm_bitcast_ll2d>;
949 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
950   Float64Regs, int_nvvm_bitcast_d2ll>;
951
952 //
953 // FNS
954 //
955
// Common shape of every fns.b32 instruction variant.
//   ins      - input operand dag; must name its operands $mask, $base and
//              $offset because the asm string refers to them by those names.
//   Operands - the DAG whose result is assigned to $dst in the selection
//              pattern.
// The result is always an Int32Regs register, and the instruction is only
// available when both hasPTX60 and hasSM30 hold.
956 class INT_FNS_MBO<dag ins, dag Operands>
957   : NVPTXInst<(outs Int32Regs:$dst), ins,
958                "fns.b32 \t$dst, $mask, $base, $offset;",
959                [(set Int32Regs:$dst, Operands )]>,
960     Requires<[hasPTX60, hasSM30]>;
961
// The eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the operand kind for mask, base and offset in that order:
// 'r' = Int32Regs register, 'i' = i32imm operand matched with `imm`.
962 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
963                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
964 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
965                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
966 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
967                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
968 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
969                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
970 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
971                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
972 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
973                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
974 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
975                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
976 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
977                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
978
979 //-----------------------------------
980 // Atomic Functions
981 //-----------------------------------
982
// PatFrag wrappers that attach an address-space predicate to an atomic node.
// The predicate code comes from AS_match (defined near the top of this file),
// whose global/shared/generic entries each call ChkMemSDNodeAddressSpace with
// the corresponding llvm::ADDRESS_SPACE_* constant.
//   ops  - the fragment's operand list.
//   frag - the DAG fragment being wrapped.
983 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
984  : PatFrag<ops, frag, AS_match.global>;
985 class ATOMIC_SHARED_CHK <dag ops, dag frag>
986  : PatFrag<ops, frag, AS_match.shared>;
987 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
988  : PatFrag<ops, frag, AS_match.generic>;
989
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass - register class of the $addr operand.
//   regclass - register class of the result and of the register form of $b.
//   SpaceStr, OpcStr, TypeStr - concatenated, in that order, after "atom" to
//              form the PTX mnemonic.
//   IntOp    - PatFrag matched by the selection pattern.
//   IMMType  - operand type of $b in the immediate form.
//   IMM      - SDNode used to match $b in the immediate form.
//   Pred     - predicates passed to Requires<>.
// Emits two records: `reg` (value in a register) and `imm` (value as an
// immediate). Both use the same asm text; the trailing "" argument in the
// `imm` !strconcat appends an empty string and does not alter it.
990 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
991   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
992   Operand IMMType, SDNode IMM, list<Predicate> Pred> {
993   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
994     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
995     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
996   Requires<Pred>;
997   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
998     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
999     [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
1000   Requires<Pred>;
1001 }
// Instantiates F_ATOMIC_2_imp twice: `p32` with Int32Regs as the pointer
// register class and `p64` with Int64Regs. All other arguments are forwarded
// unchanged. Pred defaults to the empty list, i.e. no Requires<> predicates.
1002 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1003   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1004   list<Predicate> Pred = []> {
1005   defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1006     IntOp, IMMType, IMM, Pred>;
1007   defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1008     IntOp, IMMType, IMM, Pred>;
1009 }
1010
1011 // Has 2 operands; the second one is negated before the atomic operation.
// Two-operand atomic whose value operand is negated first, for a single
// pointer width. The emitted asm is a braced block that:
//   1. declares a scratch register `temp` of type .s<TypeStr>,
//   2. writes the negation of $b into it with neg.s<TypeStr>,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with `temp`.
// TypeStr is therefore only the bit width (the ".s"/".u" prefixes are added
// here). Only a register form (`reg`) is defined; IMMType is accepted but not
// referenced in this body.
1012 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1013   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1014   Operand IMMType, list<Predicate> Pred> {
1015   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1016     !strconcat(
1017       "{{ \n\t",
1018       ".reg \t.s", TypeStr, " temp; \n\t",
1019       "neg.s", TypeStr, " \ttemp, $b; \n\t",
1020       "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1021       "}}"),
1022     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1023   Requires<Pred>;
1024 }
// Instantiates F_ATOMIC_2_NEG_imp for both pointer widths: `p32` uses
// Int32Regs and `p64` uses Int64Regs as the pointer register class. Pred
// defaults to the empty list.
1025 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1026   string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1027   list<Predicate> Pred = []> {
1028  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1029    IntOp, IMMType, Pred> ;
1030  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1031    IntOp, IMMType, Pred> ;
1032 }
1033
1034 // has 3 operands
// Three-operand atomic (address + two values $b and $c) for a single pointer
// width. The mnemonic is "atom" followed by SpaceStr, OpcStr and TypeStr.
// Four records cover the register/immediate combinations of the value
// operands:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are declared with IMMType and always matched with the
// `imm` node (there is no IMM parameter here, unlike F_ATOMIC_2_imp). The
// trailing "" in imm2's !strconcat appends an empty string and has no effect
// on the asm text.
1035 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1036   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1037   Operand IMMType, list<Predicate> Pred> {
1038   def reg : NVPTXInst<(outs regclass:$dst),
1039     (ins ptrclass:$addr, regclass:$b, regclass:$c),
1040     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1041     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1042   Requires<Pred>;
1043
1044   def imm1 : NVPTXInst<(outs regclass:$dst),
1045     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1046     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1047     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1048   Requires<Pred>;
1049
1050   def imm2 : NVPTXInst<(outs regclass:$dst),
1051     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1052     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1053     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1054   Requires<Pred>;
1055
1056   def imm3 : NVPTXInst<(outs regclass:$dst),
1057     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1058     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1059     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
1060   Requires<Pred>;
1061 }
// Instantiates F_ATOMIC_3_imp for both pointer widths: `p32` with Int32Regs
// and `p64` with Int64Regs as the pointer register class. Pred defaults to the
// empty list.
1062 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1063   string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1064   defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1065     IntOp, IMMType, Pred>;
1066   defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1067     IntOp, IMMType, Pred>;
1068 }
1069
1070 // atom_add
1071
// Address-space-qualified fragments for atomic add. Suffixes: _g = global,
// _s = shared, _gen = generic (via the ATOMIC_*_CHK wrappers).
// The _32 and _64 fragments wrap atomic_load_add_32 / atomic_load_add_64; the
// fragments without a width suffix (atomic_load_add_g/_s/_gen) wrap
// atomic_load_fadd and are the ones used by the floating-point instructions.
1072 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1073   (atomic_load_add_32 node:$a, node:$b)>;
1074 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1075   (atomic_load_add_32 node:$a, node:$b)>;
1076 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1077   (atomic_load_add_32 node:$a, node:$b)>;
1078 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1079   (atomic_load_add_64 node:$a, node:$b)>;
1080 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1081   (atomic_load_add_64 node:$a, node:$b)>;
1082 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1083   (atomic_load_add_64 node:$a, node:$b)>;
1084 def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1085   (atomic_load_fadd node:$a, node:$b)>;
1086 def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1087   (atomic_load_fadd node:$a, node:$b)>;
1088 def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1089   (atomic_load_fadd node:$a, node:$b)>;
1090
// atom.add instructions. Name parts: G/S/GEN give the matched address space,
// 32/64/F32/F64 the value type. The space string is ".global", ".shared", or
// empty for generic; the type string is .u32/.u64 for integers and .f32/.f64
// for floating point.
// The *_GEN_*_USE_G variants match the generic-space fragment but emit the
// ".global" mnemonic.
// NOTE(review): in this excerpt GEN and GEN_*_USE_G match the same fragment
// with no predicate to tell them apart (Pred is left at its [] default) —
// confirm how the intended one is chosen.
1091 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1092   atomic_load_add_32_g, i32imm, imm>;
1093 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1094   atomic_load_add_32_s, i32imm, imm>;
1095 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1096   atomic_load_add_32_gen, i32imm, imm>;
1097 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1098   ".add", atomic_load_add_32_gen, i32imm, imm>;
1099
1100 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1101   atomic_load_add_64_g, i64imm, imm>;
1102 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1103   atomic_load_add_64_s, i64imm, imm>;
1104 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1105   atomic_load_add_64_gen, i64imm, imm>;
1106 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1107   ".add", atomic_load_add_64_gen, i64imm, imm>;
1108
// Floating-point forms use the atomic_load_fadd-based fragments and match
// immediates with fpimm instead of imm.
1109 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1110   atomic_load_add_g, f32imm, fpimm>;
1111 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1112   atomic_load_add_s, f32imm, fpimm>;
1113 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1114   atomic_load_add_gen, f32imm, fpimm>;
1115
// The f64 forms are additionally gated on the hasAtomAddF64 predicate.
1116 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1117   atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1118 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1119   atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1120 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1121   atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1122
1123 // atom_sub
1124
// Address-space-qualified fragments for atomic subtract, wrapping
// atomic_load_sub_32 / atomic_load_sub_64 for the global (_g), shared (_s) and
// generic (_gen) address spaces.
1125 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1126   (atomic_load_sub_32 node:$a, node:$b)>;
1127 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1128   (atomic_load_sub_32 node:$a, node:$b)>;
1129 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1130   (atomic_load_sub_32 node:$a, node:$b)>;
1131 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1132   (atomic_load_sub_64 node:$a, node:$b)>;
1133 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1134   (atomic_load_sub_64 node:$a, node:$b)>;
1135 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1136   (atomic_load_sub_64 node:$a, node:$b)>;
1137
// Atomic subtract, emitted through F_ATOMIC_2_NEG with OpcStr ".add": the
// value operand is negated and then atomically added. TypeStr is the bare
// width ("32"/"64") because F_ATOMIC_2_NEG_imp adds the ".s"/".u" prefixes
// itself. Only register forms exist (F_ATOMIC_2_NEG_imp defines no `imm`
// record).
1138 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1139   atomic_load_sub_32_g, i32imm>;
1140 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1141   atomic_load_sub_64_g, i64imm>;
1142 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1143   atomic_load_sub_32_gen, i32imm>;
1144 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1145   ".add", atomic_load_sub_32_gen, i32imm>;
1146 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1147   atomic_load_sub_32_s, i32imm>;
1148 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1149   atomic_load_sub_64_s, i64imm>;
1150 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1151   atomic_load_sub_64_gen, i64imm>;
1152 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1153   ".add", atomic_load_sub_64_gen, i64imm>;
1154
1155 // atom_swap
1156
// Address-space-qualified fragments for atomic exchange, wrapping
// atomic_swap_32 / atomic_swap_64 for the global (_g), shared (_s) and generic
// (_gen) address spaces.
1157 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1158   (atomic_swap_32 node:$a, node:$b)>;
1159 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1160   (atomic_swap_32 node:$a, node:$b)>;
1161 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1162   (atomic_swap_32 node:$a, node:$b)>;
1163 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1164   (atomic_swap_64 node:$a, node:$b)>;
1165 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1166   (atomic_swap_64 node:$a, node:$b)>;
1167 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1168   (atomic_swap_64 node:$a, node:$b)>;
1169
// Atomic exchange: OpcStr is ".exch" and the type string is the untyped
// .b32/.b64 form. G/S/GEN select the global, shared and generic fragments;
// the *_USE_G variants match the generic fragment but emit ".global".
1170 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1171   atomic_swap_32_g, i32imm, imm>;
1172 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1173   atomic_swap_32_s, i32imm, imm>;
1174 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1175   atomic_swap_32_gen, i32imm, imm>;
1176 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1177   ".exch", atomic_swap_32_gen, i32imm, imm>;
1178 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1179   atomic_swap_64_g, i64imm, imm>;
1180 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1181   atomic_swap_64_s, i64imm, imm>;
1182 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1183   atomic_swap_64_gen, i64imm, imm>;
1184 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1185   ".exch", atomic_swap_64_gen, i64imm, imm>;
1186
1187 // atom_max
1188
// Address-space-qualified fragments for atomic max. The atomic_load_max_*
// fragments wrap atomic_load_max_32/_64 and the atomic_load_umax_* fragments
// wrap atomic_load_umax_32/_64, each for the global (_g), shared (_s) and
// generic (_gen) address spaces.
1189 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1190   , (atomic_load_max_32 node:$a, node:$b)>;
1191 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1192   (atomic_load_max_32 node:$a, node:$b)>;
1193 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1194   (atomic_load_max_32 node:$a, node:$b)>;
1195 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1196   , (atomic_load_max_64 node:$a, node:$b)>;
1197 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1198   (atomic_load_max_64 node:$a, node:$b)>;
1199 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1200   (atomic_load_max_64 node:$a, node:$b)>;
1201 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1202   (atomic_load_umax_32 node:$a, node:$b)>;
1203 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1204   (atomic_load_umax_32 node:$a, node:$b)>;
1205 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1206   (atomic_load_umax_32 node:$a, node:$b)>;
1207 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1208   (atomic_load_umax_64 node:$a, node:$b)>;
1209 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1210   (atomic_load_umax_64 node:$a, node:$b)>;
1211 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1212   (atomic_load_umax_64 node:$a, node:$b)>;
1213
// Atomic max: OpcStr is ".max". The LOAD_MAX variants use the
// atomic_load_max_* fragments with type strings .s32/.s64; the LOAD_UMAX
// variants use the atomic_load_umax_* fragments with .u32/.u64. G/S/GEN select
// the address-space fragment; the *_USE_G variants match the generic fragment
// but emit ".global".
1214 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1215   ".max", atomic_load_max_32_g, i32imm, imm>;
1216 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1217   ".max", atomic_load_max_32_s, i32imm, imm>;
1218 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1219   atomic_load_max_32_gen, i32imm, imm>;
1220 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1221   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1222 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1223   ".max", atomic_load_max_64_g, i64imm, imm>;
1224 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1225   ".max", atomic_load_max_64_s, i64imm, imm>;
1226 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1227   atomic_load_max_64_gen, i64imm, imm>;
1228 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1229   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
1230 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1231   ".max", atomic_load_umax_32_g, i32imm, imm>;
1232 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1233   ".max", atomic_load_umax_32_s, i32imm, imm>;
1234 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1235   atomic_load_umax_32_gen, i32imm, imm>;
1236 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1237   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1238 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1239   ".max", atomic_load_umax_64_g, i64imm, imm>;
1240 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1241   ".max", atomic_load_umax_64_s, i64imm, imm>;
1242 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1243   atomic_load_umax_64_gen, i64imm, imm>;
1244 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1245   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
1246
1247 // atom_min
1248
// Address-space-qualified fragments for atomic min. The atomic_load_min_*
// fragments wrap atomic_load_min_32/_64 and the atomic_load_umin_* fragments
// wrap atomic_load_umin_32/_64, each for the global (_g), shared (_s) and
// generic (_gen) address spaces.
1249 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1250   (atomic_load_min_32 node:$a, node:$b)>;
1251 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1252   (atomic_load_min_32 node:$a, node:$b)>;
1253 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1254   (atomic_load_min_32 node:$a, node:$b)>;
1255 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1256   (atomic_load_min_64 node:$a, node:$b)>;
1257 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1258   (atomic_load_min_64 node:$a, node:$b)>;
1259 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1260   (atomic_load_min_64 node:$a, node:$b)>;
1261 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1262   (atomic_load_umin_32 node:$a, node:$b)>;
1263 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1264   (atomic_load_umin_32 node:$a, node:$b)>;
1265 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1266   (atomic_load_umin_32 node:$a, node:$b)>;
1267 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1268   (atomic_load_umin_64 node:$a, node:$b)>;
1269 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1270   (atomic_load_umin_64 node:$a, node:$b)>;
1271 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1272   (atomic_load_umin_64 node:$a, node:$b)>;
1273
// Atomic min: OpcStr is ".min". The LOAD_MIN variants use the
// atomic_load_min_* fragments with type strings .s32/.s64; the LOAD_UMIN
// variants use the atomic_load_umin_* fragments with .u32/.u64. G/S/GEN select
// the address-space fragment; the *_USE_G variants match the generic fragment
// but emit ".global".
1274 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1275   ".min", atomic_load_min_32_g, i32imm, imm>;
1276 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1277   ".min", atomic_load_min_32_s, i32imm, imm>;
1278 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1279   atomic_load_min_32_gen, i32imm, imm>;
1280 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1281   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1282 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1283   ".min", atomic_load_min_64_g, i64imm, imm>;
1284 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1285   ".min", atomic_load_min_64_s, i64imm, imm>;
1286 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1287   atomic_load_min_64_gen, i64imm, imm>;
1288 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1289   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1290 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1291   ".min", atomic_load_umin_32_g, i32imm, imm>;
1292 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1293   ".min", atomic_load_umin_32_s, i32imm, imm>;
1294 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1295   atomic_load_umin_32_gen, i32imm, imm>;
1296 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1297   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1298 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1299   ".min", atomic_load_umin_64_g, i64imm, imm>;
1300 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1301   ".min", atomic_load_umin_64_s, i64imm, imm>;
1302 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1303   atomic_load_umin_64_gen, i64imm, imm>;
1304 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1305   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1306
1307 // atom_inc  atom_dec
1308
// Address-space-qualified fragments for 32-bit atomic inc/dec.  Unlike the
// other atomics in this file these wrap the NVVM intrinsics
// int_nvvm_atomic_load_{inc,dec}_32 rather than a generic atomic_load_* node,
// and only 32-bit forms are defined.
// Suffixes: _g = ATOMIC_GLOBAL_CHK, _s = ATOMIC_SHARED_CHK,
// _gen = ATOMIC_GENERIC_CHK (classes defined outside this chunk).
1309 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1310   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1311 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1312   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1313 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1314   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1315 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1316   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1317 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1318   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1319 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1320   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1321
// ".inc" / ".dec" atomic instructions.  All use the ".u32" type suffix and
// Int32Regs; there are no 64-bit forms.  One defm per address-space fragment,
// plus a *_GEN_32_USE_G form that pairs the generic fragment with ".global".
// NOTE(review): F_ATOMIC_2 is defined outside this chunk -- see it for how the
// string arguments are assembled into the instruction text.
1322 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1323   atomic_load_inc_32_g, i32imm, imm>;
1324 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1325   atomic_load_inc_32_s, i32imm, imm>;
1326 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1327   atomic_load_inc_32_gen, i32imm, imm>;
1328 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1329   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1330 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1331   atomic_load_dec_32_g, i32imm, imm>;
1332 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1333   atomic_load_dec_32_s, i32imm, imm>;
1334 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1335   atomic_load_dec_32_gen, i32imm, imm>;
1336 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1337   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1338
1339 // atom_and
1340
// Address-space-qualified fragments for the 32- and 64-bit atomic AND nodes.
// Suffixes: _g = ATOMIC_GLOBAL_CHK, _s = ATOMIC_SHARED_CHK,
// _gen = ATOMIC_GENERIC_CHK (classes defined outside this chunk).
1341 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1342   (atomic_load_and_32 node:$a, node:$b)>;
1343 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1344   (atomic_load_and_32 node:$a, node:$b)>;
1345 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1346   (atomic_load_and_32 node:$a, node:$b)>;
1347 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1348   (atomic_load_and_64 node:$a, node:$b)>;
1349 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1350   (atomic_load_and_64 node:$a, node:$b)>;
1351 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1352   (atomic_load_and_64 node:$a, node:$b)>;
1353
// ".and" atomic instructions, using the untyped bit-size suffixes .b32/.b64.
// One defm per address-space fragment and width, plus *_GEN_*_USE_G forms that
// pair the generic fragment with the ".global" suffix.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk.
1354 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1355   atomic_load_and_32_g, i32imm, imm>;
1356 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1357   atomic_load_and_32_s, i32imm, imm>;
1358 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1359   atomic_load_and_32_gen, i32imm, imm>;
1360 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1361   ".and", atomic_load_and_32_gen, i32imm, imm>;
1362 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1363   atomic_load_and_64_g, i64imm, imm>;
1364 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1365   atomic_load_and_64_s, i64imm, imm>;
1366 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1367   atomic_load_and_64_gen, i64imm, imm>;
1368 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1369   ".and", atomic_load_and_64_gen, i64imm, imm>;
1370
1371 // atom_or
1372
// Address-space-qualified fragments for the 32- and 64-bit atomic OR nodes.
// Suffixes: _g = ATOMIC_GLOBAL_CHK, _s = ATOMIC_SHARED_CHK,
// _gen = ATOMIC_GENERIC_CHK (classes defined outside this chunk).
1373 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1374   (atomic_load_or_32 node:$a, node:$b)>;
1375 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1376   (atomic_load_or_32 node:$a, node:$b)>;
1377 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1378   (atomic_load_or_32 node:$a, node:$b)>;
1379 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1380   (atomic_load_or_64 node:$a, node:$b)>;
1381 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1382   (atomic_load_or_64 node:$a, node:$b)>;
1383 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1384   (atomic_load_or_64 node:$a, node:$b)>;
1385
// ".or" atomic instructions, using the untyped bit-size suffixes .b32/.b64.
// Note the listing order here is G, GEN, GEN_USE_G, S (the neighbouring
// and/xor sections list S before GEN); the set of variants is the same.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk.
1386 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1387   atomic_load_or_32_g, i32imm, imm>;
1388 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1389   atomic_load_or_32_gen, i32imm, imm>;
1390 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1391   ".or", atomic_load_or_32_gen, i32imm, imm>;
1392 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1393   atomic_load_or_32_s, i32imm, imm>;
1394 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1395   atomic_load_or_64_g, i64imm, imm>;
1396 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1397   atomic_load_or_64_gen, i64imm, imm>;
1398 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1399   ".or", atomic_load_or_64_gen, i64imm, imm>;
1400 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1401   atomic_load_or_64_s, i64imm, imm>;
1402
1403 // atom_xor
1404
// Address-space-qualified fragments for the 32- and 64-bit atomic XOR nodes.
// Suffixes: _g = ATOMIC_GLOBAL_CHK, _s = ATOMIC_SHARED_CHK,
// _gen = ATOMIC_GENERIC_CHK (classes defined outside this chunk).
1405 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1406   (atomic_load_xor_32 node:$a, node:$b)>;
1407 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1408   (atomic_load_xor_32 node:$a, node:$b)>;
1409 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1410   (atomic_load_xor_32 node:$a, node:$b)>;
1411 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1412   (atomic_load_xor_64 node:$a, node:$b)>;
1413 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1414   (atomic_load_xor_64 node:$a, node:$b)>;
1415 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1416   (atomic_load_xor_64 node:$a, node:$b)>;
1417
// ".xor" atomic instructions, using the untyped bit-size suffixes .b32/.b64.
// One defm per address-space fragment and width, plus *_GEN_*_USE_G forms that
// pair the generic fragment with the ".global" suffix.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk.
1418 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1419   atomic_load_xor_32_g, i32imm, imm>;
1420 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1421   atomic_load_xor_32_s, i32imm, imm>;
1422 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1423   atomic_load_xor_32_gen, i32imm, imm>;
1424 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1425   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1426 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1427   atomic_load_xor_64_g, i64imm, imm>;
1428 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1429   atomic_load_xor_64_s, i64imm, imm>;
1430 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1431   atomic_load_xor_64_gen, i64imm, imm>;
1432 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1433   ".xor", atomic_load_xor_64_gen, i64imm, imm>;
1434
1435 // atom_cas
1436
// Address-space-qualified fragments for 32- and 64-bit compare-and-swap.
// These take three operands ($a, $b, $c) instead of the two used by the
// read-modify-write fragments.
// Suffixes: _g = ATOMIC_GLOBAL_CHK, _s = ATOMIC_SHARED_CHK,
// _gen = ATOMIC_GENERIC_CHK (classes defined outside this chunk).
1437 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1438   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1439 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1440   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1441 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1442   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1443 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1444   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1445 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1446   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1447 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1448   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1449
// ".cas" atomic instructions (.b32/.b64).  These use F_ATOMIC_3 and, unlike the
// F_ATOMIC_2 instantiations in this file, pass only an immediate operand type
// (i32imm/i64imm) with no separate immediate node argument.
// NOTE(review): F_ATOMIC_3 is defined outside this chunk -- confirm its
// parameter list there.
1450 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1451   atomic_cmp_swap_32_g, i32imm>;
1452 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1453   atomic_cmp_swap_32_s, i32imm>;
1454 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1455   atomic_cmp_swap_32_gen, i32imm>;
1456 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1457   ".cas", atomic_cmp_swap_32_gen, i32imm>;
1458 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1459   atomic_cmp_swap_64_g, i64imm>;
1460 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1461   atomic_cmp_swap_64_s, i64imm>;
1462 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1463   atomic_cmp_swap_64_gen, i64imm>;
1464 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1465   ".cas", atomic_cmp_swap_64_gen, i64imm>;
1466
1467 // Support for scoped atomic operations.  Matches
1468 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1469 // and converts it into the appropriate instruction.
1470 // NOTE: not all possible combinations are implemented
1471 //  'space' is limited to generic as it's the only one needed to support CUDA.
1472 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Base instruction class shared by the two- and three-operand scoped atomics.
//   AsmStr   - assembly string for the instruction.
//   regclass - register class of the single $result output.
//   Preds    - predicates attached through Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value is assigned to $result in the selection pattern.
1473 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1474                   dag ins, dag Operands>
1475       : NVPTXInst<(outs regclass:$result), ins,
1476                   AsmStr,
1477                   [(set regclass:$result, Operands)]>,
1478         Requires<Preds>;
1479
1480 // Define instruction variants for all addressing modes.
// Two-operand scoped atomic: emits four anonymous ATOM23_impl instructions
// matching intrinsic Intr, covering $src held in Int32Regs or Int64Regs
// crossed with $b as a register (regclass) or as an immediate (ImmType/Imm).
//   ImmTy - value type the immediate is cast to inside the pattern.
1481 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1482                        NVPTXRegClass regclass, Operand ImmType,
1483                        SDNode Imm, ValueType ImmTy,
1484                        list<Predicate> Preds> {
       // Register-operand forms; these carry AddedComplexity = 1, the immediate
       // forms below keep the default.
1485   let AddedComplexity = 1 in {
1486     def : ATOM23_impl<AsmStr, regclass, Preds,
1487                       (ins Int32Regs:$src, regclass:$b),
1488                       (Intr Int32Regs:$src, regclass:$b)>;
1489     def : ATOM23_impl<AsmStr, regclass, Preds,
1490                       (ins Int64Regs:$src, regclass:$b),
1491                       (Intr Int64Regs:$src, regclass:$b)>;
1492   }
1493   // tablegen can't infer argument types from Intrinsic (though it can
1494   // from Instruction) so we have to enforce specific type on
1495   // immediates via explicit cast to ImmTy.
1496   def : ATOM23_impl<AsmStr, regclass, Preds,
1497                     (ins Int32Regs:$src, ImmType:$b),
1498                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1499   def : ATOM23_impl<AsmStr, regclass, Preds,
1500                     (ins Int64Regs:$src, ImmType:$b),
1501                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1502 }
1503
// Three-operand scoped atomic: emits eight anonymous ATOM23_impl instructions
// matching intrinsic Intr.  For each of $src in Int32Regs / Int64Regs it
// defines the four register/immediate combinations of $b and $c.
// AddedComplexity is 2 when both are registers, 1 when exactly one is a
// register, and left at the default when both are immediates.
1504 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1505                        NVPTXRegClass regclass, Operand ImmType,
1506                        SDNode Imm, ValueType ImmTy,
1507                        list<Predicate> Preds> {
1508   // Variants for register/immediate permutations of $b and $c
1509   let AddedComplexity = 2 in {
1510     def : ATOM23_impl<AsmStr, regclass, Preds,
1511                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
1512                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1513     def : ATOM23_impl<AsmStr, regclass, Preds,
1514                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
1515                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1516   }
1517   let AddedComplexity = 1 in {
1518     def : ATOM23_impl<AsmStr, regclass, Preds,
1519                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1520                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1521     def : ATOM23_impl<AsmStr, regclass, Preds,
1522                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1523                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1524     def : ATOM23_impl<AsmStr, regclass, Preds,
1525                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1526                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1527     def : ATOM23_impl<AsmStr, regclass, Preds,
1528                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1529                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1530   }
1531   def : ATOM23_impl<AsmStr, regclass, Preds,
1532                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1533                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1534   def : ATOM23_impl<AsmStr, regclass, Preds,
1535                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1536                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1537 }
1538
1539 // Constructs intrinsic name and instruction asm strings.
// Builds the asm string and looks up the intrinsic for a two-operand scoped
// atomic, then forwards both to ATOM2P_impl.
//   asm:       "atom" [.<SpaceStr> unless "gen"] [.<ScopeStr> unless "gpu"]
//              .<OpStr>.<TypeStr> followed by " \t$result, [$src], $b;"
//   intrinsic: int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
//              (the scope suffix is omitted only when ScopeStr is empty).
1540 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1541                        string ScopeStr, string SpaceStr,
1542                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1543                        ValueType ImmTy, list<Predicate> Preds> {
1544   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1545                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1546                             # "." # OpStr # "." # TypeStr
1547                             # " \t$result, [$src], $b;",
1548                      !cast<Intrinsic>(
1549                             "int_nvvm_atomic_" # OpStr
1550                             # "_" # SpaceStr # "_" # IntTypeStr
1551                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1552                      regclass, ImmType, Imm, ImmTy, Preds>;
1553 }
// Three-operand counterpart of the two-operand name builder: assembles the asm
// string and intrinsic name, then forwards to ATOM3P_impl.
//   asm:       "atom" [.<SpaceStr> unless "gen"] [.<ScopeStr> unless "gpu"]
//              .<OpStr>.<TypeStr> followed by " \t$result, [$src], $b, $c;"
//   intrinsic: int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
//              (the scope suffix is omitted only when ScopeStr is empty).
1554 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1555                        string ScopeStr, string SpaceStr,
1556                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1557                        ValueType ImmTy, list<Predicate> Preds> {
1558   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1559                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1560                             # "." # OpStr # "." # TypeStr
1561                             # " \t$result, [$src], $b, $c;",
1562                      !cast<Intrinsic>(
1563                             "int_nvvm_atomic_" # OpStr
1564                             # "_" # SpaceStr # "_" # IntTypeStr
1565                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1566                      regclass, ImmType, Imm, ImmTy, Preds>;
1567 }
1568
1569 // Constructs variants for different address spaces.
1570 // For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics.  Only one variant is
// produced: "_gen_", which passes "gen" as SpaceStr to ATOM2N_impl and forwards
// every other argument unchanged.
1571 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1572                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1573                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1574    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1575                             regclass, ImmType, Imm, ImmTy, Preds>;
1576 }
// Address-space layer for three-operand scoped atomics.  Only one variant is
// produced: "_gen_", which passes "gen" as SpaceStr to ATOM3N_impl and forwards
// every other argument unchanged.
1577 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1578                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1579                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1580    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1581                             regclass, ImmType, Imm, ImmTy, Preds>;
1582 }
1583
1584 // Constructs variants for different scopes of atomic op.
// Scope layer for two-operand atomics: instantiates ATOM2A_impl once with
// scope "cta" and once with scope "sys".  Both append hasAtomScope to the
// caller-supplied predicate list.
1585 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1586                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1587                        ValueType ImmTy, list<Predicate> Preds> {
1588    // .gpu scope is default and is currently covered by existing
1589    // atomics w/o explicitly specified scope.
1590    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1591                            regclass, ImmType, Imm, ImmTy,
1592                            !listconcat(Preds,[hasAtomScope])>;
1593    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1594                            regclass, ImmType, Imm, ImmTy,
1595                            !listconcat(Preds,[hasAtomScope])>;
1596 }
// Scope layer for three-operand atomics: instantiates ATOM3A_impl once with
// scope "cta" and once with scope "sys".  Both append hasAtomScope to the
// caller-supplied predicate list.
1597 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1598            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1599            list<Predicate> Preds> {
1600    // No need to define ".gpu"-scoped atomics.  They do the same thing
1601    // as the regular, non-scoped atomics defined elsewhere.
1602    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1603                            regclass, ImmType, Imm, ImmTy,
1604                            !listconcat(Preds,[hasAtomScope])>;
1605    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1606                            regclass, ImmType, Imm, ImmTy,
1607                            !listconcat(Preds,[hasAtomScope])>;
1608 }
1609
1610 // atom.add
// Type variants for scoped add: s32, u32, u64, f32 and f64 (no s64 variant is
// defined).  Integer variants use "i" and floating-point variants use "f" as
// the intrinsic type string.  Only the f64 variant carries an extra predicate,
// hasAtomAddF64.
1611 multiclass ATOM2_add_impl<string OpStr> {
1612    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1613    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1614    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1615    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1616                             []>;
1617    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1618                             [hasAtomAddF64]>;
1619 }
1620
1621 // atom.{and,or,xor}
// Type variants for scoped bitwise ops: b32 and b64.  The 64-bit variant is
// additionally gated on hasAtomBitwise64.
1622 multiclass ATOM2_bitwise_impl<string OpStr> {
1623    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1624    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1625                             [hasAtomBitwise64]>;
1626 }
1627
1628 // atom.exch
// Type variants for scoped exchange: b32 and b64, neither with an extra
// predicate beyond what ATOM2S_impl adds.
1629 multiclass ATOM2_exch_impl<string OpStr> {
1630    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1631    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1632 }
1633
1634 // atom.{min,max}
// Type variants for scoped min/max: s32, u32, s64 and u64.  Both 64-bit
// variants are additionally gated on hasAtomMinMax64.
1635 multiclass ATOM2_minmax_impl<string OpStr> {
1636    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1637    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1638    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1639                             [hasAtomMinMax64]>;
1640    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1641                             [hasAtomMinMax64]>;
1642 }
1643
1644 // atom.{inc,dec}
// Type variants for scoped inc/dec: u32 only.
1645 multiclass ATOM2_incdec_impl<string OpStr> {
1646    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1647 }
1648
1649 // atom.cas
// Type variants for scoped compare-and-swap: b32 and b64, built on the
// three-operand ATOM3S_impl.
1650 multiclass ATOM3_cas_impl<string OpStr> {
1651    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1652    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1653 }
1654
// Instantiate the scoped atomic instructions.  The string argument is the
// operation name; the per-type multiclasses pass it down as OpStr, where it is
// used in both the asm string and the intrinsic name.
1655 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1656 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1657 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1658 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1659 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1660 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1661 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1662 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1663 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1664 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1665
1666 //-----------------------------------
1667 // Support for ldu on sm_20 or later
1668 //-----------------------------------
1669
1670 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1671 // read-only in a kernel.
1672
1673 // Scalar
1674
// Scalar ldu.global instruction, one def per address operand form:
//   areg / areg64 - address in Int32Regs / Int64Regs
//   avar          - imemAny operand
//   ari / ari64   - MEMri / MEMri64 operand
// TyStr is appended verbatim after "ldu.global.", so it must supply the type
// suffix and the operand list.  Every def requires hasLDU and has an empty
// pattern list.
// NOTE(review): with no patterns, selection presumably happens in C++ --
// confirm against the instruction selector.
1675 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1676   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1677                !strconcat("ldu.global.", TyStr),
1678                       []>, Requires<[hasLDU]>;
1679   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1680                !strconcat("ldu.global.", TyStr),
1681                         []>, Requires<[hasLDU]>;
1682  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1683                !strconcat("ldu.global.", TyStr),
1684                       []>, Requires<[hasLDU]>;
1685  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1686                !strconcat("ldu.global.", TyStr),
1687                       []>, Requires<[hasLDU]>;
1688  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1689                !strconcat("ldu.global.", TyStr),
1690                         []>, Requires<[hasLDU]>;
1691 }
1692
// Scalar ldu.global instantiations.  The string supplies the PTX type suffix
// and operand list.  Points worth noting from the arguments below: i8 results
// use Int16Regs, f16 and f16x2 are loaded as untyped b16 / b32, and the p32 /
// p64 variants use the same type suffix and register class as i32 / i64.
1693 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1694 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1695 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1696 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1697 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1698 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1699 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1700 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1701 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1702 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1703
1704 // vector
1705
1706 // Elementized vector ldu
// Two-element vector ldu.global: each def produces two results ($dst1, $dst2)
// of class regclass.  One def per address operand form (_areg32, _areg64,
// _ari32, _ari64, _avar).  TyStr is appended verbatim after "ldu.global.".
// Unlike the scalar LDU_G defs, these carry no Requires<> clause; the pattern
// lists are empty.
1707 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1708  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1709                      (ins Int32Regs:$src),
1710                      !strconcat("ldu.global.", TyStr), []>;
1711  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1712                      (ins Int64Regs:$src),
1713                      !strconcat("ldu.global.", TyStr), []>;
1714  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1715                      (ins MEMri:$src),
1716                      !strconcat("ldu.global.", TyStr), []>;
1717  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1718                      (ins MEMri64:$src),
1719                      !strconcat("ldu.global.", TyStr), []>;
1720  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1721                      (ins imemAny:$src),
1722                      !strconcat("ldu.global.", TyStr), []>;
1723 }
1724
// Four-element vector ldu.global: each def produces four results
// ($dst1..$dst4) of class regclass.  One def per address operand form
// (_areg32, _areg64, _ari32, _ari64, _avar).  TyStr is appended verbatim after
// "ldu.global.".  No Requires<> clause and empty pattern lists.
1725 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1726  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1727                             regclass:$dst4), (ins Int32Regs:$src), 
1728                !strconcat("ldu.global.", TyStr), []>;
1729  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1730                             regclass:$dst4), (ins Int64Regs:$src), 
1731                !strconcat("ldu.global.", TyStr), []>;
1732  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1733                             regclass:$dst4), (ins MEMri:$src), 
1734                !strconcat("ldu.global.", TyStr), []>;
1735  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1736                             regclass:$dst4), (ins MEMri64:$src), 
1737                !strconcat("ldu.global.", TyStr), []>;
1738  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1739                             regclass:$dst4), (ins imemAny:$src), 
1740                !strconcat("ldu.global.", TyStr), []>;
1741 }
1742
// Vector ldu.global instantiations.  v2 forms exist for i8, i16, i32, f16,
// f16x2, f32, i64 and f64; v4 forms exist for i8, i16, i32, f16, f16x2 and f32
// (no 64-bit v4 forms are defined here).  i8 elements use Int16Regs; f16 and
// f16x2 elements are loaded as untyped b16 / b32.
1743 defm INT_PTX_LDU_G_v2i8_ELE
1744   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1745 defm INT_PTX_LDU_G_v2i16_ELE
1746   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1747 defm INT_PTX_LDU_G_v2i32_ELE
1748   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1749 defm INT_PTX_LDU_G_v2f16_ELE
1750   : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1751 defm INT_PTX_LDU_G_v2f16x2_ELE
1752   : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1753 defm INT_PTX_LDU_G_v2f32_ELE
1754   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1755 defm INT_PTX_LDU_G_v2i64_ELE
1756   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1757 defm INT_PTX_LDU_G_v2f64_ELE
1758   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1759 defm INT_PTX_LDU_G_v4i8_ELE
1760   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1761 defm INT_PTX_LDU_G_v4i16_ELE
1762   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1763     Int16Regs>;
1764 defm INT_PTX_LDU_G_v4i32_ELE
1765   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1766     Int32Regs>;
1767 defm INT_PTX_LDU_G_v4f16_ELE
1768   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1769     Float16Regs>;
1770 defm INT_PTX_LDU_G_v4f16x2_ELE
1771   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1772     Float16x2Regs>;
1773 defm INT_PTX_LDU_G_v4f32_ELE
1774   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1775     Float32Regs>;
1776
1777
1778 //-----------------------------------
1779 // Support for ldg on sm_35 or later 
1780 //-----------------------------------
1781
1782 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1783 // non-coherent texture cache, and therefore the values read must be read-only
1784 // during the lifetime of the kernel.
1785
// Scalar ld.global.nc instruction, one def per address operand form:
//   areg / areg64 - address in Int32Regs / Int64Regs
//   avar          - imemAny operand
//   ari / ari64   - MEMri / MEMri64 operand
// TyStr is appended verbatim after "ld.global.nc.", so it must supply the type
// suffix and the operand list.  Every def requires hasLDG and has an empty
// pattern list.
1786 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1787   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1788                !strconcat("ld.global.nc.", TyStr),
1789                       []>, Requires<[hasLDG]>;
1790   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1791                !strconcat("ld.global.nc.", TyStr),
1792                         []>, Requires<[hasLDG]>;
1793  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1794                !strconcat("ld.global.nc.", TyStr),
1795                       []>, Requires<[hasLDG]>;
1796  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1797                !strconcat("ld.global.nc.", TyStr),
1798                       []>, Requires<[hasLDG]>;
1799  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1800                !strconcat("ld.global.nc.", TyStr),
1801                         []>, Requires<[hasLDG]>;
1802 }
1803
// Scalar ld.global.nc instantiations.  The string supplies the PTX type suffix
// and operand list.  i8 results use Int16Regs, f16 and f16x2 are loaded as
// untyped b16 / b32, and the p32 / p64 variants use the same type suffix and
// register class as i32 / i64.
1804 defm INT_PTX_LDG_GLOBAL_i8
1805   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1806 defm INT_PTX_LDG_GLOBAL_i16
1807   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1808 defm INT_PTX_LDG_GLOBAL_i32
1809   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1810 defm INT_PTX_LDG_GLOBAL_i64
1811   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1812 defm INT_PTX_LDG_GLOBAL_f16
1813   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1814 defm INT_PTX_LDG_GLOBAL_f16x2
1815   : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1816 defm INT_PTX_LDG_GLOBAL_f32
1817   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1818 defm INT_PTX_LDG_GLOBAL_f64
1819   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1820 defm INT_PTX_LDG_GLOBAL_p32
1821   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1822 defm INT_PTX_LDG_GLOBAL_p64
1823   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1824
1825 // vector
1826
1827 // Elementized vector ldg 
// Two-element vector ld.global.nc: each def produces two results ($dst1,
// $dst2) of class regclass.  One def per address operand form (_areg32,
// _areg64, _ari32, _ari64, _avar).  TyStr is appended verbatim after
// "ld.global.nc.".  Unlike the scalar LDG_G defs, these carry no Requires<>
// clause; the pattern lists are empty.
1828 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1829  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1830                      (ins Int32Regs:$src),
1831                      !strconcat("ld.global.nc.", TyStr), []>;
1832  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1833                      (ins Int64Regs:$src),
1834                      !strconcat("ld.global.nc.", TyStr), []>;
1835  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1836                      (ins MEMri:$src),
1837                      !strconcat("ld.global.nc.", TyStr), []>;
1838  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1839                      (ins MEMri64:$src),
1840                      !strconcat("ld.global.nc.", TyStr), []>;
1841  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1842                      (ins imemAny:$src),
1843                      !strconcat("ld.global.nc.", TyStr), []>;
1844 }
1845
// Four-element vector ld.global.nc: each def produces four results
// ($dst1..$dst4) of class regclass.  One def per address operand form
// (_areg32, _areg64, _ari32, _ari64, _avar).  TyStr is appended verbatim after
// "ld.global.nc.".  No Requires<> clause and empty pattern lists.
1846 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
1847   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1848                               regclass:$dst4), (ins Int32Regs:$src), 
1849                !strconcat("ld.global.nc.", TyStr), []>;
1850   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1851                                regclass:$dst4), (ins Int64Regs:$src), 
1852                !strconcat("ld.global.nc.", TyStr), []>;
1853   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1854                               regclass:$dst4), (ins MEMri:$src), 
1855                !strconcat("ld.global.nc.", TyStr), []>;
1856   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1857                               regclass:$dst4), (ins MEMri64:$src), 
1858                !strconcat("ld.global.nc.", TyStr), []>;
1859   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1860                              regclass:$dst4), (ins imemAny:$src), 
1861                !strconcat("ld.global.nc.", TyStr), []>;
1862 }
1863
1864 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ld.global.nc instantiations.  v2 forms exist for i8, i16, i32, f16,
// f16x2, f32, i64 and f64; v4 forms exist for i8, i16, i32, f16, f16x2 and f32
// (no 64-bit v4 forms are defined here).  i8 elements use Int16Regs; f16 and
// f16x2 elements are loaded as untyped b16 / b32.
1865 defm INT_PTX_LDG_G_v2i8_ELE
1866   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1867 defm INT_PTX_LDG_G_v2i16_ELE
1868   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1869 defm INT_PTX_LDG_G_v2i32_ELE
1870   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1871 defm INT_PTX_LDG_G_v2f16_ELE
1872   : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1873 defm INT_PTX_LDG_G_v2f16x2_ELE
1874   : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1875 defm INT_PTX_LDG_G_v2f32_ELE
1876   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1877 defm INT_PTX_LDG_G_v2i64_ELE
1878   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1879 defm INT_PTX_LDG_G_v2f64_ELE
1880   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1881 defm INT_PTX_LDG_G_v4i8_ELE
1882   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1883 defm INT_PTX_LDG_G_v4i16_ELE
1884   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1885 defm INT_PTX_LDG_G_v4i32_ELE
1886   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1887 defm INT_PTX_LDG_G_v4f16_ELE
1888   : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1889 defm INT_PTX_LDG_G_v4f16x2_ELE
1890   : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1891 defm INT_PTX_LDG_G_v4f32_ELE
1892   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1893
1894
// Multiclass for the "cvta.<Str>" family, selected from the intrinsic Intrin.
// Str is the address-space name spliced into the mnemonic. Three records:
//   _yes      : Int32Regs source -> Int32Regs result, "cvta.<Str>.u32".
//   _yes_64   : Int64Regs source -> Int64Regs result, "cvta.<Str>.u64".
//   _yes_6432 : Int32Regs source -> Int64Regs result. The asm opens a scope,
//               declares a .b64 scratch register %tmp, widens $src into it
//               with cvt.u64.u32, then applies "cvta.<Str>.u64" to %tmp.
//               Only available under the useShortPtr predicate.
1895 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1896    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1897           !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1898       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1899    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1900           !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1901       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1902    def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
1903           "{{ .reg .b64 %tmp;\n\t"
1904           #"  cvt.u64.u32 \t%tmp, $src;\n\t"
1905           #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
1906       [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
1907       Requires<[useShortPtr]>;
1908 }
1909
// Multiclass for the "cvta.to.<Str>" family, selected from the intrinsic
// Intrin. Str is the address-space name spliced into the mnemonic. Three
// records:
//   _yes      : Int32Regs source -> Int32Regs result, "cvta.to.<Str>.u32".
//   _yes_64   : Int64Regs source -> Int64Regs result, "cvta.to.<Str>.u64".
//   _yes_3264 : Int64Regs source -> Int32Regs result. The asm opens a scope,
//               declares a .b64 scratch register %tmp, runs
//               "cvta.to.<Str>.u64" into %tmp, then narrows %tmp into $result
//               with cvt.u32.u64. Only available under the useShortPtr
//               predicate.
1910 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1911    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1912           !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1913       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1914    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1915           !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1916       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1917    def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
1918           "{{ .reg .b64 %tmp;\n\t"
1919           #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
1920           #"  cvt.u32.u64 \t$result, %tmp; }}",
1921       [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
1922       Requires<[useShortPtr]>;
1923 }
1924
// Instantiate the address-space conversion multiclasses for the four named
// spaces (local, shared, global, const). The first group binds NG_TO_G to the
// int_nvvm_ptr_<space>_to_gen intrinsics; the second binds G_TO_NG to the
// int_nvvm_ptr_gen_to_<space> intrinsics. Note the mnemonic string is "const"
// while the intrinsic names spell it "constant".
1925 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1926 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1927 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1928 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1929
1930 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1931 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1932 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1933 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1934
1935
1936 // nvvm.ptr.gen.to.param
// 32-bit form of int_nvvm_ptr_gen_to_param: selected for an Int32Regs operand
// and emitted as a plain "mov.u32" from $src to $result (no cvta is issued).
1937 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1938   (ins Int32Regs:$src),
1939                         "mov.u32 \t$result, $src;",
1940                               [(set Int32Regs:$result,
1941                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
// 64-bit form of int_nvvm_ptr_gen_to_param: selected for an Int64Regs operand
// and emitted as a plain "mov.u64" from $src to $result (no cvta is issued).
1942 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1943   (ins Int64Regs:$src),
1944                         "mov.u64 \t$result, $src;",
1945                               [(set Int64Regs:$result,
1946                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1947
1948
1949 // nvvm.move intrinsics
// Selects int_nvvm_move_i16 on Int16Regs and emits "mov.b16" from $s to $r.
1950 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1951                              "mov.b16 \t$r, $s;",
1952                              [(set Int16Regs:$r,
1953                                (int_nvvm_move_i16 Int16Regs:$s))]>;
// Selects int_nvvm_move_i32 on Int32Regs and emits "mov.b32" from $s to $r.
1954 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1955                              "mov.b32 \t$r, $s;",
1956                              [(set Int32Regs:$r,
1957                                (int_nvvm_move_i32 Int32Regs:$s))]>;
// Selects int_nvvm_move_i64 on Int64Regs and emits "mov.b64" from $s to $r.
1958 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1959                              "mov.b64 \t$r, $s;",
1960                              [(set Int64Regs:$r,
1961                                (int_nvvm_move_i64 Int64Regs:$s))]>;
// Selects int_nvvm_move_float on Float32Regs and emits "mov.f32" from $s to $r.
1962 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1963                              "mov.f32 \t$r, $s;",
1964                              [(set Float32Regs:$r,
1965                                (int_nvvm_move_float Float32Regs:$s))]>;
// Selects int_nvvm_move_double on Float64Regs and emits "mov.f64" from $s to $r.
1966 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1967                              "mov.f64 \t$r, $s;",
1968                              [(set Float64Regs:$r,
1969                                (int_nvvm_move_double Float64Regs:$s))]>;
// 32-bit form of int_nvvm_move_ptr: Int32Regs operand, emitted as "mov.u32".
// Also used as the output instruction of an anonymous Pat later in this file.
1970 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1971                              "mov.u32 \t$r, $s;",
1972                              [(set Int32Regs:$r,
1973                                (int_nvvm_move_ptr Int32Regs:$s))]>;
// 64-bit form of int_nvvm_move_ptr: Int64Regs operand, emitted as "mov.u64".
// Also used as the output instruction of an anonymous Pat later in this file.
1974 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1975                              "mov.u64 \t$r, $s;",
1976                              [(set Int64Regs:$r,
1977                                (int_nvvm_move_ptr Int64Regs:$s))]>;
1978
1979 // @TODO: Are these actually needed, or will we always just see symbols
1980 // copied to registers first?
1981 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1982                              "mov.u32 \t$r, $s;",
1983                              [(set Int32Regs:$r,
1984                              (int_nvvm_move_ptr texternalsym:$s))]>;
1985 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1986                              "mov.u64 \t$r, $s;",
1987                              [(set Int64Regs:$r,
1988                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
1989
1990
1991 // MoveParam        %r1, param
1992 // ptr_local_to_gen %r2, %r1
1993 // ptr_gen_to_local %r3, %r2
1994 // ->
1995 // mov %r1, param
1996
1997 // @TODO: Revisit this.  There is a type
1998 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1999 // instructions are not currently defined. However, we can use the ptr
2000 // variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move of
// the symbol. The i64-typed pattern emits nvvm_move_ptr64 and the i32-typed
// pattern emits nvvm_move_ptr32; both pass the texternalsym operand straight
// through to the move instruction.
2001 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2002                 (MoveParam texternalsym:$src)))),
2003                (nvvm_move_ptr64  texternalsym:$src)>;
2004 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2005                 (MoveParam texternalsym:$src)))),
2006                (nvvm_move_ptr32  texternalsym:$src)>;
2007
// "mov.u64" of an imem operand $src into an Int64Regs result. There is no
// selection pattern ([]), so this record is not matched from a DAG pattern
// here. NOTE(review): by its name this presumably materializes
// texture/surface handles -- confirm against the code that creates it.
2008 def texsurf_handles
2009   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2010               "mov.u64 \t$result, $src;", []>;
2011
2012 //-----------------------------------
2013 // Compiler Error Warn
2014 // - Just ignore them in codegen
2015 //-----------------------------------
2016
// int_nvvm_compiler_warn with an Int32Regs operand. No outputs; the emitted
// "instruction" is only a PTX comment line, and $a does not appear in it.
2017 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2018                 "// llvm.nvvm.compiler.warn()",
2019                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
// int_nvvm_compiler_warn with an Int64Regs operand. No outputs; the emitted
// "instruction" is only a PTX comment line, and $a does not appear in it.
2020 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2021                 "// llvm.nvvm.compiler.warn()",
2022                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
// int_nvvm_compiler_error with an Int32Regs operand. No outputs; the emitted
// "instruction" is only a PTX comment line, and $a does not appear in it.
2023 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2024                 "// llvm.nvvm.compiler.error()",
2025                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
// int_nvvm_compiler_error with an Int64Regs operand. No outputs; the emitted
// "instruction" is only a PTX comment line, and $a does not appear in it.
2026 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2027                 "// llvm.nvvm.compiler.error()",
2028                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
2029
2030
2031 // isspacep
2032
// int_nvvm_isspacep_const on an Int32Regs operand: emits "isspacep.const" and
// produces an Int1Regs result. Gated on the hasPTX31 predicate.
2033 def ISSPACEP_CONST_32
2034   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2035               "isspacep.const \t$d, $a;",
2036               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2037     Requires<[hasPTX31]>;
// int_nvvm_isspacep_const on an Int64Regs operand: emits "isspacep.const" and
// produces an Int1Regs result. Gated on the hasPTX31 predicate.
2038 def ISSPACEP_CONST_64
2039   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2040               "isspacep.const \t$d, $a;",
2041               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2042     Requires<[hasPTX31]>;
// int_nvvm_isspacep_global on an Int32Regs operand: emits "isspacep.global"
// and produces an Int1Regs result. No predicate is attached.
2043 def ISSPACEP_GLOBAL_32
2044   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2045               "isspacep.global \t$d, $a;",
2046               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
// int_nvvm_isspacep_global on an Int64Regs operand: emits "isspacep.global"
// and produces an Int1Regs result. No predicate is attached.
2047 def ISSPACEP_GLOBAL_64
2048   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2049               "isspacep.global \t$d, $a;",
2050               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
// int_nvvm_isspacep_local on an Int32Regs operand: emits "isspacep.local" and
// produces an Int1Regs result. No predicate is attached.
2051 def ISSPACEP_LOCAL_32
2052   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2053               "isspacep.local \t$d, $a;",
2054               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
// int_nvvm_isspacep_local on an Int64Regs operand: emits "isspacep.local" and
// produces an Int1Regs result. No predicate is attached.
2055 def ISSPACEP_LOCAL_64
2056   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2057               "isspacep.local \t$d, $a;",
2058               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
// int_nvvm_isspacep_shared on an Int32Regs operand: emits "isspacep.shared"
// and produces an Int1Regs result. No predicate is attached.
2059 def ISSPACEP_SHARED_32
2060   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2061               "isspacep.shared \t$d, $a;",
2062               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
// int_nvvm_isspacep_shared on an Int64Regs operand: emits "isspacep.shared"
// and produces an Int1Regs result. No predicate is attached.
2063 def ISSPACEP_SHARED_64
2064   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2065               "isspacep.shared \t$d, $a;",
2066               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2067
2068
2069 // Special register reads
// "mov.b32" from a SpecialRegs operand $r into an Int32Regs result $d. It has
// no pattern of its own ([]); the anonymous Pat records that follow select it
// with a concrete special register as the operand.
2070 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2071                             (ins SpecialRegs:$r),
2072                             "mov.b32 \t$d, $r;", []>;
2073
// One pattern per environment register, N = 0..31: a call to
// int_nvvm_read_ptx_sreg_envregN is selected as (MOV_SPECIAL ENVREGN).
2074 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2075 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2076 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2077 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2078 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2079 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2080 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2081 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2082 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2083 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2084 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2085 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2086 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2087 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2088 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2089 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2090 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2091 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2092 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2093 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2094 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2095 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2096 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2097 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2098 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2099 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2100 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2101 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2102 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2103 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2104 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2105 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2106
2107
2108 // rotate builtin support
2109
// int_nvvm_rotate_b32 with an immediate amount, for targets where hasHWROT32
// holds. Emitted as "shf.l.wrap.b32" with $src supplied as both data operands,
// so a single funnel shift performs the rotate.
2110 def ROTATE_B32_HW_IMM
2111   : NVPTXInst<(outs Int32Regs:$dst),
2112               (ins  Int32Regs:$src, i32imm:$amt),
2113               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2114               [(set Int32Regs:$dst,
2115                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2116               Requires<[hasHWROT32]> ;
2117
// int_nvvm_rotate_b32 with the amount in an Int32Regs register, for targets
// where hasHWROT32 holds. Same "shf.l.wrap.b32" encoding as the immediate
// form, with $src supplied as both data operands.
2118 def ROTATE_B32_HW_REG
2119   : NVPTXInst<(outs Int32Regs:$dst),
2120               (ins  Int32Regs:$src, Int32Regs:$amt),
2121               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2122               [(set Int32Regs:$dst,
2123                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2124               Requires<[hasHWROT32]> ;
2125
// Software fallback (noHWROT32) for int_nvvm_rotate_b32 with an immediate
// amount: selects ROT32imm_sw with $src, the amount, and (SUB_FRM_32 amount).
// NOTE(review): ROT32imm_sw and SUB_FRM_32 are defined outside this chunk;
// SUB_FRM_32 presumably yields 32 - amt -- confirm at its definition.
2126 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2127           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2128       Requires<[noHWROT32]> ;
2129
// Software fallback (noHWROT32) for int_nvvm_rotate_b32 with a register
// amount: selects ROTL32reg_sw (defined outside this chunk).
2130 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2131           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2132       Requires<[noHWROT32]> ;
2133
// Helpers that extract one 32-bit piece of an Int64Regs value into an
// Int32Regs result. Each asm body opens a scope, declares a scratch .b32
// register %dummy, and unpacks $src with a vector-destination "mov.b64":
// GET_LO_INT64 keeps the first element in $dst, GET_HI_INT64 keeps the second,
// and the other element goes to %dummy. Both are marked hasSideEffects = 0 and
// have no selection pattern; they are used as building blocks in Pat outputs.
2134 let hasSideEffects = 0 in {
2135   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2136     !strconcat("{{\n\t",
2137                ".reg .b32 %dummy;\n\t",
2138                "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2139                "}}"),
2140           []> ;
2141
2142   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2143     !strconcat("{{\n\t",
2144                ".reg .b32 %dummy;\n\t",
2145                "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2146                "}}"),
2147           []> ;
2148 }
2149
// Builds an Int64Regs value from two Int32Regs operands with a vector-source
// "mov.b64"; $lo is the first vector element and $hi the second. Marked
// hasSideEffects = 0 and has no selection pattern of its own.
2150 let hasSideEffects = 0 in {
2151   def PACK_TWO_INT32
2152     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2153                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2154 }
2155
// int_nvvm_swap_lo_hi_b64: split $src with GET_HI_INT64 / GET_LO_INT64 and
// repack with the halves exchanged (the extracted high word is passed as
// PACK_TWO_INT32's $lo operand and the low word as its $hi operand).
2156 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2157           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2158                           (GET_LO_INT64 Int64Regs:$src))> ;
2159
2160 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2161 // no side effects.
// Four pattern-less records, all gated on hasHWROT32 and taking two Int32Regs
// data operands ($lo, $hi) plus a shift amount:
//   SHF_L_WRAP_B32_IMM / _REG : "shf.l.wrap.b32", amount as i32imm / Int32Regs.
//   SHF_R_WRAP_B32_IMM / _REG : "shf.r.wrap.b32", amount as i32imm / Int32Regs.
// They are used as building blocks by the 64-bit rotate patterns below.
2162 let hasSideEffects = 0 in {
2163   def SHF_L_WRAP_B32_IMM
2164     : NVPTXInst<(outs Int32Regs:$dst),
2165                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2166                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2167       Requires<[hasHWROT32]>;
2168
2169   def SHF_L_WRAP_B32_REG
2170     : NVPTXInst<(outs Int32Regs:$dst),
2171                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2172                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2173       Requires<[hasHWROT32]>;
2174
2175   def SHF_R_WRAP_B32_IMM
2176     : NVPTXInst<(outs Int32Regs:$dst),
2177                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2178                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2179       Requires<[hasHWROT32]>;
2180
2181   def SHF_R_WRAP_B32_REG
2182     : NVPTXInst<(outs Int32Regs:$dst),
2183                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2184                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2185       Requires<[hasHWROT32]>;
2186 }
2187
2188 // HW version of rotate 64
// int_nvvm_rotate_b64 with an immediate amount, when hasHWROT32 holds.
// $src is split with GET_LO_INT64 / GET_HI_INT64; the first PACK_TWO_INT32
// operand is SHF_L_WRAP_B32_IMM(hi, lo, amt) and the second is
// SHF_L_WRAP_B32_IMM(lo, hi, amt).
// NOTE(review): whether this is correct for amounts >= 32 depends on the
// shf.wrap semantics -- confirm against the PTX ISA.
2189 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2190           (PACK_TWO_INT32
2191             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2192                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2193             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2194                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2195       Requires<[hasHWROT32]>;
2196
// int_nvvm_rotate_b64 with the amount in an Int32Regs register, when
// hasHWROT32 holds. Same structure as the immediate form, using
// SHF_L_WRAP_B32_REG: first packed operand is (hi, lo, amt), second is
// (lo, hi, amt).
2197 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2198           (PACK_TWO_INT32
2199             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2200                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2201             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2202                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2203       Requires<[hasHWROT32]>;
2204
2205
// int_nvvm_rotate_right_b64 with an immediate amount, when hasHWROT32 holds.
// Uses the right funnel shift: the first PACK_TWO_INT32 operand is
// SHF_R_WRAP_B32_IMM(lo, hi, amt) and the second is
// SHF_R_WRAP_B32_IMM(hi, lo, amt) -- the operand order is the mirror image of
// the rotate-left pattern.
2206 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2207           (PACK_TWO_INT32
2208             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2209                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2210             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2211                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2212       Requires<[hasHWROT32]>;
2213
// int_nvvm_rotate_right_b64 with the amount in an Int32Regs register, when
// hasHWROT32 holds. Same structure as the immediate form, using
// SHF_R_WRAP_B32_REG: first packed operand is (lo, hi, amt), second is
// (hi, lo, amt).
2214 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2215           (PACK_TWO_INT32
2216             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2217                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2218             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2219                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2220       Requires<[hasHWROT32]>;
2221
2222 // SW version of rotate 64
// Software fallbacks (noHWROT32) for the 64-bit rotates:
//   rotate_b64, immediate amount       -> ROT64imm_sw src, amt, (SUB_FRM_32 amt)
//   rotate_b64, register amount        -> ROTL64reg_sw src, amt
//   rotate_right_b64, immediate amount -> ROT64imm_sw src, (SUB_FRM_64 amt), amt
//   rotate_right_b64, register amount  -> ROTR64reg_sw src, amt
// NOTE(review): the left-rotate immediate form uses SUB_FRM_32 while the
// right-rotate form uses SUB_FRM_64; both transforms and the *_sw instructions
// are defined outside this chunk -- confirm the intended complement width at
// their definitions.
2223 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2224           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2225       Requires<[noHWROT32]>;
2226 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2227           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2228       Requires<[noHWROT32]>;
2229 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2230           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2231       Requires<[noHWROT32]>;
2232 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2233           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2234       Requires<[noHWROT32]>;
2235
2236
2237 //-----------------------------------
2238 // Texture Intrinsics
2239 //-----------------------------------
2240
2241 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2242 // also defined in NVPTXReplaceImageHandles.cpp
2243
2244 // texmode_independent
2245 let IsTex = 1, IsTexModeUnified = 0 in {
2246 // Texture fetch instructions using handles
// "tex.1d.v4.f32.s32": four Float32Regs results ($r,$g,$b,$a), one Int32Regs
// coordinate $x. $t and $s are Int64Regs operands (presumably the texture and
// sampler handles -- confirm). No selection pattern ([]).
2247 def TEX_1D_F32_S32
2248   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2249                     Float32Regs:$b, Float32Regs:$a),
2250               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2251               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2252               []>;
// "tex.1d.v4.f32.f32": four Float32Regs results, one Float32Regs coordinate
// $x; $t/$s are Int64Regs operands. No selection pattern ([]).
2253 def TEX_1D_F32_F32
2254   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2255                     Float32Regs:$b, Float32Regs:$a),
2256               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2257               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2258               []>;
// "tex.level.1d.v4.f32.f32": as TEX_1D_F32_F32 plus a Float32Regs $lod operand
// printed after the bracketed address. No selection pattern ([]).
2259 def TEX_1D_F32_F32_LEVEL
2260   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2261                     Float32Regs:$b, Float32Regs:$a),
2262               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2263               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2264               "[$t, $s, \\{$x\\}], $lod;",
2265               []>;
// "tex.grad.1d.v4.f32.f32": as TEX_1D_F32_F32 plus two Float32Regs operands
// $gradx and $grady, each printed as a one-element braced vector after the
// address. No selection pattern ([]).
2266 def TEX_1D_F32_F32_GRAD
2267   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2268                     Float32Regs:$b, Float32Regs:$a),
2269               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2270                    Float32Regs:$gradx, Float32Regs:$grady),
2271               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2272               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2273               []>;
// "tex.1d.v4.s32.s32": four Int32Regs results, one Int32Regs coordinate $x;
// $t/$s are Int64Regs operands. No selection pattern ([]).
2274 def TEX_1D_S32_S32
2275   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2276                     Int32Regs:$b, Int32Regs:$a),
2277               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2278               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2279               []>;
// "tex.1d.v4.s32.f32": four Int32Regs results, one Float32Regs coordinate $x;
// $t/$s are Int64Regs operands. No selection pattern ([]).
2280 def TEX_1D_S32_F32
2281   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2282                     Int32Regs:$b, Int32Regs:$a),
2283               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2284               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2285               []>;
// "tex.level.1d.v4.s32.f32": as TEX_1D_S32_F32 plus a Float32Regs $lod operand
// printed after the bracketed address. No selection pattern ([]).
2286 def TEX_1D_S32_F32_LEVEL
2287   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2288                     Int32Regs:$b, Int32Regs:$a),
2289               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2290                    Float32Regs:$lod),
2291               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2292               "[$t, $s, \\{$x\\}], $lod;",
2293               []>;
// "tex.grad.1d.v4.s32.f32": as TEX_1D_S32_F32 plus Float32Regs $gradx and
// $grady, each printed as a one-element braced vector. No selection pattern.
2294 def TEX_1D_S32_F32_GRAD
2295   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2296                     Int32Regs:$b, Int32Regs:$a),
2297               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2298                    Float32Regs:$gradx, Float32Regs:$grady),
2299               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2300               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2301               []>;
// "tex.1d.v4.u32.s32": four Int32Regs results, one Int32Regs coordinate $x;
// $t/$s are Int64Regs operands. No selection pattern ([]).
2302 def TEX_1D_U32_S32
2303   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2304                     Int32Regs:$b, Int32Regs:$a),
2305               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2306               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2307               []>;
// "tex.1d.v4.u32.f32": four Int32Regs results, one Float32Regs coordinate $x;
// $t/$s are Int64Regs operands. No selection pattern ([]).
2308 def TEX_1D_U32_F32
2309   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2310                     Int32Regs:$b, Int32Regs:$a),
2311               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2312               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2313               []>;
// "tex.level.1d.v4.u32.f32": as TEX_1D_U32_F32 plus a Float32Regs $lod operand
// printed after the bracketed address. No selection pattern ([]).
2314 def TEX_1D_U32_F32_LEVEL
2315   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2316                     Int32Regs:$b, Int32Regs:$a),
2317               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2318                    Float32Regs:$lod),
2319               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2320               "[$t, $s, \\{$x\\}], $lod;",
2321               []>;
// "tex.grad.1d.v4.u32.f32": as TEX_1D_U32_F32 plus Float32Regs $gradx and
// $grady, each printed as a one-element braced vector. No selection pattern.
2322 def TEX_1D_U32_F32_GRAD
2323   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2324                     Int32Regs:$b, Int32Regs:$a),
2325               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2326                    Float32Regs:$gradx, Float32Regs:$grady),
2327               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2328               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2329               []>;
2330
// "tex.a1d.v4.f32.s32": four Float32Regs results. The coordinate vector is
// {$l, $x}: an Int32Regs $l first (presumably the array layer -- confirm),
// then the Int32Regs coordinate $x. No selection pattern ([]).
2331 def TEX_1D_ARRAY_F32_S32
2332   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2333                     Float32Regs:$b, Float32Regs:$a),
2334               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2335               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2336               "[$t, $s, \\{$l, $x\\}];",
2337               []>;
// "tex.a1d.v4.f32.f32": four Float32Regs results; coordinate vector {$l, $x}
// with $l in Int32Regs and $x in Float32Regs. No selection pattern ([]).
2338 def TEX_1D_ARRAY_F32_F32
2339   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2340                     Float32Regs:$b, Float32Regs:$a),
2341               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2342               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2343               "[$t, $s, \\{$l, $x\\}];",
2344               []>;
// "tex.level.a1d.v4.f32.f32": as TEX_1D_ARRAY_F32_F32 plus a Float32Regs $lod
// operand printed after the bracketed address. No selection pattern ([]).
2345 def TEX_1D_ARRAY_F32_F32_LEVEL
2346   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2347                     Float32Regs:$b, Float32Regs:$a),
2348               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2349                    Float32Regs:$lod),
2350               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2351               "[$t, $s, \\{$l, $x\\}], $lod;",
2352               []>;
// "tex.grad.a1d.v4.f32.f32": as TEX_1D_ARRAY_F32_F32 plus Float32Regs $gradx
// and $grady, each printed as a one-element braced vector. No selection
// pattern ([]).
2353 def TEX_1D_ARRAY_F32_F32_GRAD
2354   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2355                     Float32Regs:$b, Float32Regs:$a),
2356               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2357                    Float32Regs:$gradx, Float32Regs:$grady),
2358               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2359               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2360               []>;
// "tex.a1d.v4.s32.s32": four Int32Regs results; coordinate vector {$l, $x}
// with both $l and $x in Int32Regs. No selection pattern ([]).
2361 def TEX_1D_ARRAY_S32_S32
2362   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2363                     Int32Regs:$b, Int32Regs:$a),
2364               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2365               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2366               "[$t, $s, \\{$l, $x\\}];",
2367               []>;
// "tex.a1d.v4.s32.f32": four Int32Regs results; coordinate vector {$l, $x}
// with $l in Int32Regs and $x in Float32Regs. No selection pattern ([]).
2368 def TEX_1D_ARRAY_S32_F32
2369   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2370                     Int32Regs:$b, Int32Regs:$a),
2371               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2372               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2373               "[$t, $s, \\{$l, $x\\}];",
2374               []>;
// "tex.level.a1d.v4.s32.f32": as TEX_1D_ARRAY_S32_F32 plus a Float32Regs $lod
// operand printed after the bracketed address. No selection pattern ([]).
2375 def TEX_1D_ARRAY_S32_F32_LEVEL
2376   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2377                     Int32Regs:$b, Int32Regs:$a),
2378               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2379                    Float32Regs:$lod),
2380               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2381               "[$t, $s, \\{$l, $x\\}], $lod;",
2382               []>;
// "tex.grad.a1d.v4.s32.f32": as TEX_1D_ARRAY_S32_F32 plus Float32Regs $gradx
// and $grady, each printed as a one-element braced vector. No selection
// pattern ([]).
2383 def TEX_1D_ARRAY_S32_F32_GRAD
2384   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2385                     Int32Regs:$b, Int32Regs:$a),
2386               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2387                    Float32Regs:$gradx, Float32Regs:$grady),
2388               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2389               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2390               []>;
// "tex.a1d.v4.u32.s32": four Int32Regs results; coordinate vector {$l, $x}
// with both $l and $x in Int32Regs. No selection pattern ([]).
2391 def TEX_1D_ARRAY_U32_S32
2392   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2393                     Int32Regs:$b, Int32Regs:$a),
2394               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2395               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2396               "[$t, $s, \\{$l, $x\\}];",
2397               []>;
// "tex.a1d.v4.u32.f32": four Int32Regs results; coordinate vector {$l, $x}
// with $l in Int32Regs and $x in Float32Regs. No selection pattern ([]).
2398 def TEX_1D_ARRAY_U32_F32
2399   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2400                     Int32Regs:$b, Int32Regs:$a),
2401               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2402               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2403               "[$t, $s, \\{$l, $x\\}];",
2404               []>;
// "tex.level.a1d.v4.u32.f32": as TEX_1D_ARRAY_U32_F32 plus a Float32Regs $lod
// operand printed after the bracketed address. No selection pattern ([]).
2405 def TEX_1D_ARRAY_U32_F32_LEVEL
2406   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2407                     Int32Regs:$b, Int32Regs:$a),
2408               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2409                    Float32Regs:$lod),
2410               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2411               "[$t, $s, \\{$l, $x\\}], $lod;",
2412               []>;
// "tex.grad.a1d.v4.u32.f32": as TEX_1D_ARRAY_U32_F32 plus Float32Regs $gradx
// and $grady, each printed as a one-element braced vector. No selection
// pattern ([]).
2413 def TEX_1D_ARRAY_U32_F32_GRAD
2414   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2415                     Int32Regs:$b, Int32Regs:$a),
2416               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2417                    Float32Regs:$gradx, Float32Regs:$grady),
2418               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2419               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2420               []>;
2421
// "tex.2d.v4.f32.s32": four Float32Regs results; coordinate vector {$x, $y}
// with both coordinates in Int32Regs. $t/$s are Int64Regs operands. No
// selection pattern ([]).
2422 def TEX_2D_F32_S32
2423   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2424                     Float32Regs:$b, Float32Regs:$a),
2425               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2426               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2427               "[$t, $s, \\{$x, $y\\}];",
2428               []>;
// "tex.2d.v4.f32.f32": four Float32Regs results; coordinate vector {$x, $y}
// with both coordinates in Float32Regs. No selection pattern ([]).
2429 def TEX_2D_F32_F32
2430   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2431                     Float32Regs:$b, Float32Regs:$a),
2432               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2433               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2434               "[$t, $s, \\{$x, $y\\}];",
2435               []>;
// "tex.level.2d.v4.f32.f32": as TEX_2D_F32_F32 plus a Float32Regs $lod operand
// printed after the bracketed address. No selection pattern ([]).
2436 def TEX_2D_F32_F32_LEVEL
2437   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2438                     Float32Regs:$b, Float32Regs:$a),
2439               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2440                    Float32Regs:$lod),
2441               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2442               "[$t, $s, \\{$x, $y\\}], $lod;",
2443               []>;
// "tex.grad.2d.v4.f32.f32": as TEX_2D_F32_F32 plus four Float32Regs gradient
// operands, printed as two two-element braced vectors: {$gradx0, $gradx1}
// followed by {$grady0, $grady1}. No selection pattern ([]).
2444 def TEX_2D_F32_F32_GRAD
2445   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2446                     Float32Regs:$b, Float32Regs:$a),
2447               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2448                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2449                    Float32Regs:$grady0, Float32Regs:$grady1),
2450               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2451               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2452               "\\{$grady0, $grady1\\};",
2453               []>;
// "tex.2d.v4.s32.s32": four Int32Regs results; coordinate vector {$x, $y}
// with both coordinates in Int32Regs. No selection pattern ([]).
2454 def TEX_2D_S32_S32
2455   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2456                     Int32Regs:$b, Int32Regs:$a),
2457               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2458               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2459               "[$t, $s, \\{$x, $y\\}];",
2460               []>;
// "tex.2d.v4.s32.f32": four Int32Regs results; coordinate vector {$x, $y}
// with both coordinates in Float32Regs. No selection pattern ([]).
2461 def TEX_2D_S32_F32
2462   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2463                     Int32Regs:$b, Int32Regs:$a),
2464               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2465               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2466               "[$t, $s, \\{$x, $y\\}];",
2467               []>;
// "tex.level.2d.v4.s32.f32": as TEX_2D_S32_F32 plus a Float32Regs $lod operand
// printed after the bracketed address. No selection pattern ([]).
2468 def TEX_2D_S32_F32_LEVEL
2469   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2470                     Int32Regs:$b, Int32Regs:$a),
2471               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2472                    Float32Regs:$lod),
2473               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2474               "[$t, $s, \\{$x, $y\\}], $lod;",
2475               []>;
// "tex.grad.2d.v4.s32.f32": as TEX_2D_S32_F32 plus four Float32Regs gradient
// operands, printed as {$gradx0, $gradx1} followed by {$grady0, $grady1}.
// No selection pattern ([]).
2476 def TEX_2D_S32_F32_GRAD
2477   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2478                     Int32Regs:$b, Int32Regs:$a),
2479               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2480                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2481                    Float32Regs:$grady0, Float32Regs:$grady1),
2482               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2483               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2484               "\\{$grady0, $grady1\\};",
2485               []>;
// "tex.2d.v4.u32.s32": four Int32Regs results; coordinate vector {$x, $y}
// with both coordinates in Int32Regs. No selection pattern ([]).
2486 def TEX_2D_U32_S32
2487   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2488                     Int32Regs:$b, Int32Regs:$a),
2489               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2490               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2491               "[$t, $s, \\{$x, $y\\}];",
2492               []>;
// "tex.2d.v4.u32.f32": four Int32Regs results; coordinate vector {$x, $y}
// with both coordinates in Float32Regs. No selection pattern ([]).
2493 def TEX_2D_U32_F32
2494   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2495                     Int32Regs:$b, Int32Regs:$a),
2496               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2497               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2498               "[$t, $s, \\{$x, $y\\}];",
2499               []>;
// "tex.level.2d.v4.u32.f32": as TEX_2D_U32_F32 plus a Float32Regs $lod operand
// printed after the bracketed address. No selection pattern ([]).
2500 def TEX_2D_U32_F32_LEVEL
2501   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2502                     Int32Regs:$b, Int32Regs:$a),
2503               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2504                    Float32Regs:$lod),
2505               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2506               "[$t, $s, \\{$x, $y\\}], $lod;",
2507               []>;
2508 def TEX_2D_U32_F32_GRAD
2509   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2510                     Int32Regs:$b, Int32Regs:$a),
2511               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2512                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2513                    Float32Regs:$grady0, Float32Regs:$grady1),
2514               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2515               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2516               "\\{$grady0, $grady1\\};",
2517               []>;
2518
// 2D array texture fetches (tex.a2d) with separate texture ($t) and sampler
// ($s) operands in Int64Regs.  Four results ($r, $g, $b, $a) are produced in
// Float32Regs for the F32 variants and Int32Regs for the S32/U32 variants.
//
// Naming: TEX_2D_ARRAY_<result type>_<coordinate type>[_LEVEL|_GRAD], matching
// the ".v4.<result>.<coord>" suffix of the asm string.
//
// $l is always an Int32Regs operand and is emitted first in the coordinate
// vector (NOTE(review): presumably the array layer index -- confirm).
// The coordinate vector is printed as {$l, $x, $y, $y}: $y is deliberately
// repeated in the fourth slot (NOTE(review): presumably padding to the
// 4-element vector form; confirm against the PTX ISA "tex" description).
//   * _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   * _GRAD adds two 2-element gradient vectors and emits "tex.grad".
// All pattern lists are empty ([]).
2519 def TEX_2D_ARRAY_F32_S32
2520   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2521                     Float32Regs:$b, Float32Regs:$a),
2522               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2523                    Int32Regs:$y),
2524               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2525               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2526               []>;
2527 def TEX_2D_ARRAY_F32_F32
2528   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2529                     Float32Regs:$b, Float32Regs:$a),
2530               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2531                    Float32Regs:$y),
2532               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2533               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2534               []>;
2535 def TEX_2D_ARRAY_F32_F32_LEVEL
2536   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2537                     Float32Regs:$b, Float32Regs:$a),
2538               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2539                    Float32Regs:$y, Float32Regs:$lod),
2540               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2541               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2542               []>;
2543 def TEX_2D_ARRAY_F32_F32_GRAD
2544   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2545                     Float32Regs:$b, Float32Regs:$a),
2546               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2547                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2548                    Float32Regs:$grady0, Float32Regs:$grady1),
2549               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2550               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2551               "\\{$grady0, $grady1\\};",
2552               []>;
// Signed-integer result (s32) variants.
2553 def TEX_2D_ARRAY_S32_S32
2554   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2555                     Int32Regs:$b, Int32Regs:$a),
2556               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2557                    Int32Regs:$y),
2558               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2559               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2560               []>;
2561 def TEX_2D_ARRAY_S32_F32
2562   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2563                     Int32Regs:$b, Int32Regs:$a),
2564               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2565                    Float32Regs:$y),
2566               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2567               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2568               []>;
2569 def TEX_2D_ARRAY_S32_F32_LEVEL
2570   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2571                     Int32Regs:$b, Int32Regs:$a),
2572               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2573                    Float32Regs:$y, Float32Regs:$lod),
2574               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2575               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2576               []>;
2577 def TEX_2D_ARRAY_S32_F32_GRAD
2578   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2579                     Int32Regs:$b, Int32Regs:$a),
2580               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2581                    Float32Regs:$y,
2582                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2583                    Float32Regs:$grady0, Float32Regs:$grady1),
2584               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2585               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2586               "\\{$grady0, $grady1\\};",
2587               []>;
// Unsigned-integer result (u32) variants; same operands as the s32 forms,
// only the type suffix in the asm string differs.
2588 def TEX_2D_ARRAY_U32_S32
2589   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2590                     Int32Regs:$b, Int32Regs:$a),
2591               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2592                    Int32Regs:$y),
2593               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2594               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2595               []>;
2596 def TEX_2D_ARRAY_U32_F32
2597   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2598                     Int32Regs:$b, Int32Regs:$a),
2599               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2600                    Float32Regs:$y),
2601               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2602               "[$t, $s, \\{$l, $x, $y, $y\\}];",
2603               []>;
2604 def TEX_2D_ARRAY_U32_F32_LEVEL
2605   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2606                     Int32Regs:$b, Int32Regs:$a),
2607               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2608                    Float32Regs:$y, Float32Regs:$lod),
2609               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2610               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2611               []>;
2612 def TEX_2D_ARRAY_U32_F32_GRAD
2613   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2614                     Int32Regs:$b, Int32Regs:$a),
2615               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2616                    Float32Regs:$y,
2617                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2618                    Float32Regs:$grady0, Float32Regs:$grady1),
2619               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2620               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2621               "\\{$grady0, $grady1\\};",
2622               []>;
2623
// 3D texture fetches (tex.3d) with separate texture ($t) and sampler ($s)
// operands in Int64Regs.  Four results ($r, $g, $b, $a) are produced in
// Float32Regs for the F32 variants and Int32Regs for the S32/U32 variants.
//
// Naming: TEX_3D_<result type>_<coordinate type>[_LEVEL|_GRAD], matching the
// ".v4.<result>.<coord>" suffix of the asm string.
//
// The coordinate vector is printed as {$x, $y, $z, $z}: $z is deliberately
// repeated in the fourth slot.  The _GRAD forms likewise print
// {$gradx0, $gradx1, $gradx2, $gradx2} and {$grady0, $grady1, $grady2, $grady2}.
// (NOTE(review): presumably padding to the 4-element vector form; confirm
// against the PTX ISA "tex" description.)
//   * _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   * _GRAD adds two 3-component gradients and emits "tex.grad".
// All pattern lists are empty ([]).
2624 def TEX_3D_F32_S32
2625   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2626                     Float32Regs:$b, Float32Regs:$a),
2627               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2628                    Int32Regs:$z),
2629               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2630               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2631               []>;
2632 def TEX_3D_F32_F32
2633   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2634                     Float32Regs:$b, Float32Regs:$a),
2635               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2636                    Float32Regs:$z),
2637               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2638               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2639               []>;
2640 def TEX_3D_F32_F32_LEVEL
2641   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2642                     Float32Regs:$b, Float32Regs:$a),
2643               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2644                    Float32Regs:$z, Float32Regs:$lod),
2645               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2646               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2647               []>;
2648 def TEX_3D_F32_F32_GRAD
2649   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2650                     Float32Regs:$b, Float32Regs:$a),
2651               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2652                    Float32Regs:$z,
2653                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2654                    Float32Regs:$gradx2, Float32Regs:$grady0,
2655                    Float32Regs:$grady1, Float32Regs:$grady2),
2656               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2657               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2658               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2659               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2660               []>;
// Signed-integer result (s32) variants.
2661 def TEX_3D_S32_S32
2662   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2663                     Int32Regs:$b, Int32Regs:$a),
2664               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2665                    Int32Regs:$z),
2666               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2667               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2668               []>;
2669 def TEX_3D_S32_F32
2670   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2671                     Int32Regs:$b, Int32Regs:$a),
2672               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2673                    Float32Regs:$z),
2674               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2675               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2676               []>;
2677 def TEX_3D_S32_F32_LEVEL
2678   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2679                     Int32Regs:$b, Int32Regs:$a),
2680               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2681                    Float32Regs:$z, Float32Regs:$lod),
2682               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2683               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2684               []>;
2685 def TEX_3D_S32_F32_GRAD
2686   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2687                     Int32Regs:$b, Int32Regs:$a),
2688               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2689                    Float32Regs:$z,
2690                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2691                    Float32Regs:$gradx2, Float32Regs:$grady0,
2692                    Float32Regs:$grady1, Float32Regs:$grady2),
2693               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2694               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2695               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2696               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2697               []>;
// Unsigned-integer result (u32) variants; same operands as the s32 forms,
// only the type suffix in the asm string differs.
2698 def TEX_3D_U32_S32
2699   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2700                     Int32Regs:$b, Int32Regs:$a),
2701               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2702                    Int32Regs:$z),
2703               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2704               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2705               []>;
2706 def TEX_3D_U32_F32
2707   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2708                     Int32Regs:$b, Int32Regs:$a),
2709               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2710                    Float32Regs:$z),
2711               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2712               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2713               []>;
2714 def TEX_3D_U32_F32_LEVEL
2715   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2716                     Int32Regs:$b, Int32Regs:$a),
2717               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2718                    Float32Regs:$z, Float32Regs:$lod),
2719               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2720               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2721               []>;
2722 def TEX_3D_U32_F32_GRAD
2723   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2724                     Int32Regs:$b, Int32Regs:$a),
2725               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2726                    Float32Regs:$z,
2727                    Float32Regs:$gradx0, Float32Regs:$gradx1,
2728                    Float32Regs:$gradx2, Float32Regs:$grady0,
2729                    Float32Regs:$grady1, Float32Regs:$grady2),
2730               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2731               "[$t, $s, \\{$x, $y, $z, $z\\}], "
2732               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2733               "\\{$grady0, $grady1, $grady2, $grady2\\};",
2734               []>;
2735
// Cube texture fetches (tex.cube) with separate texture ($t) and sampler ($s)
// operands in Int64Regs.  Only Float32Regs coordinates ($x, $y, $z) are
// defined here, in a plain form and a _LEVEL form (extra Float32Regs $lod,
// emits "tex.level"); this group has no integer-coordinate or _GRAD variants.
//
// Naming: TEX_CUBE_<result type>_F32[_LEVEL].  F32 results use Float32Regs;
// S32 and U32 results both use Int32Regs and differ only in the asm suffix.
//
// The coordinate vector is printed as {$x, $y, $z, $z}: $z is deliberately
// repeated in the fourth slot (NOTE(review): presumably padding to the
// 4-element vector form; confirm against the PTX ISA "tex" description).
// All pattern lists are empty ([]).
2736 def TEX_CUBE_F32_F32
2737   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2738                     Float32Regs:$b, Float32Regs:$a),
2739               (ins Int64Regs:$t, Int64Regs:$s,
2740                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2741               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2742               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2743               []>;
2744 def TEX_CUBE_F32_F32_LEVEL
2745   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2746                     Float32Regs:$b, Float32Regs:$a),
2747               (ins Int64Regs:$t, Int64Regs:$s,
2748                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2749                    Float32Regs:$lod),
2750               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2751               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2752               []>;
2753 def TEX_CUBE_S32_F32
2754   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2755                     Int32Regs:$b, Int32Regs:$a),
2756               (ins Int64Regs:$t, Int64Regs:$s,
2757                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2758               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2759               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2760               []>;
2761 def TEX_CUBE_S32_F32_LEVEL
2762   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2763                     Int32Regs:$b, Int32Regs:$a),
2764               (ins Int64Regs:$t, Int64Regs:$s,
2765                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2766                    Float32Regs:$lod),
2767               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2768               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2769               []>;
2770 def TEX_CUBE_U32_F32
2771   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2772                     Int32Regs:$b, Int32Regs:$a),
2773               (ins Int64Regs:$t, Int64Regs:$s,
2774                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2775               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2776               "[$t, $s, \\{$x, $y, $z, $z\\}];",
2777               []>;
2778 def TEX_CUBE_U32_F32_LEVEL
2779   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2780                     Int32Regs:$b, Int32Regs:$a),
2781               (ins Int64Regs:$t, Int64Regs:$s,
2782                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2783                    Float32Regs:$lod),
2784               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2785               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2786               []>;
2787
// Cube array texture fetches (tex.acube) with separate texture ($t) and
// sampler ($s) operands in Int64Regs.  Only Float32Regs coordinates
// ($x, $y, $z) are defined here, in a plain form and a _LEVEL form (extra
// Float32Regs $lod, emits "tex.level").
//
// Naming: TEX_CUBE_ARRAY_<result type>_F32[_LEVEL].  F32 results use
// Float32Regs; S32 and U32 results both use Int32Regs and differ only in the
// asm suffix.
//
// $l is an Int32Regs operand emitted first in the coordinate vector
// (NOTE(review): presumably the array layer index -- confirm).  With $l
// included the vector {$l, $x, $y, $z} already has four elements, so no
// operand is repeated here.
// All pattern lists are empty ([]).
2788 def TEX_CUBE_ARRAY_F32_F32
2789   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2790                     Float32Regs:$b, Float32Regs:$a),
2791               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2792                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2793               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2794               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2795               []>;
2796 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2797   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2798                     Float32Regs:$b, Float32Regs:$a),
2799               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2800                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2801                    Float32Regs:$lod),
2802               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2803               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2804               []>;
2805 def TEX_CUBE_ARRAY_S32_F32
2806   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2807                     Int32Regs:$b, Int32Regs:$a),
2808               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2809                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2810               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2811               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2812               []>;
2813 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2814   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2815                     Int32Regs:$b, Int32Regs:$a),
2816               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2817                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2818                    Float32Regs:$lod),
2819               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2820               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2821               []>;
2822 def TEX_CUBE_ARRAY_U32_F32
2823   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2824                     Int32Regs:$b, Int32Regs:$a),
2825               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2826                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2827               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2828               "[$t, $s, \\{$l, $x, $y, $z\\}];",
2829               []>;
2830 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2831   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2832                     Int32Regs:$b, Int32Regs:$a),
2833               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2834                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2835                    Float32Regs:$lod),
2836               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2837               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2838               []>;
2839
// tld4 instructions on 2D textures with separate texture ($t) and sampler
// ($s) operands in Int64Regs and Float32Regs coordinates ($x, $y).
//
// Naming: TLD4_<component>_2D_<result type>_F32, where <component> is one of
// R, G, B, A and is emitted as the ".r" / ".g" / ".b" / ".a" qualifier of the
// asm string.  Each def produces four results ($v0..$v3): Float32Regs for the
// F32 variants, Int32Regs for both the S32 and U32 variants (which differ
// only in the ".s32" / ".u32" asm suffix).
// (NOTE(review): per the PTX ISA, tld4 gathers the selected component from
// the four texels of the bilinear footprint -- confirm against the ISA text.)
// All pattern lists are empty ([]).
2840 def TLD4_R_2D_F32_F32
2841   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2842                     Float32Regs:$v2, Float32Regs:$v3),
2843               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2844               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2845               "[$t, $s, \\{$x, $y\\}];",
2846               []>;
2847 def TLD4_G_2D_F32_F32
2848   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2849                     Float32Regs:$v2, Float32Regs:$v3),
2850               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2851               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2852               "[$t, $s, \\{$x, $y\\}];",
2853               []>;
2854 def TLD4_B_2D_F32_F32
2855   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2856                     Float32Regs:$v2, Float32Regs:$v3),
2857               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2858               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2859               "[$t, $s, \\{$x, $y\\}];",
2860               []>;
2861 def TLD4_A_2D_F32_F32
2862   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2863                     Float32Regs:$v2, Float32Regs:$v3),
2864               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2865               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2866               "[$t, $s, \\{$x, $y\\}];",
2867               []>;
// Signed-integer result (s32) variants.
2868 def TLD4_R_2D_S32_F32
2869   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2870                     Int32Regs:$v2, Int32Regs:$v3),
2871               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2872               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2873               "[$t, $s, \\{$x, $y\\}];",
2874               []>;
2875 def TLD4_G_2D_S32_F32
2876   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2877                     Int32Regs:$v2, Int32Regs:$v3),
2878               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2879               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2880               "[$t, $s, \\{$x, $y\\}];",
2881               []>;
2882 def TLD4_B_2D_S32_F32
2883   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2884                     Int32Regs:$v2, Int32Regs:$v3),
2885               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2886               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2887               "[$t, $s, \\{$x, $y\\}];",
2888               []>;
2889 def TLD4_A_2D_S32_F32
2890   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2891                     Int32Regs:$v2, Int32Regs:$v3),
2892               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2893               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2894               "[$t, $s, \\{$x, $y\\}];",
2895               []>;
// Unsigned-integer result (u32) variants.
2896 def TLD4_R_2D_U32_F32
2897   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2898                     Int32Regs:$v2, Int32Regs:$v3),
2899               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2900               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2901               "[$t, $s, \\{$x, $y\\}];",
2902               []>;
2903 def TLD4_G_2D_U32_F32
2904   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2905                     Int32Regs:$v2, Int32Regs:$v3),
2906               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2907               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2908               "[$t, $s, \\{$x, $y\\}];",
2909               []>;
2910 def TLD4_B_2D_U32_F32
2911   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2912                     Int32Regs:$v2, Int32Regs:$v3),
2913               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2914               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2915               "[$t, $s, \\{$x, $y\\}];",
2916               []>;
2917 def TLD4_A_2D_U32_F32
2918   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2919                     Int32Regs:$v2, Int32Regs:$v3),
2920               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2921               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2922               "[$t, $s, \\{$x, $y\\}];",
2923               []>;
2924 }
2925
2926
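2927 // Unified texture mode (texmode_unified): defs in the following block are tagged with both IsTex and IsTexModeUnified.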
2928 let IsTex = 1, IsTexModeUnified = 1 in {
2929 // Texture fetch instructions that take a single texture handle operand ($t) and no separate sampler operand
// Unified-mode 1D texture fetches (tex.1d).  Unlike the non-unified defs,
// these take only a texture handle operand ($t, Int64Regs); there is no
// separate sampler operand, and the address is printed as [$t, {$x}].
//
// Naming: TEX_UNIFIED_1D_<result type>_<coordinate type>[_LEVEL|_GRAD],
// matching the ".v4.<result>.<coord>" suffix of the asm string.  F32 results
// use Float32Regs; S32 and U32 results both use Int32Regs and differ only in
// the asm suffix.
//   * _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   * _GRAD adds single-element gradients {$gradx} and {$grady} and emits
//     "tex.grad".
// All pattern lists are empty ([]).
2930 def TEX_UNIFIED_1D_F32_S32
2931   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2932                     Float32Regs:$b, Float32Regs:$a),
2933               (ins Int64Regs:$t, Int32Regs:$x),
2934               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2935               []>;
2936 def TEX_UNIFIED_1D_F32_F32
2937   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2938                     Float32Regs:$b, Float32Regs:$a),
2939               (ins Int64Regs:$t, Float32Regs:$x),
2940               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2941               []>;
2942 def TEX_UNIFIED_1D_F32_F32_LEVEL
2943   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2944                     Float32Regs:$b, Float32Regs:$a),
2945               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2946               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2947               "[$t, \\{$x\\}], $lod;",
2948               []>;
2949 def TEX_UNIFIED_1D_F32_F32_GRAD
2950   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2951                     Float32Regs:$b, Float32Regs:$a),
2952               (ins Int64Regs:$t, Float32Regs:$x,
2953                    Float32Regs:$gradx, Float32Regs:$grady),
2954               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2955               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2956               []>;
// Signed-integer result (s32) variants.
2957 def TEX_UNIFIED_1D_S32_S32
2958   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2959                     Int32Regs:$b, Int32Regs:$a),
2960               (ins Int64Regs:$t, Int32Regs:$x),
2961               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2962               []>;
2963 def TEX_UNIFIED_1D_S32_F32
2964   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2965                     Int32Regs:$b, Int32Regs:$a),
2966               (ins Int64Regs:$t, Float32Regs:$x),
2967               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2968               []>;
2969 def TEX_UNIFIED_1D_S32_F32_LEVEL
2970   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2971                     Int32Regs:$b, Int32Regs:$a),
2972               (ins Int64Regs:$t, Float32Regs:$x,
2973                    Float32Regs:$lod),
2974               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2975               "[$t, \\{$x\\}], $lod;",
2976               []>;
2977 def TEX_UNIFIED_1D_S32_F32_GRAD
2978   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2979                     Int32Regs:$b, Int32Regs:$a),
2980               (ins Int64Regs:$t, Float32Regs:$x,
2981                    Float32Regs:$gradx, Float32Regs:$grady),
2982               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2983               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2984               []>;
// Unsigned-integer result (u32) variants.
2985 def TEX_UNIFIED_1D_U32_S32
2986   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2987                     Int32Regs:$b, Int32Regs:$a),
2988               (ins Int64Regs:$t, Int32Regs:$x),
2989               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2990               []>;
2991 def TEX_UNIFIED_1D_U32_F32
2992   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2993                     Int32Regs:$b, Int32Regs:$a),
2994               (ins Int64Regs:$t, Float32Regs:$x),
2995               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2996               []>;
2997 def TEX_UNIFIED_1D_U32_F32_LEVEL
2998   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2999                     Int32Regs:$b, Int32Regs:$a),
3000               (ins Int64Regs:$t, Float32Regs:$x,
3001                    Float32Regs:$lod),
3002               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3003               "[$t, \\{$x\\}], $lod;",
3004               []>;
3005 def TEX_UNIFIED_1D_U32_F32_GRAD
3006   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3007                     Int32Regs:$b, Int32Regs:$a),
3008               (ins Int64Regs:$t, Float32Regs:$x,
3009                    Float32Regs:$gradx, Float32Regs:$grady),
3010               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3011               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3012               []>;
3013
// Unified-mode 1D array texture fetches (tex.a1d).  These take only a texture
// handle operand ($t, Int64Regs) and no separate sampler operand; the address
// is printed as [$t, {$l, $x}].
//
// $l is always an Int32Regs operand emitted first in the coordinate vector
// (NOTE(review): presumably the array layer index -- confirm).
//
// Naming: TEX_UNIFIED_1D_ARRAY_<result type>_<coordinate type>[_LEVEL|_GRAD],
// matching the ".v4.<result>.<coord>" suffix of the asm string.  F32 results
// use Float32Regs; S32 and U32 results both use Int32Regs and differ only in
// the asm suffix.
//   * _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   * _GRAD adds single-element gradients {$gradx} and {$grady} and emits
//     "tex.grad".
// All pattern lists are empty ([]).
3014 def TEX_UNIFIED_1D_ARRAY_F32_S32
3015   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3016                     Float32Regs:$b, Float32Regs:$a),
3017               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3018               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3019               "[$t, \\{$l, $x\\}];",
3020               []>;
3021 def TEX_UNIFIED_1D_ARRAY_F32_F32
3022   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3023                     Float32Regs:$b, Float32Regs:$a),
3024               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3025               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3026               "[$t, \\{$l, $x\\}];",
3027               []>;
3028 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3029   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3030                     Float32Regs:$b, Float32Regs:$a),
3031               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3032                    Float32Regs:$lod),
3033               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3034               "[$t, \\{$l, $x\\}], $lod;",
3035               []>;
3036 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3037   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3038                     Float32Regs:$b, Float32Regs:$a),
3039               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3040                    Float32Regs:$gradx, Float32Regs:$grady),
3041               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3042               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3043               []>;
// Signed-integer result (s32) variants.
3044 def TEX_UNIFIED_1D_ARRAY_S32_S32
3045   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3046                     Int32Regs:$b, Int32Regs:$a),
3047               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3048               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3049               "[$t, \\{$l, $x\\}];",
3050               []>;
3051 def TEX_UNIFIED_1D_ARRAY_S32_F32
3052   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3053                     Int32Regs:$b, Int32Regs:$a),
3054               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3055               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3056               "[$t, \\{$l, $x\\}];",
3057               []>;
3058 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3059   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3060                     Int32Regs:$b, Int32Regs:$a),
3061               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3062                    Float32Regs:$lod),
3063               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3064               "[$t, \\{$l, $x\\}], $lod;",
3065               []>;
3066 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3067   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3068                     Int32Regs:$b, Int32Regs:$a),
3069               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3070                    Float32Regs:$gradx, Float32Regs:$grady),
3071               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3072               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3073               []>;
// Unsigned-integer result (u32) variants.
3074 def TEX_UNIFIED_1D_ARRAY_U32_S32
3075   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3076                     Int32Regs:$b, Int32Regs:$a),
3077               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
3078               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3079               "[$t, \\{$l, $x\\}];",
3080               []>;
3081 def TEX_UNIFIED_1D_ARRAY_U32_F32
3082   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3083                     Int32Regs:$b, Int32Regs:$a),
3084               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
3085               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3086               "[$t, \\{$l, $x\\}];",
3087               []>;
3088 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3089   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3090                     Int32Regs:$b, Int32Regs:$a),
3091               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3092                    Float32Regs:$lod),
3093               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3094               "[$t, \\{$l, $x\\}], $lod;",
3095               []>;
3096 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3097   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3098                     Int32Regs:$b, Int32Regs:$a),
3099               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3100                    Float32Regs:$gradx, Float32Regs:$grady),
3101               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3102               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3103               []>;
3104
3105 def TEX_UNIFIED_2D_F32_S32
3106   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3107                     Float32Regs:$b, Float32Regs:$a),
3108               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3109               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3110               "[$t, \\{$x, $y\\}];",
3111               []>;
3112 def TEX_UNIFIED_2D_F32_F32
3113   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3114                     Float32Regs:$b, Float32Regs:$a),
3115               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3116               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3117               "[$t, \\{$x, $y\\}];",
3118               []>;
// Record for `tex.level.2d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at Float32Regs {$x, $y}; the extra Float32Regs $lod is printed
// after the address (presumably the explicit mip level - confirm).
3119 def TEX_UNIFIED_2D_F32_F32_LEVEL
3120   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3121                     Float32Regs:$b, Float32Regs:$a),
3122               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3123                    Float32Regs:$lod),
3124               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3125               "[$t, \\{$x, $y\\}], $lod;",
3126               []>;
// Record for `tex.grad.2d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at Float32Regs {$x, $y}, followed by two brace-wrapped
// Float32Regs vectors {$gradx0, $gradx1} and {$grady0, $grady1} (presumably
// the derivatives along x and y - confirm against the PTX ISA).
3127 def TEX_UNIFIED_2D_F32_F32_GRAD
3128   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3129                     Float32Regs:$b, Float32Regs:$a),
3130               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3131                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3132                    Float32Regs:$grady0, Float32Regs:$grady1),
3133               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3134               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3135               "\\{$grady0, $grady1\\};",
3136               []>;
// Record for `tex.2d.v4.s32.s32`: four Int32Regs results ($r, $g, $b, $a)
// read through Int64Regs $t at the Int32Regs coordinates {$x, $y}.
// The empty `[]` attaches no pattern here.
3137 def TEX_UNIFIED_2D_S32_S32
3138   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3139                     Int32Regs:$b, Int32Regs:$a),
3140               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3141               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3142               "[$t, \\{$x, $y\\}];",
3143               []>;
// Record for `tex.2d.v4.s32.f32`: four Int32Regs results ($r, $g, $b, $a)
// read through Int64Regs $t at the Float32Regs coordinates {$x, $y}.
// The empty `[]` attaches no pattern here.
3144 def TEX_UNIFIED_2D_S32_F32
3145   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3146                     Int32Regs:$b, Int32Regs:$a),
3147               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3148               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3149               "[$t, \\{$x, $y\\}];",
3150               []>;
// Record for `tex.level.2d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs {$x, $y}; the extra Float32Regs $lod is printed
// after the address (presumably the explicit mip level - confirm).
3151 def TEX_UNIFIED_2D_S32_F32_LEVEL
3152   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3153                     Int32Regs:$b, Int32Regs:$a),
3154               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3155                    Float32Regs:$lod),
3156               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3157               "[$t, \\{$x, $y\\}], $lod;",
3158               []>;
// Record for `tex.grad.2d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs {$x, $y}, followed by two brace-wrapped
// Float32Regs vectors {$gradx0, $gradx1} and {$grady0, $grady1} (presumably
// the derivatives along x and y - confirm against the PTX ISA).
3159 def TEX_UNIFIED_2D_S32_F32_GRAD
3160   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3161                     Int32Regs:$b, Int32Regs:$a),
3162               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3163                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3164                    Float32Regs:$grady0, Float32Regs:$grady1),
3165               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3166               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3167               "\\{$grady0, $grady1\\};",
3168               []>;
// Record for `tex.2d.v4.u32.s32`: four Int32Regs results ($r, $g, $b, $a)
// read through Int64Regs $t at the Int32Regs coordinates {$x, $y}.
// The u32 results use the same Int32Regs class as the s32 variants.
3169 def TEX_UNIFIED_2D_U32_S32
3170   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3171                     Int32Regs:$b, Int32Regs:$a),
3172               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
3173               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3174               "[$t, \\{$x, $y\\}];",
3175               []>;
// Record for `tex.2d.v4.u32.f32`: four Int32Regs results ($r, $g, $b, $a)
// read through Int64Regs $t at the Float32Regs coordinates {$x, $y}.
// The empty `[]` attaches no pattern here.
3176 def TEX_UNIFIED_2D_U32_F32
3177   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3178                     Int32Regs:$b, Int32Regs:$a),
3179               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3180               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3181               "[$t, \\{$x, $y\\}];",
3182               []>;
// Record for `tex.level.2d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs {$x, $y}; the extra Float32Regs $lod is printed
// after the address (presumably the explicit mip level - confirm).
3183 def TEX_UNIFIED_2D_U32_F32_LEVEL
3184   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3185                     Int32Regs:$b, Int32Regs:$a),
3186               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3187                    Float32Regs:$lod),
3188               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3189               "[$t, \\{$x, $y\\}], $lod;",
3190               []>;
// Record for `tex.grad.2d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs {$x, $y}, followed by two brace-wrapped
// Float32Regs vectors {$gradx0, $gradx1} and {$grady0, $grady1} (presumably
// the derivatives along x and y - confirm against the PTX ISA).
3191 def TEX_UNIFIED_2D_U32_F32_GRAD
3192   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3193                     Int32Regs:$b, Int32Regs:$a),
3194               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3195                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3196                    Float32Regs:$grady0, Float32Regs:$grady1),
3197               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3198               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
3199               "\\{$grady0, $grady1\\};",
3200               []>;
3201
// Record for `tex.a2d.v4.f32.s32`: four Float32Regs results read through
// Int64Regs $t. The address vector is printed as {$l, $x, $y, $y}: $y is
// emitted twice, giving the vector four elements (presumably a PTX vector-size
// requirement - confirm). $l, $x and $y are all Int32Regs.
3202 def TEX_UNIFIED_2D_ARRAY_F32_S32
3203   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3204                     Float32Regs:$b, Float32Regs:$a),
3205               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3206                    Int32Regs:$y),
3207               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3208               "[$t, \\{$l, $x, $y, $y\\}];",
3209               []>;
// Record for `tex.a2d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t. The address vector is printed as {$l, $x, $y, $y}: $y is
// emitted twice, giving the vector four elements (presumably a PTX vector-size
// requirement - confirm). $l is Int32Regs; $x and $y are Float32Regs.
3210 def TEX_UNIFIED_2D_ARRAY_F32_F32
3211   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3212                     Float32Regs:$b, Float32Regs:$a),
3213               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3214                    Float32Regs:$y),
3215               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3216               "[$t, \\{$l, $x, $y, $y\\}];",
3217               []>;
// Record for `tex.level.a2d.v4.f32.f32`: four Float32Regs results read
// through Int64Regs $t at {$l, $x, $y, $y} ($y is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3218 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3219   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3220                     Float32Regs:$b, Float32Regs:$a),
3221               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3222                    Float32Regs:$y, Float32Regs:$lod),
3223               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3224               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3225               []>;
// Record for `tex.grad.a2d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at {$l, $x, $y, $y} ($y is emitted twice to make four
// elements), followed by {$gradx0, $gradx1} and {$grady0, $grady1}
// (presumably the x and y derivatives - confirm against the PTX ISA).
3226 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3227   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3228                     Float32Regs:$b, Float32Regs:$a),
3229               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3230                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
3231                    Float32Regs:$grady0, Float32Regs:$grady1),
3232               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3233               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3234               "\\{$grady0, $grady1\\};",
3235               []>;
// Record for `tex.a2d.v4.s32.s32`: four Int32Regs results read through
// Int64Regs $t. The address vector is printed as {$l, $x, $y, $y}: $y is
// emitted twice, giving the vector four elements (presumably a PTX vector-size
// requirement - confirm). $l, $x and $y are all Int32Regs.
3236 def TEX_UNIFIED_2D_ARRAY_S32_S32
3237   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3238                     Int32Regs:$b, Int32Regs:$a),
3239               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3240                    Int32Regs:$y),
3241               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3242               "[$t, \\{$l, $x, $y, $y\\}];",
3243               []>;
// Record for `tex.a2d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t. The address vector is printed as {$l, $x, $y, $y}: $y is
// emitted twice, giving the vector four elements (presumably a PTX vector-size
// requirement - confirm). $l is Int32Regs; $x and $y are Float32Regs.
3244 def TEX_UNIFIED_2D_ARRAY_S32_F32
3245   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3246                     Int32Regs:$b, Int32Regs:$a),
3247               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3248                    Float32Regs:$y),
3249               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3250               "[$t, \\{$l, $x, $y, $y\\}];",
3251               []>;
// Record for `tex.level.a2d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at {$l, $x, $y, $y} ($y is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3252 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3253   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3254                     Int32Regs:$b, Int32Regs:$a),
3255               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3256                    Float32Regs:$y, Float32Regs:$lod),
3257               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3258               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3259               []>;
// Record for `tex.grad.a2d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at {$l, $x, $y, $y} ($y is emitted twice to make four
// elements), followed by {$gradx0, $gradx1} and {$grady0, $grady1}
// (presumably the x and y derivatives - confirm against the PTX ISA).
3260 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3261   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3262                     Int32Regs:$b, Int32Regs:$a),
3263               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3264                    Float32Regs:$y,
3265                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3266                    Float32Regs:$grady0, Float32Regs:$grady1),
3267               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3268               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3269               "\\{$grady0, $grady1\\};",
3270               []>;
// Record for `tex.a2d.v4.u32.s32`: four Int32Regs results read through
// Int64Regs $t. The address vector is printed as {$l, $x, $y, $y}: $y is
// emitted twice, giving the vector four elements (presumably a PTX vector-size
// requirement - confirm). $l, $x and $y are all Int32Regs.
3271 def TEX_UNIFIED_2D_ARRAY_U32_S32
3272   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3273                     Int32Regs:$b, Int32Regs:$a),
3274               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
3275                    Int32Regs:$y),
3276               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3277               "[$t, \\{$l, $x, $y, $y\\}];",
3278               []>;
// Record for `tex.a2d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t. The address vector is printed as {$l, $x, $y, $y}: $y is
// emitted twice, giving the vector four elements (presumably a PTX vector-size
// requirement - confirm). $l is Int32Regs; $x and $y are Float32Regs.
3279 def TEX_UNIFIED_2D_ARRAY_U32_F32
3280   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3281                     Int32Regs:$b, Int32Regs:$a),
3282               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3283                    Float32Regs:$y),
3284               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3285               "[$t, \\{$l, $x, $y, $y\\}];",
3286               []>;
// Record for `tex.level.a2d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at {$l, $x, $y, $y} ($y is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3287 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3288   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3289                     Int32Regs:$b, Int32Regs:$a),
3290               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3291                    Float32Regs:$y, Float32Regs:$lod),
3292               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3293               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
3294               []>;
// Record for `tex.grad.a2d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at {$l, $x, $y, $y} ($y is emitted twice to make four
// elements), followed by {$gradx0, $gradx1} and {$grady0, $grady1}
// (presumably the x and y derivatives - confirm against the PTX ISA).
3295 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3296   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3297                     Int32Regs:$b, Int32Regs:$a),
3298               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
3299                    Float32Regs:$y,
3300                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3301                    Float32Regs:$grady0, Float32Regs:$grady1),
3302               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3303               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3304               "\\{$grady0, $grady1\\};",
3305               []>;
3306
// Record for `tex.3d.v4.f32.s32`: four Float32Regs results read through
// Int64Regs $t at Int32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3307 def TEX_UNIFIED_3D_F32_S32
3308   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3309                     Float32Regs:$b, Float32Regs:$a),
3310               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3311                    Int32Regs:$z),
3312               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
3313               "[$t, \\{$x, $y, $z, $z\\}];",
3314               []>;
// Record for `tex.3d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at Float32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3315 def TEX_UNIFIED_3D_F32_F32
3316   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3317                     Float32Regs:$b, Float32Regs:$a),
3318               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3319                    Float32Regs:$z),
3320               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3321               "[$t, \\{$x, $y, $z, $z\\}];",
3322               []>;
// Record for `tex.level.3d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z} ($z is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3323 def TEX_UNIFIED_3D_F32_F32_LEVEL
3324   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3325                     Float32Regs:$b, Float32Regs:$a),
3326               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3327                    Float32Regs:$z, Float32Regs:$lod),
3328               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3329               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3330               []>;
// Record for `tex.grad.3d.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z}. Six Float32Regs gradient operands are
// printed as two four-element vectors, each repeating its last element:
// {$gradx0, $gradx1, $gradx2, $gradx2} and {$grady0, $grady1, $grady2, $grady2}
// (presumably padding to the vector size PTX expects - confirm).
3331 def TEX_UNIFIED_3D_F32_F32_GRAD
3332   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3333                     Float32Regs:$b, Float32Regs:$a),
3334               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3335                    Float32Regs:$z,
3336                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3337                    Float32Regs:$gradx2, Float32Regs:$grady0,
3338                    Float32Regs:$grady1, Float32Regs:$grady2),
3339               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3340               "[$t, \\{$x, $y, $z, $z\\}], "
3341               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3342               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3343               []>;
// Record for `tex.3d.v4.s32.s32`: four Int32Regs results read through
// Int64Regs $t at Int32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3344 def TEX_UNIFIED_3D_S32_S32
3345   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3346                     Int32Regs:$b, Int32Regs:$a),
3347               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3348                    Int32Regs:$z),
3349               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
3350               "[$t, \\{$x, $y, $z, $z\\}];",
3351               []>;
// Record for `tex.3d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3352 def TEX_UNIFIED_3D_S32_F32
3353   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3354                     Int32Regs:$b, Int32Regs:$a),
3355               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3356                    Float32Regs:$z),
3357               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3358               "[$t, \\{$x, $y, $z, $z\\}];",
3359               []>;
// Record for `tex.level.3d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z} ($z is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3360 def TEX_UNIFIED_3D_S32_F32_LEVEL
3361   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3362                     Int32Regs:$b, Int32Regs:$a),
3363               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3364                    Float32Regs:$z, Float32Regs:$lod),
3365               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3366               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3367               []>;
// Record for `tex.grad.3d.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z}. Six Float32Regs gradient operands are
// printed as two four-element vectors, each repeating its last element:
// {$gradx0, $gradx1, $gradx2, $gradx2} and {$grady0, $grady1, $grady2, $grady2}
// (presumably padding to the vector size PTX expects - confirm).
3368 def TEX_UNIFIED_3D_S32_F32_GRAD
3369   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3370                     Int32Regs:$b, Int32Regs:$a),
3371               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3372                    Float32Regs:$z,
3373                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3374                    Float32Regs:$gradx2, Float32Regs:$grady0,
3375                    Float32Regs:$grady1, Float32Regs:$grady2),
3376               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3377               "[$t, \\{$x, $y, $z, $z\\}], "
3378               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3379               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3380               []>;
// Record for `tex.3d.v4.u32.s32`: four Int32Regs results read through
// Int64Regs $t at Int32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3381 def TEX_UNIFIED_3D_U32_S32
3382   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3383                     Int32Regs:$b, Int32Regs:$a),
3384               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3385                    Int32Regs:$z),
3386               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
3387               "[$t, \\{$x, $y, $z, $z\\}];",
3388               []>;
// Record for `tex.3d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3389 def TEX_UNIFIED_3D_U32_F32
3390   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3391                     Int32Regs:$b, Int32Regs:$a),
3392               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3393                    Float32Regs:$z),
3394               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3395               "[$t, \\{$x, $y, $z, $z\\}];",
3396               []>;
// Record for `tex.level.3d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z} ($z is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3397 def TEX_UNIFIED_3D_U32_F32_LEVEL
3398   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3399                     Int32Regs:$b, Int32Regs:$a),
3400               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3401                    Float32Regs:$z, Float32Regs:$lod),
3402               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3403               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3404               []>;
// Record for `tex.grad.3d.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z}. Six Float32Regs gradient operands are
// printed as two four-element vectors, each repeating its last element:
// {$gradx0, $gradx1, $gradx2, $gradx2} and {$grady0, $grady1, $grady2, $grady2}
// (presumably padding to the vector size PTX expects - confirm).
3405 def TEX_UNIFIED_3D_U32_F32_GRAD
3406   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3407                     Int32Regs:$b, Int32Regs:$a),
3408               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3409                    Float32Regs:$z,
3410                    Float32Regs:$gradx0, Float32Regs:$gradx1,
3411                    Float32Regs:$gradx2, Float32Regs:$grady0,
3412                    Float32Regs:$grady1, Float32Regs:$grady2),
3413               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3414               "[$t, \\{$x, $y, $z, $z\\}], "
3415               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3416               "\\{$grady0, $grady1, $grady2, $grady2\\};",
3417               []>;
3418
// Record for `tex.cube.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t at Float32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3419 def TEX_UNIFIED_CUBE_F32_F32
3420   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3421                     Float32Regs:$b, Float32Regs:$a),
3422               (ins Int64Regs:$t,
3423                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3424               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3425               "[$t, \\{$x, $y, $z, $z\\}];",
3426               []>;
// Record for `tex.level.cube.v4.f32.f32`: four Float32Regs results read
// through Int64Regs $t at {$x, $y, $z, $z} ($z is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3427 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3428   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3429                     Float32Regs:$b, Float32Regs:$a),
3430               (ins Int64Regs:$t,
3431                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3432                    Float32Regs:$lod),
3433               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3434               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3435               []>;
// Record for `tex.cube.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3436 def TEX_UNIFIED_CUBE_S32_F32
3437   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3438                     Int32Regs:$b, Int32Regs:$a),
3439               (ins Int64Regs:$t,
3440                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3441               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3442               "[$t, \\{$x, $y, $z, $z\\}];",
3443               []>;
// Record for `tex.level.cube.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z} ($z is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3444 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3445   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3446                     Int32Regs:$b, Int32Regs:$a),
3447               (ins Int64Regs:$t,
3448                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3449                    Float32Regs:$lod),
3450               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3451               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3452               []>;
// Record for `tex.cube.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at Float32Regs coordinates. The address vector is printed as
// {$x, $y, $z, $z}: $z is emitted twice, giving the vector four elements
// (presumably a PTX vector-size requirement - confirm).
3453 def TEX_UNIFIED_CUBE_U32_F32
3454   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3455                     Int32Regs:$b, Int32Regs:$a),
3456               (ins Int64Regs:$t,
3457                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3458               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3459               "[$t, \\{$x, $y, $z, $z\\}];",
3460               []>;
// Record for `tex.level.cube.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t at {$x, $y, $z, $z} ($z is emitted twice to make four
// elements). The Float32Regs $lod follows the address (presumably the explicit
// mip level - confirm).
3461 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3462   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3463                     Int32Regs:$b, Int32Regs:$a),
3464               (ins Int64Regs:$t,
3465                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3466                    Float32Regs:$lod),
3467               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3468               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3469               []>;
3470
// Record for `tex.acube.v4.f32.f32`: four Float32Regs results read through
// Int64Regs $t. The address vector {$l, $x, $y, $z} already has four distinct
// elements, so nothing is repeated: Int32Regs $l (presumably the array layer -
// confirm) followed by the Float32Regs coordinates.
3471 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3472   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3473                     Float32Regs:$b, Float32Regs:$a),
3474               (ins Int64Regs:$t, Int32Regs:$l,
3475                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3476               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3477               "[$t, \\{$l, $x, $y, $z\\}];",
3478               []>;
// Record for `tex.level.acube.v4.f32.f32`: four Float32Regs results read
// through Int64Regs $t at {$l, $x, $y, $z} (Int32Regs $l, Float32Regs
// coordinates). The Float32Regs $lod follows the address (presumably the
// explicit mip level - confirm).
3479 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3480   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3481                     Float32Regs:$b, Float32Regs:$a),
3482               (ins Int64Regs:$t, Int32Regs:$l,
3483                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3484                    Float32Regs:$lod),
3485               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
3486               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3487               []>;
// Record for `tex.acube.v4.s32.f32`: four Int32Regs results read through
// Int64Regs $t. The address vector {$l, $x, $y, $z} already has four distinct
// elements, so nothing is repeated: Int32Regs $l (presumably the array layer -
// confirm) followed by the Float32Regs coordinates.
3488 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3489   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3490                     Int32Regs:$b, Int32Regs:$a),
3491               (ins Int64Regs:$t, Int32Regs:$l,
3492                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3493               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3494               "[$t, \\{$l, $x, $y, $z\\}];",
3495               []>;
// Record for `tex.level.acube.v4.s32.f32`: four Int32Regs results read
// through Int64Regs $t at {$l, $x, $y, $z} (Int32Regs $l, Float32Regs
// coordinates). The Float32Regs $lod follows the address (presumably the
// explicit mip level - confirm).
3496 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3497   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3498                     Int32Regs:$b, Int32Regs:$a),
3499               (ins Int64Regs:$t, Int32Regs:$l,
3500                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3501                    Float32Regs:$lod),
3502               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
3503               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3504               []>;
// Record for `tex.acube.v4.u32.f32`: four Int32Regs results read through
// Int64Regs $t. The address vector {$l, $x, $y, $z} already has four distinct
// elements, so nothing is repeated: Int32Regs $l (presumably the array layer -
// confirm) followed by the Float32Regs coordinates.
3505 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3506   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3507                     Int32Regs:$b, Int32Regs:$a),
3508               (ins Int64Regs:$t, Int32Regs:$l,
3509                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3510               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3511               "[$t, \\{$l, $x, $y, $z\\}];",
3512               []>;
// Record for `tex.level.acube.v4.u32.f32`: four Int32Regs results read
// through Int64Regs $t at {$l, $x, $y, $z} (Int32Regs $l, Float32Regs
// coordinates). The Float32Regs $lod follows the address (presumably the
// explicit mip level - confirm).
3513 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3514   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3515                     Int32Regs:$b, Int32Regs:$a),
3516               (ins Int64Regs:$t, Int32Regs:$l,
3517                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3518                    Float32Regs:$lod),
3519               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
3520               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3521               []>;
3522
// Record for `tld4.r.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.r` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3523 def TLD4_UNIFIED_R_2D_F32_F32
3524   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3525                     Float32Regs:$v2, Float32Regs:$v3),
3526               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3527               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3528               "[$t, \\{$x, $y\\}];",
3529               []>;
// Record for `tld4.g.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.g` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3530 def TLD4_UNIFIED_G_2D_F32_F32
3531   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3532                     Float32Regs:$v2, Float32Regs:$v3),
3533               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3534               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3535               "[$t, \\{$x, $y\\}];",
3536               []>;
// Record for `tld4.b.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.b` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3537 def TLD4_UNIFIED_B_2D_F32_F32
3538   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3539                     Float32Regs:$v2, Float32Regs:$v3),
3540               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3541               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3542               "[$t, \\{$x, $y\\}];",
3543               []>;
// Record for `tld4.a.2d.v4.f32.f32`: four Float32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.a` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3544 def TLD4_UNIFIED_A_2D_F32_F32
3545   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3546                     Float32Regs:$v2, Float32Regs:$v3),
3547               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3548               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3549               "[$t, \\{$x, $y\\}];",
3550               []>;
// Record for `tld4.r.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.r` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3551 def TLD4_UNIFIED_R_2D_S32_F32
3552   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3553                     Int32Regs:$v2, Int32Regs:$v3),
3554               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3555               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3556               "[$t, \\{$x, $y\\}];",
3557               []>;
// Record for `tld4.g.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.g` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3558 def TLD4_UNIFIED_G_2D_S32_F32
3559   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3560                     Int32Regs:$v2, Int32Regs:$v3),
3561               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3562               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3563               "[$t, \\{$x, $y\\}];",
3564               []>;
// Record for `tld4.b.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.b` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3565 def TLD4_UNIFIED_B_2D_S32_F32
3566   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3567                     Int32Regs:$v2, Int32Regs:$v3),
3568               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3569               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3570               "[$t, \\{$x, $y\\}];",
3571               []>;
// Record for `tld4.a.2d.v4.s32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.a` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3572 def TLD4_UNIFIED_A_2D_S32_F32
3573   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3574                     Int32Regs:$v2, Int32Regs:$v3),
3575               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3576               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3577               "[$t, \\{$x, $y\\}];",
3578               []>;
// Record for `tld4.r.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.r` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3579 def TLD4_UNIFIED_R_2D_U32_F32
3580   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3581                     Int32Regs:$v2, Int32Regs:$v3),
3582               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3583               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3584               "[$t, \\{$x, $y\\}];",
3585               []>;
// Record for `tld4.g.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.g` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3586 def TLD4_UNIFIED_G_2D_U32_F32
3587   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3588                     Int32Regs:$v2, Int32Regs:$v3),
3589               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3590               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3591               "[$t, \\{$x, $y\\}];",
3592               []>;
// Record for `tld4.b.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.b` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3593 def TLD4_UNIFIED_B_2D_U32_F32
3594   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3595                     Int32Regs:$v2, Int32Regs:$v3),
3596               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3597               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3598               "[$t, \\{$x, $y\\}];",
3599               []>;
// Record for `tld4.a.2d.v4.u32.f32`: four Int32Regs results ($v0..$v3) read
// through Int64Regs $t at Float32Regs {$x, $y}. The `.a` in the mnemonic is
// presumably the component selector of the gather - confirm in the PTX ISA.
3600 def TLD4_UNIFIED_A_2D_U32_F32
3601   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3602                     Int32Regs:$v2, Int32Regs:$v3),
3603               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3604               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
3605               "[$t, \\{$x, $y\\}];",
3606               []>;
3607 }
3608
3609
3610
3611 //=== Surface load instructions
3612 // .clamp variant
// `suld.b.*.clamp` records that produce a single result register. Every def in
// this scope is created with IsSuld = 1 (presumably the field encodes the
// result count / vector width for later passes - confirm where it is read).
// All records take an Int64Regs $s (presumably the surface handle - confirm)
// plus Int32Regs coordinates, and carry an empty pattern list.
// Note that both the b8 and the b16 forms write an Int16Regs result.
3613 let IsSuld = 1 in {
// 1D surface: the address vector is {$x}.
3614 def SULD_1D_I8_CLAMP
3615   : NVPTXInst<(outs Int16Regs:$r),
3616               (ins Int64Regs:$s, Int32Regs:$x),
3617               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3618               []>;
3619 def SULD_1D_I16_CLAMP
3620   : NVPTXInst<(outs Int16Regs:$r),
3621               (ins Int64Regs:$s, Int32Regs:$x),
3622               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3623               []>;
3624 def SULD_1D_I32_CLAMP
3625   : NVPTXInst<(outs Int32Regs:$r),
3626               (ins Int64Regs:$s, Int32Regs:$x),
3627               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3628               []>;
3629 def SULD_1D_I64_CLAMP
3630   : NVPTXInst<(outs Int64Regs:$r),
3631               (ins Int64Regs:$s, Int32Regs:$x),
3632               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3633               []>;
3634
// 1D array surface: the address vector is {$l, $x}; $l is presumably the
// array layer - confirm.
3635 def SULD_1D_ARRAY_I8_CLAMP
3636   : NVPTXInst<(outs Int16Regs:$r),
3637               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3638               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3639               []>;
3640 def SULD_1D_ARRAY_I16_CLAMP
3641   : NVPTXInst<(outs Int16Regs:$r),
3642               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3643               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3644               []>;
3645 def SULD_1D_ARRAY_I32_CLAMP
3646   : NVPTXInst<(outs Int32Regs:$r),
3647               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3648               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3649               []>;
3650 def SULD_1D_ARRAY_I64_CLAMP
3651   : NVPTXInst<(outs Int64Regs:$r),
3652               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3653               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3654               []>;
3655
// 2D surface: the address vector is {$x, $y}.
3656 def SULD_2D_I8_CLAMP
3657   : NVPTXInst<(outs Int16Regs:$r),
3658               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3659               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3660               []>;
3661 def SULD_2D_I16_CLAMP
3662   : NVPTXInst<(outs Int16Regs:$r),
3663               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3664               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3665               []>;
3666 def SULD_2D_I32_CLAMP
3667   : NVPTXInst<(outs Int32Regs:$r),
3668               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3669               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3670               []>;
3671 def SULD_2D_I64_CLAMP
3672   : NVPTXInst<(outs Int64Regs:$r),
3673               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3674               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3675               []>;
3676
// 2D array surface: the address vector is printed as {$l, $x, $y, $y}; $y is
// emitted twice so the vector has four elements (presumably a PTX vector-size
// requirement - confirm).
3677 def SULD_2D_ARRAY_I8_CLAMP
3678   : NVPTXInst<(outs Int16Regs:$r),
3679               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3680               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3681               []>;
3682 def SULD_2D_ARRAY_I16_CLAMP
3683   : NVPTXInst<(outs Int16Regs:$r),
3684               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3685               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3686               []>;
3687 def SULD_2D_ARRAY_I32_CLAMP
3688   : NVPTXInst<(outs Int32Regs:$r),
3689               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3690               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3691               []>;
3692 def SULD_2D_ARRAY_I64_CLAMP
3693   : NVPTXInst<(outs Int64Regs:$r),
3694               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3695               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3696               []>;
3697
// 3D surface: the address vector is printed as {$x, $y, $z, $z}; $z is emitted
// twice so the vector has four elements (presumably a PTX vector-size
// requirement - confirm).
3698 def SULD_3D_I8_CLAMP
3699   : NVPTXInst<(outs Int16Regs:$r),
3700               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3701               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3702               []>;
3703 def SULD_3D_I16_CLAMP
3704   : NVPTXInst<(outs Int16Regs:$r),
3705               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3706               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3707               []>;
3708 def SULD_3D_I32_CLAMP
3709   : NVPTXInst<(outs Int32Regs:$r),
3710               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3711               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3712               []>;
3713 def SULD_3D_I64_CLAMP
3714   : NVPTXInst<(outs Int64Regs:$r),
3715               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3716               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3717               []>;
3718 }
3719
3720 let IsSuld = 2 in {
// Record for `suld.b.1d.v2.b8.clamp`: two results ($r, $g) read through
// Int64Regs $s at Int32Regs {$x}. The b8 results are held in Int16Regs.
3721 def SULD_1D_V2I8_CLAMP
3722   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3723               (ins Int64Regs:$s, Int32Regs:$x),
3724               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3725               []>;
// Record for `suld.b.1d.v2.b16.clamp`: two Int16Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$x}. Empty pattern list.
3726 def SULD_1D_V2I16_CLAMP
3727   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3728               (ins Int64Regs:$s, Int32Regs:$x),
3729               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3730               []>;
// Record for `suld.b.1d.v2.b32.clamp`: two Int32Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$x}. Empty pattern list.
3731 def SULD_1D_V2I32_CLAMP
3732   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3733               (ins Int64Regs:$s, Int32Regs:$x),
3734               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3735               []>;
// Record for `suld.b.1d.v2.b64.clamp`: two Int64Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$x}. Empty pattern list.
3736 def SULD_1D_V2I64_CLAMP
3737   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3738               (ins Int64Regs:$s, Int32Regs:$x),
3739               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3740               []>;
3741
// Record for `suld.b.a1d.v2.b8.clamp`: two results ($r, $g) read through
// Int64Regs $s at Int32Regs {$l, $x} ($l is presumably the array layer -
// confirm). The b8 results are held in Int16Regs.
3742 def SULD_1D_ARRAY_V2I8_CLAMP
3743   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3744               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3745               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3746               []>;
// Record for `suld.b.a1d.v2.b16.clamp`: two Int16Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$l, $x} ($l is presumably the array
// layer - confirm).
3747 def SULD_1D_ARRAY_V2I16_CLAMP
3748   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3749               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3750               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3751               []>;
// Record for `suld.b.a1d.v2.b32.clamp`: two Int32Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$l, $x} ($l is presumably the array
// layer - confirm).
3752 def SULD_1D_ARRAY_V2I32_CLAMP
3753   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3754               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3755               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3756               []>;
// Record for `suld.b.a1d.v2.b64.clamp`: two Int64Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$l, $x} ($l is presumably the array
// layer - confirm).
3757 def SULD_1D_ARRAY_V2I64_CLAMP
3758   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3759               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3760               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3761               []>;
3762
// Record for `suld.b.2d.v2.b8.clamp`: two results ($r, $g) read through
// Int64Regs $s at Int32Regs {$x, $y}. The b8 results are held in Int16Regs.
3763 def SULD_2D_V2I8_CLAMP
3764   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3765               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3766               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3767               []>;
// Record for `suld.b.2d.v2.b16.clamp`: two Int16Regs results ($r, $g) read
// through Int64Regs $s at Int32Regs {$x, $y}. Empty pattern list.
3768 def SULD_2D_V2I16_CLAMP
3769   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3770               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3771               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3772               []>;
// Record for "suld.b.2d.v2.b32.clamp": two Int32Regs results ($r, $g);
// coordinates are {$x, $y}. Empty pattern list.
3773 def SULD_2D_V2I32_CLAMP
3774   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3775               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3776               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3777               []>;
// Record for "suld.b.2d.v2.b64.clamp": two Int64Regs results ($r, $g);
// coordinates are {$x, $y}. Empty pattern list.
3778 def SULD_2D_V2I64_CLAMP
3779   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3780               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3781               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3782               []>;
3783
// Record for "suld.b.a2d.v2.b8.clamp": two results ($r, $g); the b8 elements
// are carried in Int16Regs. Only three coordinate inputs exist ($l, $x, $y);
// the asm string lists $y twice so the coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
3784 def SULD_2D_ARRAY_V2I8_CLAMP
3785   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3786               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3787               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3788               "[$s, \\{$l, $x, $y, $y\\}];",
3789               []>;
// Record for "suld.b.a2d.v2.b16.clamp": two Int16Regs results ($r, $g).
// Inputs are $l, $x, $y; the asm string repeats $y to give a four-entry
// coordinate list. Empty pattern list.
3790 def SULD_2D_ARRAY_V2I16_CLAMP
3791   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3792               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3793               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3794               "[$s, \\{$l, $x, $y, $y\\}];",
3795               []>;
// Record for "suld.b.a2d.v2.b32.clamp": two Int32Regs results ($r, $g).
// Inputs are $l, $x, $y; the asm string repeats $y to give a four-entry
// coordinate list. Empty pattern list.
3796 def SULD_2D_ARRAY_V2I32_CLAMP
3797   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3798               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3799               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3800               "[$s, \\{$l, $x, $y, $y\\}];",
3801               []>;
// Record for "suld.b.a2d.v2.b64.clamp": two Int64Regs results ($r, $g).
// Inputs are $l, $x, $y; the asm string repeats $y to give a four-entry
// coordinate list. Empty pattern list.
3802 def SULD_2D_ARRAY_V2I64_CLAMP
3803   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3804               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3805               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3806               "[$s, \\{$l, $x, $y, $y\\}];",
3807               []>;
3808
// Record for "suld.b.3d.v2.b8.clamp": two results ($r, $g); the b8 elements
// are carried in Int16Regs. Inputs are $x, $y, $z; the asm string lists $z
// twice so the coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
3809 def SULD_3D_V2I8_CLAMP
3810   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3811               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3812               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3813               []>;
// Record for "suld.b.3d.v2.b16.clamp": two Int16Regs results ($r, $g).
// Inputs are $x, $y, $z; the asm string repeats $z to give a four-entry
// coordinate list. Empty pattern list.
3814 def SULD_3D_V2I16_CLAMP
3815   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3816               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3817               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3818               []>;
// Record for "suld.b.3d.v2.b32.clamp": two Int32Regs results ($r, $g).
// Inputs are $x, $y, $z; the asm string repeats $z to give a four-entry
// coordinate list. Empty pattern list.
3819 def SULD_3D_V2I32_CLAMP
3820   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3821               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3822               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3823               []>;
// Record for "suld.b.3d.v2.b64.clamp": two Int64Regs results ($r, $g).
// Inputs are $x, $y, $z; the asm string repeats $z to give a four-entry
// coordinate list. Empty pattern list.
3824 def SULD_3D_V2I64_CLAMP
3825   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3826               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3827               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3828               []>;
3829 }
3830
// Four-result (.v4) "suld.b" records with the .clamp suffix. Every record in
// this scope inherits IsSuld = 3 and yields four registers ($r, $g, $b, $a).
// Only b8, b16 and b32 element widths are defined here (no b64 records);
// b8 and b16 results both use Int16Regs. All pattern lists are empty, so no
// TableGen selection pattern is attached to any of these records.
// NOTE(review): presumably they are selected from C++ code -- confirm.
3831 let IsSuld = 3 in {
// 1D: a single coordinate, $x.
3832 def SULD_1D_V4I8_CLAMP
3833   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3834               (ins Int64Regs:$s, Int32Regs:$x),
3835               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3836               []>;
3837 def SULD_1D_V4I16_CLAMP
3838   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3839               (ins Int64Regs:$s, Int32Regs:$x),
3840               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3841               []>;
3842 def SULD_1D_V4I32_CLAMP
3843   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3844               (ins Int64Regs:$s, Int32Regs:$x),
3845               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3846               []>;
3847
// 1D array (.a1d): coordinates are emitted as {$l, $x}
// ($l is presumably the array layer index -- TODO confirm).
3848 def SULD_1D_ARRAY_V4I8_CLAMP
3849   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3850               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3851               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3852               "[$s, \\{$l, $x\\}];",
3853               []>;
3854 def SULD_1D_ARRAY_V4I16_CLAMP
3855   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3856               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3857               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3858               "[$s, \\{$l, $x\\}];",
3859               []>;
3860 def SULD_1D_ARRAY_V4I32_CLAMP
3861   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3862               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3863               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3864               "[$s, \\{$l, $x\\}];",
3865               []>;
3866
// 2D: coordinates are {$x, $y}.
3867 def SULD_2D_V4I8_CLAMP
3868   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3869               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3870               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3871               []>;
3872 def SULD_2D_V4I16_CLAMP
3873   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3874               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3875               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3876               []>;
3877 def SULD_2D_V4I32_CLAMP
3878   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3879               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3880               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3881               []>;
3882
// 2D array (.a2d): inputs are $l, $x, $y; the asm string lists $y twice so the
// coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
3883 def SULD_2D_ARRAY_V4I8_CLAMP
3884   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3885               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3886               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3887               "[$s, \\{$l, $x, $y, $y\\}];",
3888               []>;
3889 def SULD_2D_ARRAY_V4I16_CLAMP
3890   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3891               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3892               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3893               "[$s, \\{$l, $x, $y, $y\\}];",
3894               []>;
3895 def SULD_2D_ARRAY_V4I32_CLAMP
3896   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3897               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3898               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3899               "[$s, \\{$l, $x, $y, $y\\}];",
3900               []>;
3901
3902
// 3D: inputs are $x, $y, $z; the asm string lists $z twice so the coordinate
// list has four entries.
3903 def SULD_3D_V4I8_CLAMP
3904   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3905               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3906               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3907               "[$s, \\{$x, $y, $z, $z\\}];",
3908               []>;
3909 def SULD_3D_V4I16_CLAMP
3910   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3911               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3912               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3913               "[$s, \\{$x, $y, $z, $z\\}];",
3914               []>;
3915 def SULD_3D_V4I32_CLAMP
3916   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3917               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3918               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3919               "[$s, \\{$x, $y, $z, $z\\}];",
3920               []>;
3921 }
3922
3923
3924 // .trap variant
// Single-result "suld.b" records with the .trap suffix. Every record in this
// scope inherits IsSuld = 1 and yields one register ($r). Element widths
// b8/b16/b32/b64 are defined for each geometry; b8 and b16 results both use
// Int16Regs. All pattern lists are empty, so no TableGen selection pattern is
// attached to any of these records.
// NOTE(review): presumably they are selected from C++ code -- confirm.
3925 let IsSuld = 1 in {
// 1D: a single coordinate, $x.
3926 def SULD_1D_I8_TRAP
3927   : NVPTXInst<(outs Int16Regs:$r),
3928               (ins Int64Regs:$s, Int32Regs:$x),
3929               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3930               []>;
3931 def SULD_1D_I16_TRAP
3932   : NVPTXInst<(outs Int16Regs:$r),
3933               (ins Int64Regs:$s, Int32Regs:$x),
3934               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3935               []>;
3936 def SULD_1D_I32_TRAP
3937   : NVPTXInst<(outs Int32Regs:$r),
3938               (ins Int64Regs:$s, Int32Regs:$x),
3939               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3940               []>;
3941 def SULD_1D_I64_TRAP
3942   : NVPTXInst<(outs Int64Regs:$r),
3943               (ins Int64Regs:$s, Int32Regs:$x),
3944               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3945               []>;
3946
// 1D array (.a1d): coordinates are emitted as {$l, $x}
// ($l is presumably the array layer index -- TODO confirm).
3947 def SULD_1D_ARRAY_I8_TRAP
3948   : NVPTXInst<(outs Int16Regs:$r),
3949               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3950               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3951               []>;
3952 def SULD_1D_ARRAY_I16_TRAP
3953   : NVPTXInst<(outs Int16Regs:$r),
3954               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3955               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3956               []>;
3957 def SULD_1D_ARRAY_I32_TRAP
3958   : NVPTXInst<(outs Int32Regs:$r),
3959               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3960               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3961               []>;
3962 def SULD_1D_ARRAY_I64_TRAP
3963   : NVPTXInst<(outs Int64Regs:$r),
3964               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3965               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3966               []>;
3967
// 2D: coordinates are {$x, $y}.
3968 def SULD_2D_I8_TRAP
3969   : NVPTXInst<(outs Int16Regs:$r),
3970               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3971               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3972               []>;
3973 def SULD_2D_I16_TRAP
3974   : NVPTXInst<(outs Int16Regs:$r),
3975               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3976               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3977               []>;
3978 def SULD_2D_I32_TRAP
3979   : NVPTXInst<(outs Int32Regs:$r),
3980               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3981               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3982               []>;
3983 def SULD_2D_I64_TRAP
3984   : NVPTXInst<(outs Int64Regs:$r),
3985               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3986               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3987               []>;
3988
// 2D array (.a2d): inputs are $l, $x, $y; the asm string lists $y twice so the
// coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
3989 def SULD_2D_ARRAY_I8_TRAP
3990   : NVPTXInst<(outs Int16Regs:$r),
3991               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3992               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3993               []>;
3994 def SULD_2D_ARRAY_I16_TRAP
3995   : NVPTXInst<(outs Int16Regs:$r),
3996               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3997               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3998               []>;
3999 def SULD_2D_ARRAY_I32_TRAP
4000   : NVPTXInst<(outs Int32Regs:$r),
4001               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4002               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4003               []>;
4004 def SULD_2D_ARRAY_I64_TRAP
4005   : NVPTXInst<(outs Int64Regs:$r),
4006               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4007               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4008               []>;
4009
// 3D: inputs are $x, $y, $z; the asm string lists $z twice so the coordinate
// list has four entries.
4010 def SULD_3D_I8_TRAP
4011   : NVPTXInst<(outs Int16Regs:$r),
4012               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4013               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4014               []>;
4015 def SULD_3D_I16_TRAP
4016   : NVPTXInst<(outs Int16Regs:$r),
4017               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4018               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4019               []>;
4020 def SULD_3D_I32_TRAP
4021   : NVPTXInst<(outs Int32Regs:$r),
4022               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4023               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4024               []>;
4025 def SULD_3D_I64_TRAP
4026   : NVPTXInst<(outs Int64Regs:$r),
4027               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4028               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4029               []>;
4030 }
4031
// Two-result (.v2) "suld.b" records with the .trap suffix. Every record in
// this scope inherits IsSuld = 2 and yields two registers ($r, $g). Element
// widths b8/b16/b32/b64 are defined for each geometry; b8 and b16 results both
// use Int16Regs. All pattern lists are empty, so no TableGen selection pattern
// is attached to any of these records.
// NOTE(review): presumably they are selected from C++ code -- confirm.
4032 let IsSuld = 2 in {
// 1D: a single coordinate, $x.
4033 def SULD_1D_V2I8_TRAP
4034   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4035               (ins Int64Regs:$s, Int32Regs:$x),
4036               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4037               []>;
4038 def SULD_1D_V2I16_TRAP
4039   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4040               (ins Int64Regs:$s, Int32Regs:$x),
4041               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4042               []>;
4043 def SULD_1D_V2I32_TRAP
4044   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4045               (ins Int64Regs:$s, Int32Regs:$x),
4046               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4047               []>;
4048 def SULD_1D_V2I64_TRAP
4049   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4050               (ins Int64Regs:$s, Int32Regs:$x),
4051               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4052               []>;
4053
// 1D array (.a1d): coordinates are emitted as {$l, $x}
// ($l is presumably the array layer index -- TODO confirm).
4054 def SULD_1D_ARRAY_V2I8_TRAP
4055   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4056               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4057               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4058               []>;
4059 def SULD_1D_ARRAY_V2I16_TRAP
4060   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4061               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4062               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4063               []>;
4064 def SULD_1D_ARRAY_V2I32_TRAP
4065   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4066               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4067               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4068               []>;
4069 def SULD_1D_ARRAY_V2I64_TRAP
4070   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4071               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4072               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4073               []>;
4074
// 2D: coordinates are {$x, $y}.
4075 def SULD_2D_V2I8_TRAP
4076   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4077               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4078               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4079               []>;
4080 def SULD_2D_V2I16_TRAP
4081   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4082               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4083               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4084               []>;
4085 def SULD_2D_V2I32_TRAP
4086   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4087               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4088               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4089               []>;
4090 def SULD_2D_V2I64_TRAP
4091   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4092               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4093               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4094               []>;
4095
// 2D array (.a2d): inputs are $l, $x, $y; the asm string lists $y twice so the
// coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
4096 def SULD_2D_ARRAY_V2I8_TRAP
4097   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4098               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4099               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4100               "[$s, \\{$l, $x, $y, $y\\}];",
4101               []>;
4102 def SULD_2D_ARRAY_V2I16_TRAP
4103   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4104               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4105               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4106               "[$s, \\{$l, $x, $y, $y\\}];",
4107               []>;
4108 def SULD_2D_ARRAY_V2I32_TRAP
4109   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4110               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4111               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4112               "[$s, \\{$l, $x, $y, $y\\}];",
4113               []>;
4114 def SULD_2D_ARRAY_V2I64_TRAP
4115   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4116               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4117               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4118               "[$s, \\{$l, $x, $y, $y\\}];",
4119               []>;
4120
// 3D: inputs are $x, $y, $z; the asm string lists $z twice so the coordinate
// list has four entries.
4121 def SULD_3D_V2I8_TRAP
4122   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4123               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4124               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4125               []>;
4126 def SULD_3D_V2I16_TRAP
4127   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4128               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4129               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4130               []>;
4131 def SULD_3D_V2I32_TRAP
4132   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4133               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4134               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4135               []>;
4136 def SULD_3D_V2I64_TRAP
4137   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4138               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4139               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4140               []>;
4141 }
4142
// Four-result (.v4) "suld.b" records with the .trap suffix. Every record in
// this scope inherits IsSuld = 3 and yields four registers ($r, $g, $b, $a).
// Only b8, b16 and b32 element widths are defined here (no b64 records);
// b8 and b16 results both use Int16Regs. All pattern lists are empty, so no
// TableGen selection pattern is attached to any of these records.
// NOTE(review): presumably they are selected from C++ code -- confirm.
4143 let IsSuld = 3 in {
// 1D: a single coordinate, $x.
4144 def SULD_1D_V4I8_TRAP
4145   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4146               (ins Int64Regs:$s, Int32Regs:$x),
4147               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4148               []>;
4149 def SULD_1D_V4I16_TRAP
4150   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4151               (ins Int64Regs:$s, Int32Regs:$x),
4152               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4153               []>;
4154 def SULD_1D_V4I32_TRAP
4155   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4156               (ins Int64Regs:$s, Int32Regs:$x),
4157               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4158               []>;
4159
// 1D array (.a1d): coordinates are emitted as {$l, $x}
// ($l is presumably the array layer index -- TODO confirm).
4160 def SULD_1D_ARRAY_V4I8_TRAP
4161   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4162               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4163               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4164               "[$s, \\{$l, $x\\}];",
4165               []>;
4166 def SULD_1D_ARRAY_V4I16_TRAP
4167   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4168               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4169               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4170               "[$s, \\{$l, $x\\}];",
4171               []>;
4172 def SULD_1D_ARRAY_V4I32_TRAP
4173   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4174               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4175               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4176               "[$s, \\{$l, $x\\}];",
4177               []>;
4178
// 2D: coordinates are {$x, $y}.
4179 def SULD_2D_V4I8_TRAP
4180   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4181               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4182               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4183               []>;
4184 def SULD_2D_V4I16_TRAP
4185   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4186               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4187               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4188               []>;
4189 def SULD_2D_V4I32_TRAP
4190   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4191               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4192               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4193               []>;
4194
// 2D array (.a2d): inputs are $l, $x, $y; the asm string lists $y twice so the
// coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
4195 def SULD_2D_ARRAY_V4I8_TRAP
4196   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4197               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4198               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4199               "[$s, \\{$l, $x, $y, $y\\}];",
4200               []>;
4201 def SULD_2D_ARRAY_V4I16_TRAP
4202   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4203               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4204               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4205               "[$s, \\{$l, $x, $y, $y\\}];",
4206               []>;
4207 def SULD_2D_ARRAY_V4I32_TRAP
4208   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4209               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4210               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4211               "[$s, \\{$l, $x, $y, $y\\}];",
4212               []>;
4213
4214
// 3D: inputs are $x, $y, $z; the asm string lists $z twice so the coordinate
// list has four entries.
4215 def SULD_3D_V4I8_TRAP
4216   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4217               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4218               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4219               "[$s, \\{$x, $y, $z, $z\\}];",
4220               []>;
4221 def SULD_3D_V4I16_TRAP
4222   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4223               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4224               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4225               "[$s, \\{$x, $y, $z, $z\\}];",
4226               []>;
4227 def SULD_3D_V4I32_TRAP
4228   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4229               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4230               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4231               "[$s, \\{$x, $y, $z, $z\\}];",
4232               []>;
4233 }
4234
4235 // .zero variant
// Single-result "suld.b" records with the .zero suffix. Every record in this
// scope inherits IsSuld = 1 and yields one register ($r). Element widths
// b8/b16/b32/b64 are defined for each geometry; b8 and b16 results both use
// Int16Regs. All pattern lists are empty, so no TableGen selection pattern is
// attached to any of these records.
// NOTE(review): presumably they are selected from C++ code -- confirm.
4236 let IsSuld = 1 in {
// 1D: a single coordinate, $x.
4237 def SULD_1D_I8_ZERO
4238   : NVPTXInst<(outs Int16Regs:$r),
4239               (ins Int64Regs:$s, Int32Regs:$x),
4240               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4241               []>;
4242 def SULD_1D_I16_ZERO
4243   : NVPTXInst<(outs Int16Regs:$r),
4244               (ins Int64Regs:$s, Int32Regs:$x),
4245               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4246               []>;
4247 def SULD_1D_I32_ZERO
4248   : NVPTXInst<(outs Int32Regs:$r),
4249               (ins Int64Regs:$s, Int32Regs:$x),
4250               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4251               []>;
4252 def SULD_1D_I64_ZERO
4253   : NVPTXInst<(outs Int64Regs:$r),
4254               (ins Int64Regs:$s, Int32Regs:$x),
4255               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4256               []>;
4257
// 1D array (.a1d): coordinates are emitted as {$l, $x}
// ($l is presumably the array layer index -- TODO confirm).
4258 def SULD_1D_ARRAY_I8_ZERO
4259   : NVPTXInst<(outs Int16Regs:$r),
4260               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4261               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4262               []>;
4263 def SULD_1D_ARRAY_I16_ZERO
4264   : NVPTXInst<(outs Int16Regs:$r),
4265               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4266               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4267               []>;
4268 def SULD_1D_ARRAY_I32_ZERO
4269   : NVPTXInst<(outs Int32Regs:$r),
4270               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4271               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4272               []>;
4273 def SULD_1D_ARRAY_I64_ZERO
4274   : NVPTXInst<(outs Int64Regs:$r),
4275               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4276               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4277               []>;
4278
// 2D: coordinates are {$x, $y}.
4279 def SULD_2D_I8_ZERO
4280   : NVPTXInst<(outs Int16Regs:$r),
4281               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4282               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4283               []>;
4284 def SULD_2D_I16_ZERO
4285   : NVPTXInst<(outs Int16Regs:$r),
4286               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4287               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4288               []>;
4289 def SULD_2D_I32_ZERO
4290   : NVPTXInst<(outs Int32Regs:$r),
4291               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4292               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4293               []>;
4294 def SULD_2D_I64_ZERO
4295   : NVPTXInst<(outs Int64Regs:$r),
4296               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4297               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4298               []>;
4299
// 2D array (.a2d): inputs are $l, $x, $y; the asm string lists $y twice so the
// coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
4300 def SULD_2D_ARRAY_I8_ZERO
4301   : NVPTXInst<(outs Int16Regs:$r),
4302               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4303               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4304               []>;
4305 def SULD_2D_ARRAY_I16_ZERO
4306   : NVPTXInst<(outs Int16Regs:$r),
4307               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4308               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4309               []>;
4310 def SULD_2D_ARRAY_I32_ZERO
4311   : NVPTXInst<(outs Int32Regs:$r),
4312               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4313               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4314               []>;
4315 def SULD_2D_ARRAY_I64_ZERO
4316   : NVPTXInst<(outs Int64Regs:$r),
4317               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4318               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4319               []>;
4320
// 3D: inputs are $x, $y, $z; the asm string lists $z twice so the coordinate
// list has four entries.
4321 def SULD_3D_I8_ZERO
4322   : NVPTXInst<(outs Int16Regs:$r),
4323               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4324               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4325               []>;
4326 def SULD_3D_I16_ZERO
4327   : NVPTXInst<(outs Int16Regs:$r),
4328               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4329               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4330               []>;
4331 def SULD_3D_I32_ZERO
4332   : NVPTXInst<(outs Int32Regs:$r),
4333               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4334               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4335               []>;
4336 def SULD_3D_I64_ZERO
4337   : NVPTXInst<(outs Int64Regs:$r),
4338               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4339               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4340               []>;
4341 }
4342
// Two-result (.v2) "suld.b" records with the .zero suffix. Every record in
// this scope inherits IsSuld = 2 and yields two registers ($r, $g). Element
// widths b8/b16/b32/b64 are defined for each geometry; b8 and b16 results both
// use Int16Regs. All pattern lists are empty, so no TableGen selection pattern
// is attached to any of these records.
// NOTE(review): presumably they are selected from C++ code -- confirm.
4343 let IsSuld = 2 in {
// 1D: a single coordinate, $x.
4344 def SULD_1D_V2I8_ZERO
4345   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4346               (ins Int64Regs:$s, Int32Regs:$x),
4347               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4348               []>;
4349 def SULD_1D_V2I16_ZERO
4350   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4351               (ins Int64Regs:$s, Int32Regs:$x),
4352               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4353               []>;
4354 def SULD_1D_V2I32_ZERO
4355   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4356               (ins Int64Regs:$s, Int32Regs:$x),
4357               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4358               []>;
4359 def SULD_1D_V2I64_ZERO
4360   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4361               (ins Int64Regs:$s, Int32Regs:$x),
4362               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4363               []>;
4364
// 1D array (.a1d): coordinates are emitted as {$l, $x}
// ($l is presumably the array layer index -- TODO confirm).
4365 def SULD_1D_ARRAY_V2I8_ZERO
4366   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4367               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4368               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4369               []>;
4370 def SULD_1D_ARRAY_V2I16_ZERO
4371   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4372               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4373               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4374               []>;
4375 def SULD_1D_ARRAY_V2I32_ZERO
4376   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4377               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4378               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4379               []>;
4380 def SULD_1D_ARRAY_V2I64_ZERO
4381   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4382               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4383               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4384               []>;
4385
// 2D: coordinates are {$x, $y}.
4386 def SULD_2D_V2I8_ZERO
4387   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4388               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4389               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4390               []>;
4391 def SULD_2D_V2I16_ZERO
4392   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4393               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4394               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4395               []>;
4396 def SULD_2D_V2I32_ZERO
4397   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4398               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4399               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4400               []>;
4401 def SULD_2D_V2I64_ZERO
4402   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4403               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4404               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4405               []>;
4406
// 2D array (.a2d): inputs are $l, $x, $y; the asm string lists $y twice so the
// coordinate list has four entries.
// NOTE(review): presumably padding required by the PTX operand form -- confirm.
4407 def SULD_2D_ARRAY_V2I8_ZERO
4408   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4409               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4410               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4411               "[$s, \\{$l, $x, $y, $y\\}];",
4412               []>;
4413 def SULD_2D_ARRAY_V2I16_ZERO
4414   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4415               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4416               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4417               "[$s, \\{$l, $x, $y, $y\\}];",
4418               []>;
4419 def SULD_2D_ARRAY_V2I32_ZERO
4420   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4421               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4422               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4423               "[$s, \\{$l, $x, $y, $y\\}];",
4424               []>;
4425 def SULD_2D_ARRAY_V2I64_ZERO
4426   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4427               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4428               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4429               "[$s, \\{$l, $x, $y, $y\\}];",
4430               []>;
4431
// 3D: inputs are $x, $y, $z; the asm string lists $z twice so the coordinate
// list has four entries.
4432 def SULD_3D_V2I8_ZERO
4433   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4434               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4435               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4436               []>;
4437 def SULD_3D_V2I16_ZERO
4438   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4439               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4440               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4441               []>;
4442 def SULD_3D_V2I32_ZERO
4443   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4444               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4446               []>;
4447 def SULD_3D_V2I64_ZERO
4448   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4449               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4450               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4451               []>;
4452 }
4453
// Shared layout of the four-result `suld.b.<geom>.v4.<bits>.zero` records.
//   geom     : geometry token in the mnemonic ("1d", "a1d", "2d", "a2d", "3d")
//   coords   : `ins` dag holding the Int32Regs operands that follow $s
//   coordAsm : text printed between the braces of the address operand
//   bits     : element-size token in the mnemonic ("b8", "b16", "b32")
//   rc       : register class shared by the results $r, $g, $b, $a
// The input list is $s (Int64Regs) followed by `coords`; the pattern list is
// empty.
class SULD_V4_ZERO_INST<string geom, dag coords, string coordAsm,
                        string bits, RegisterClass rc>
  : NVPTXInst<(outs rc:$r, rc:$g, rc:$b, rc:$a),
              !con((ins Int64Regs:$s), coords),
              !strconcat("suld.b.", geom, ".v4.", bits,
                         ".zero \\{$r, $g, $b, $a\\}, [$s, \\{",
                         coordAsm, "\\}];"),
              []>;

// One thin wrapper per geometry fixes the operand list and its printed form.
// The a2d and 3d forms print their last coordinate twice, as the flat
// definitions did.
class SULD_1D_V4_ZERO_INST<string bits, RegisterClass rc>
  : SULD_V4_ZERO_INST<"1d", (ins Int32Regs:$x), "$x", bits, rc>;
class SULD_1D_ARRAY_V4_ZERO_INST<string bits, RegisterClass rc>
  : SULD_V4_ZERO_INST<"a1d", (ins Int32Regs:$l, Int32Regs:$x), "$l, $x",
                      bits, rc>;
class SULD_2D_V4_ZERO_INST<string bits, RegisterClass rc>
  : SULD_V4_ZERO_INST<"2d", (ins Int32Regs:$x, Int32Regs:$y), "$x, $y",
                      bits, rc>;
class SULD_2D_ARRAY_V4_ZERO_INST<string bits, RegisterClass rc>
  : SULD_V4_ZERO_INST<"a2d",
                      (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                      "$l, $x, $y, $y", bits, rc>;
class SULD_3D_V4_ZERO_INST<string bits, RegisterClass rc>
  : SULD_V4_ZERO_INST<"3d",
                      (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                      "$x, $y, $z, $z", bits, rc>;

// Every record in this group carries IsSuld = 3.  The b8 forms use
// Int16Regs, the same class as the b16 forms.
let IsSuld = 3 in {
def SULD_1D_V4I8_ZERO  : SULD_1D_V4_ZERO_INST<"b8",  Int16Regs>;
def SULD_1D_V4I16_ZERO : SULD_1D_V4_ZERO_INST<"b16", Int16Regs>;
def SULD_1D_V4I32_ZERO : SULD_1D_V4_ZERO_INST<"b32", Int32Regs>;

def SULD_1D_ARRAY_V4I8_ZERO  : SULD_1D_ARRAY_V4_ZERO_INST<"b8",  Int16Regs>;
def SULD_1D_ARRAY_V4I16_ZERO : SULD_1D_ARRAY_V4_ZERO_INST<"b16", Int16Regs>;
def SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4_ZERO_INST<"b32", Int32Regs>;

def SULD_2D_V4I8_ZERO  : SULD_2D_V4_ZERO_INST<"b8",  Int16Regs>;
def SULD_2D_V4I16_ZERO : SULD_2D_V4_ZERO_INST<"b16", Int16Regs>;
def SULD_2D_V4I32_ZERO : SULD_2D_V4_ZERO_INST<"b32", Int32Regs>;

def SULD_2D_ARRAY_V4I8_ZERO  : SULD_2D_ARRAY_V4_ZERO_INST<"b8",  Int16Regs>;
def SULD_2D_ARRAY_V4I16_ZERO : SULD_2D_ARRAY_V4_ZERO_INST<"b16", Int16Regs>;
def SULD_2D_ARRAY_V4I32_ZERO : SULD_2D_ARRAY_V4_ZERO_INST<"b32", Int32Regs>;

def SULD_3D_V4I8_ZERO  : SULD_3D_V4_ZERO_INST<"b8",  Int16Regs>;
def SULD_3D_V4I16_ZERO : SULD_3D_V4_ZERO_INST<"b16", Int16Regs>;
def SULD_3D_V4I32_ZERO : SULD_3D_V4_ZERO_INST<"b32", Int32Regs>;
}
4545
4546 //-----------------------------------
4547 // Texture Query Intrinsics
4548 //-----------------------------------
4549
// Shape shared by every `txq.<query>.b32` record: one Int32Regs result $d,
// one Int64Regs input $a printed inside square brackets, and an empty
// pattern list.  `query` is the attribute token spliced into the mnemonic.
class TXQ_QUERY_INST<string query>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              !strconcat("txq.", query, ".b32 \t$d, [$a];"),
              []>;

// All texture-query records are tagged with IsSurfTexQuery = 1.
let IsSurfTexQuery = 1 in {
def TXQ_CHANNEL_ORDER     : TXQ_QUERY_INST<"channel_order">;
def TXQ_CHANNEL_DATA_TYPE : TXQ_QUERY_INST<"channel_data_type">;
def TXQ_WIDTH             : TXQ_QUERY_INST<"width">;
def TXQ_HEIGHT            : TXQ_QUERY_INST<"height">;
def TXQ_DEPTH             : TXQ_QUERY_INST<"depth">;
def TXQ_ARRAY_SIZE        : TXQ_QUERY_INST<"array_size">;
def TXQ_NUM_SAMPLES       : TXQ_QUERY_INST<"num_samples">;
def TXQ_NUM_MIPMAP_LEVELS : TXQ_QUERY_INST<"num_mipmap_levels">;
}
4584
// Maps intrinsic `intr`, applied to an Int64Regs operand $a, onto
// instruction `inst` applied to that same operand.
class TXQ_SELECT_PAT<Intrinsic intr, Instruction inst>
  : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

// One pattern per texture-query intrinsic / instruction pair.
def : TXQ_SELECT_PAT<int_nvvm_txq_channel_order,     TXQ_CHANNEL_ORDER>;
def : TXQ_SELECT_PAT<int_nvvm_txq_channel_data_type, TXQ_CHANNEL_DATA_TYPE>;
def : TXQ_SELECT_PAT<int_nvvm_txq_width,             TXQ_WIDTH>;
def : TXQ_SELECT_PAT<int_nvvm_txq_height,            TXQ_HEIGHT>;
def : TXQ_SELECT_PAT<int_nvvm_txq_depth,             TXQ_DEPTH>;
def : TXQ_SELECT_PAT<int_nvvm_txq_array_size,        TXQ_ARRAY_SIZE>;
def : TXQ_SELECT_PAT<int_nvvm_txq_num_samples,       TXQ_NUM_SAMPLES>;
def : TXQ_SELECT_PAT<int_nvvm_txq_num_mipmap_levels, TXQ_NUM_MIPMAP_LEVELS>;
4601
4602
4603 //-----------------------------------
4604 // Surface Query Intrinsics
4605 //-----------------------------------
4606
// Shape shared by every `suq.<query>.b32` record: one Int32Regs result $d,
// one Int64Regs input $a printed inside square brackets, and an empty
// pattern list.  `query` is the attribute token spliced into the mnemonic.
class SUQ_QUERY_INST<string query>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              !strconcat("suq.", query, ".b32 \t$d, [$a];"),
              []>;

// All surface-query records are tagged with IsSurfTexQuery = 1.
let IsSurfTexQuery = 1 in {
def SUQ_CHANNEL_ORDER     : SUQ_QUERY_INST<"channel_order">;
def SUQ_CHANNEL_DATA_TYPE : SUQ_QUERY_INST<"channel_data_type">;
def SUQ_WIDTH             : SUQ_QUERY_INST<"width">;
def SUQ_HEIGHT            : SUQ_QUERY_INST<"height">;
def SUQ_DEPTH             : SUQ_QUERY_INST<"depth">;
def SUQ_ARRAY_SIZE        : SUQ_QUERY_INST<"array_size">;
}
4633
// Maps intrinsic `intr`, applied to an Int64Regs operand $a, onto
// instruction `inst` applied to that same operand.
class SUQ_SELECT_PAT<Intrinsic intr, Instruction inst>
  : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

// One pattern per surface-query intrinsic / instruction pair.
def : SUQ_SELECT_PAT<int_nvvm_suq_channel_order,     SUQ_CHANNEL_ORDER>;
def : SUQ_SELECT_PAT<int_nvvm_suq_channel_data_type, SUQ_CHANNEL_DATA_TYPE>;
def : SUQ_SELECT_PAT<int_nvvm_suq_width,             SUQ_WIDTH>;
def : SUQ_SELECT_PAT<int_nvvm_suq_height,            SUQ_HEIGHT>;
def : SUQ_SELECT_PAT<int_nvvm_suq_depth,             SUQ_DEPTH>;
def : SUQ_SELECT_PAT<int_nvvm_suq_array_size,        SUQ_ARRAY_SIZE>;
4646
4647
4648 //===- Handle Query -------------------------------------------------------===//
4649
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work

// Shape shared by the `istypep.<refTy>` records: an Int1Regs result $d, an
// Int64Regs input $a, and an inline pattern that sets $d from intrinsic
// `intr` applied to $a.  `refTy` is the type token that follows "istypep.".
class ISTYPEP_QUERY_INST<string refTy, Intrinsic intr>
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              !strconcat("istypep.", refTy, " \t$d, $a;"),
              [(set Int1Regs:$d, (intr Int64Regs:$a))]>;

def ISTYPEP_SAMPLER
  : ISTYPEP_QUERY_INST<"samplerref", int_nvvm_istypep_sampler>;
def ISTYPEP_SURFACE
  : ISTYPEP_QUERY_INST<"surfref", int_nvvm_istypep_surface>;
def ISTYPEP_TEXTURE
  : ISTYPEP_QUERY_INST<"texref", int_nvvm_istypep_texture>;
4663
4664 //===- Surface Stores -----------------------------------------------------===//
4665
4666 let IsSust = 1 in {
4667 // Unformatted
4668 // .clamp variant
// Shared layout of the `sust.b.1d.<ty>.clamp` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow $s and $x
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $x (Int32Regs), then
// `data`.  The pattern list is empty.
class SUST_B_1D_CLAMP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$x), data),
              !strconcat("sust.b.1d.", ty, ".clamp \t[$s, \\{$x\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_1D_CLAMP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_1D_CLAMP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_1D_CLAMP_VEC2<string bits, RegisterClass rc>
  : SUST_B_1D_CLAMP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                         "$r, $g">;
class SUST_B_1D_CLAMP_VEC4<string bits, RegisterClass rc>
  : SUST_B_1D_CLAMP_BASE<!strconcat("v4.", bits),
                         (ins rc:$r, rc:$g, rc:$b, rc:$a),
                         "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_1D_B8_CLAMP    : SUST_B_1D_CLAMP_SCALAR<"b8",  Int16Regs>;
def SUST_B_1D_B16_CLAMP   : SUST_B_1D_CLAMP_SCALAR<"b16", Int16Regs>;
def SUST_B_1D_B32_CLAMP   : SUST_B_1D_CLAMP_SCALAR<"b32", Int32Regs>;
def SUST_B_1D_B64_CLAMP   : SUST_B_1D_CLAMP_SCALAR<"b64", Int64Regs>;
def SUST_B_1D_V2B8_CLAMP  : SUST_B_1D_CLAMP_VEC2<"b8",  Int16Regs>;
def SUST_B_1D_V2B16_CLAMP : SUST_B_1D_CLAMP_VEC2<"b16", Int16Regs>;
def SUST_B_1D_V2B32_CLAMP : SUST_B_1D_CLAMP_VEC2<"b32", Int32Regs>;
def SUST_B_1D_V2B64_CLAMP : SUST_B_1D_CLAMP_VEC2<"b64", Int64Regs>;
def SUST_B_1D_V4B8_CLAMP  : SUST_B_1D_CLAMP_VEC4<"b8",  Int16Regs>;
def SUST_B_1D_V4B16_CLAMP : SUST_B_1D_CLAMP_VEC4<"b16", Int16Regs>;
def SUST_B_1D_V4B32_CLAMP : SUST_B_1D_CLAMP_VEC4<"b32", Int32Regs>;
4727
4728
// Shared layout of the `sust.b.a1d.<ty>.clamp` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow $s, $idx, $x
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $idx and $x (Int32Regs),
// then `data`.  The pattern list is empty.
class SUST_B_1D_ARRAY_CLAMP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x), data),
              !strconcat("sust.b.a1d.", ty,
                         ".clamp \t[$s, \\{$idx, $x\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_1D_ARRAY_CLAMP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_1D_ARRAY_CLAMP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_1D_ARRAY_CLAMP_VEC2<string bits, RegisterClass rc>
  : SUST_B_1D_ARRAY_CLAMP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                               "$r, $g">;
class SUST_B_1D_ARRAY_CLAMP_VEC4<string bits, RegisterClass rc>
  : SUST_B_1D_ARRAY_CLAMP_BASE<!strconcat("v4.", bits),
                               (ins rc:$r, rc:$g, rc:$b, rc:$a),
                               "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_1D_ARRAY_B8_CLAMP    : SUST_B_1D_ARRAY_CLAMP_SCALAR<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_B16_CLAMP   : SUST_B_1D_ARRAY_CLAMP_SCALAR<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_B32_CLAMP   : SUST_B_1D_ARRAY_CLAMP_SCALAR<"b32", Int32Regs>;
def SUST_B_1D_ARRAY_B64_CLAMP   : SUST_B_1D_ARRAY_CLAMP_SCALAR<"b64", Int64Regs>;
def SUST_B_1D_ARRAY_V2B8_CLAMP  : SUST_B_1D_ARRAY_CLAMP_VEC2<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_V2B16_CLAMP : SUST_B_1D_ARRAY_CLAMP_VEC2<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_V2B32_CLAMP : SUST_B_1D_ARRAY_CLAMP_VEC2<"b32", Int32Regs>;
def SUST_B_1D_ARRAY_V2B64_CLAMP : SUST_B_1D_ARRAY_CLAMP_VEC2<"b64", Int64Regs>;
def SUST_B_1D_ARRAY_V4B8_CLAMP  : SUST_B_1D_ARRAY_CLAMP_VEC4<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_V4B16_CLAMP : SUST_B_1D_ARRAY_CLAMP_VEC4<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_V4B32_CLAMP : SUST_B_1D_ARRAY_CLAMP_VEC4<"b32", Int32Regs>;
4794
4795
// Shared layout of the `sust.b.2d.<ty>.clamp` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow $s, $x, $y
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $x and $y (Int32Regs),
// then `data`.  The pattern list is empty.
class SUST_B_2D_CLAMP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), data),
              !strconcat("sust.b.2d.", ty,
                         ".clamp \t[$s, \\{$x, $y\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_2D_CLAMP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_2D_CLAMP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_2D_CLAMP_VEC2<string bits, RegisterClass rc>
  : SUST_B_2D_CLAMP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                         "$r, $g">;
class SUST_B_2D_CLAMP_VEC4<string bits, RegisterClass rc>
  : SUST_B_2D_CLAMP_BASE<!strconcat("v4.", bits),
                         (ins rc:$r, rc:$g, rc:$b, rc:$a),
                         "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_2D_B8_CLAMP    : SUST_B_2D_CLAMP_SCALAR<"b8",  Int16Regs>;
def SUST_B_2D_B16_CLAMP   : SUST_B_2D_CLAMP_SCALAR<"b16", Int16Regs>;
def SUST_B_2D_B32_CLAMP   : SUST_B_2D_CLAMP_SCALAR<"b32", Int32Regs>;
def SUST_B_2D_B64_CLAMP   : SUST_B_2D_CLAMP_SCALAR<"b64", Int64Regs>;
def SUST_B_2D_V2B8_CLAMP  : SUST_B_2D_CLAMP_VEC2<"b8",  Int16Regs>;
def SUST_B_2D_V2B16_CLAMP : SUST_B_2D_CLAMP_VEC2<"b16", Int16Regs>;
def SUST_B_2D_V2B32_CLAMP : SUST_B_2D_CLAMP_VEC2<"b32", Int32Regs>;
def SUST_B_2D_V2B64_CLAMP : SUST_B_2D_CLAMP_VEC2<"b64", Int64Regs>;
def SUST_B_2D_V4B8_CLAMP  : SUST_B_2D_CLAMP_VEC4<"b8",  Int16Regs>;
def SUST_B_2D_V4B16_CLAMP : SUST_B_2D_CLAMP_VEC4<"b16", Int16Regs>;
def SUST_B_2D_V4B32_CLAMP : SUST_B_2D_CLAMP_VEC4<"b32", Int32Regs>;
4861
4862
// Shared layout of the `sust.b.a2d.<ty>.clamp` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow the
//             address operands
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $idx, $x and $y
// (Int32Regs), then `data`.  The printed coordinate vector lists $y twice;
// that is kept exactly as written.  The pattern list is empty.
class SUST_B_2D_ARRAY_CLAMP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
                        Int32Regs:$y),
                   data),
              !strconcat("sust.b.a2d.", ty,
                         ".clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_2D_ARRAY_CLAMP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_2D_ARRAY_CLAMP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_2D_ARRAY_CLAMP_VEC2<string bits, RegisterClass rc>
  : SUST_B_2D_ARRAY_CLAMP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                               "$r, $g">;
class SUST_B_2D_ARRAY_CLAMP_VEC4<string bits, RegisterClass rc>
  : SUST_B_2D_ARRAY_CLAMP_BASE<!strconcat("v4.", bits),
                               (ins rc:$r, rc:$g, rc:$b, rc:$a),
                               "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_2D_ARRAY_B8_CLAMP    : SUST_B_2D_ARRAY_CLAMP_SCALAR<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_B16_CLAMP   : SUST_B_2D_ARRAY_CLAMP_SCALAR<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_B32_CLAMP   : SUST_B_2D_ARRAY_CLAMP_SCALAR<"b32", Int32Regs>;
def SUST_B_2D_ARRAY_B64_CLAMP   : SUST_B_2D_ARRAY_CLAMP_SCALAR<"b64", Int64Regs>;
def SUST_B_2D_ARRAY_V2B8_CLAMP  : SUST_B_2D_ARRAY_CLAMP_VEC2<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_V2B16_CLAMP : SUST_B_2D_ARRAY_CLAMP_VEC2<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_V2B32_CLAMP : SUST_B_2D_ARRAY_CLAMP_VEC2<"b32", Int32Regs>;
def SUST_B_2D_ARRAY_V2B64_CLAMP : SUST_B_2D_ARRAY_CLAMP_VEC2<"b64", Int64Regs>;
def SUST_B_2D_ARRAY_V4B8_CLAMP  : SUST_B_2D_ARRAY_CLAMP_VEC4<"b8",  Int16Regs>;
def SUST_B_2D_ARRAY_V4B16_CLAMP : SUST_B_2D_ARRAY_CLAMP_VEC4<"b16", Int16Regs>;
def SUST_B_2D_ARRAY_V4B32_CLAMP : SUST_B_2D_ARRAY_CLAMP_VEC4<"b32", Int32Regs>;
4936
4937
// Shared layout of the `sust.b.3d.<ty>.clamp` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow the
//             address operands
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $x, $y and $z
// (Int32Regs), then `data`.  The printed coordinate vector lists $z twice;
// that is kept exactly as written.  The pattern list is empty.
class SUST_B_3D_CLAMP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                        Int32Regs:$z),
                   data),
              !strconcat("sust.b.3d.", ty,
                         ".clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_3D_CLAMP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_3D_CLAMP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_3D_CLAMP_VEC2<string bits, RegisterClass rc>
  : SUST_B_3D_CLAMP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                         "$r, $g">;
class SUST_B_3D_CLAMP_VEC4<string bits, RegisterClass rc>
  : SUST_B_3D_CLAMP_BASE<!strconcat("v4.", bits),
                         (ins rc:$r, rc:$g, rc:$b, rc:$a),
                         "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_3D_B8_CLAMP    : SUST_B_3D_CLAMP_SCALAR<"b8",  Int16Regs>;
def SUST_B_3D_B16_CLAMP   : SUST_B_3D_CLAMP_SCALAR<"b16", Int16Regs>;
def SUST_B_3D_B32_CLAMP   : SUST_B_3D_CLAMP_SCALAR<"b32", Int32Regs>;
def SUST_B_3D_B64_CLAMP   : SUST_B_3D_CLAMP_SCALAR<"b64", Int64Regs>;
def SUST_B_3D_V2B8_CLAMP  : SUST_B_3D_CLAMP_VEC2<"b8",  Int16Regs>;
def SUST_B_3D_V2B16_CLAMP : SUST_B_3D_CLAMP_VEC2<"b16", Int16Regs>;
def SUST_B_3D_V2B32_CLAMP : SUST_B_3D_CLAMP_VEC2<"b32", Int32Regs>;
def SUST_B_3D_V2B64_CLAMP : SUST_B_3D_CLAMP_VEC2<"b64", Int64Regs>;
def SUST_B_3D_V4B8_CLAMP  : SUST_B_3D_CLAMP_VEC4<"b8",  Int16Regs>;
def SUST_B_3D_V4B16_CLAMP : SUST_B_3D_CLAMP_VEC4<"b16", Int16Regs>;
def SUST_B_3D_V4B32_CLAMP : SUST_B_3D_CLAMP_VEC4<"b32", Int32Regs>;
5011
5012
5013 // .trap variant
// Shared layout of the `sust.b.1d.<ty>.trap` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow $s and $x
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $x (Int32Regs), then
// `data`.  The pattern list is empty.
class SUST_B_1D_TRAP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$x), data),
              !strconcat("sust.b.1d.", ty, ".trap \t[$s, \\{$x\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_1D_TRAP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_1D_TRAP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_1D_TRAP_VEC2<string bits, RegisterClass rc>
  : SUST_B_1D_TRAP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                        "$r, $g">;
class SUST_B_1D_TRAP_VEC4<string bits, RegisterClass rc>
  : SUST_B_1D_TRAP_BASE<!strconcat("v4.", bits),
                        (ins rc:$r, rc:$g, rc:$b, rc:$a),
                        "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_1D_B8_TRAP    : SUST_B_1D_TRAP_SCALAR<"b8",  Int16Regs>;
def SUST_B_1D_B16_TRAP   : SUST_B_1D_TRAP_SCALAR<"b16", Int16Regs>;
def SUST_B_1D_B32_TRAP   : SUST_B_1D_TRAP_SCALAR<"b32", Int32Regs>;
def SUST_B_1D_B64_TRAP   : SUST_B_1D_TRAP_SCALAR<"b64", Int64Regs>;
def SUST_B_1D_V2B8_TRAP  : SUST_B_1D_TRAP_VEC2<"b8",  Int16Regs>;
def SUST_B_1D_V2B16_TRAP : SUST_B_1D_TRAP_VEC2<"b16", Int16Regs>;
def SUST_B_1D_V2B32_TRAP : SUST_B_1D_TRAP_VEC2<"b32", Int32Regs>;
def SUST_B_1D_V2B64_TRAP : SUST_B_1D_TRAP_VEC2<"b64", Int64Regs>;
def SUST_B_1D_V4B8_TRAP  : SUST_B_1D_TRAP_VEC4<"b8",  Int16Regs>;
def SUST_B_1D_V4B16_TRAP : SUST_B_1D_TRAP_VEC4<"b16", Int16Regs>;
def SUST_B_1D_V4B32_TRAP : SUST_B_1D_TRAP_VEC4<"b32", Int32Regs>;
5072
5073
// Shared layout of the `sust.b.a1d.<ty>.trap` records in this group.
//   ty      : type token in the mnemonic ("b8", "v2.b16", "v4.b32", ...)
//   data    : `ins` dag holding the data registers that follow $s, $idx, $x
//   dataAsm : text printed between the braces of the trailing operand
// There are no results; inputs are $s (Int64Regs), $idx and $x (Int32Regs),
// then `data`.  The pattern list is empty.
class SUST_B_1D_ARRAY_TRAP_BASE<string ty, dag data, string dataAsm>
  : NVPTXInst<(outs),
              !con((ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x), data),
              !strconcat("sust.b.a1d.", ty,
                         ".trap \t[$s, \\{$idx, $x\\}], \\{",
                         dataAsm, "\\};"),
              []>;

// One-, two- and four-register forms; `rc` is the class of every data
// register and `bits` the element-size token.
class SUST_B_1D_ARRAY_TRAP_SCALAR<string bits, RegisterClass rc>
  : SUST_B_1D_ARRAY_TRAP_BASE<bits, (ins rc:$r), "$r">;
class SUST_B_1D_ARRAY_TRAP_VEC2<string bits, RegisterClass rc>
  : SUST_B_1D_ARRAY_TRAP_BASE<!strconcat("v2.", bits), (ins rc:$r, rc:$g),
                              "$r, $g">;
class SUST_B_1D_ARRAY_TRAP_VEC4<string bits, RegisterClass rc>
  : SUST_B_1D_ARRAY_TRAP_BASE<!strconcat("v4.", bits),
                              (ins rc:$r, rc:$g, rc:$b, rc:$a),
                              "$r, $g, $b, $a">;

// b8 data uses Int16Regs, the same class as b16 data.
def SUST_B_1D_ARRAY_B8_TRAP    : SUST_B_1D_ARRAY_TRAP_SCALAR<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_B16_TRAP   : SUST_B_1D_ARRAY_TRAP_SCALAR<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_B32_TRAP   : SUST_B_1D_ARRAY_TRAP_SCALAR<"b32", Int32Regs>;
def SUST_B_1D_ARRAY_B64_TRAP   : SUST_B_1D_ARRAY_TRAP_SCALAR<"b64", Int64Regs>;
def SUST_B_1D_ARRAY_V2B8_TRAP  : SUST_B_1D_ARRAY_TRAP_VEC2<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_V2B16_TRAP : SUST_B_1D_ARRAY_TRAP_VEC2<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_V2B32_TRAP : SUST_B_1D_ARRAY_TRAP_VEC2<"b32", Int32Regs>;
def SUST_B_1D_ARRAY_V2B64_TRAP : SUST_B_1D_ARRAY_TRAP_VEC2<"b64", Int64Regs>;
def SUST_B_1D_ARRAY_V4B8_TRAP  : SUST_B_1D_ARRAY_TRAP_VEC4<"b8",  Int16Regs>;
def SUST_B_1D_ARRAY_V4B16_TRAP : SUST_B_1D_ARRAY_TRAP_VEC4<"b16", Int16Regs>;
def SUST_B_1D_ARRAY_V4B32_TRAP : SUST_B_1D_ARRAY_TRAP_VEC4<"b32", Int32Regs>;
5139
5140
// sust.b.2d records, .trap variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and one, two or four data
// registers ($r, $g, $b, $a), and declares an empty pattern list ([]).
// The b8 forms use Int16Regs data operands, the same class as the b16 forms.
5141 def SUST_B_2D_B8_TRAP
5142   : NVPTXInst<(outs),
5143               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5144               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5145               []>;
5146 def SUST_B_2D_B16_TRAP
5147   : NVPTXInst<(outs),
5148               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5149               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5150               []>;
5151 def SUST_B_2D_B32_TRAP
5152   : NVPTXInst<(outs),
5153               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5154               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5155               []>;
5156 def SUST_B_2D_B64_TRAP
5157   : NVPTXInst<(outs),
5158               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5159               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5160               []>;
5161 def SUST_B_2D_V2B8_TRAP
5162   : NVPTXInst<(outs),
5163               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5164                    Int16Regs:$g),
5165               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5166               []>;
5167 def SUST_B_2D_V2B16_TRAP
5168   : NVPTXInst<(outs),
5169               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5170                    Int16Regs:$g),
5171               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5172               []>;
5173 def SUST_B_2D_V2B32_TRAP
5174   : NVPTXInst<(outs),
5175               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5176                    Int32Regs:$g),
5177               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5178               []>;
5179 def SUST_B_2D_V2B64_TRAP
5180   : NVPTXInst<(outs),
5181               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5182                    Int64Regs:$g),
5183               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5184               []>;
5185 def SUST_B_2D_V4B8_TRAP
5186   : NVPTXInst<(outs),
5187               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5188                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5189               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5190               "\\{$r, $g, $b, $a\\};",
5191               []>;
5192 def SUST_B_2D_V4B16_TRAP
5193   : NVPTXInst<(outs),
5194               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5195                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5196              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5197              "\\{$r, $g, $b, $a\\};",
5198               []>;
5199 def SUST_B_2D_V4B32_TRAP
5200   : NVPTXInst<(outs),
5201               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5202                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5203              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5204              "\\{$r, $g, $b, $a\\};",
5205               []>;
5206
5207
// sust.b.a2d records, .trap variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $idx, $x and $y (Int32Regs) and one, two or four data registers, and
// declares an empty pattern list ([]). The b8 forms use Int16Regs data
// operands, the same class as the b16 forms.
// The address tuple is printed as {$idx, $x, $y, $y}: $y appears twice in
// every record of this family.
// NOTE(review): presumably the repeated $y only pads the tuple to four
// elements for the a2d form -- confirm against the PTX ISA before changing.
5208 def SUST_B_2D_ARRAY_B8_TRAP
5209   : NVPTXInst<(outs),
5210               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5211                    Int16Regs:$r),
5212               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5213               []>;
5214 def SUST_B_2D_ARRAY_B16_TRAP
5215   : NVPTXInst<(outs),
5216               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5217                    Int16Regs:$r),
5218               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5219               []>;
5220 def SUST_B_2D_ARRAY_B32_TRAP
5221   : NVPTXInst<(outs),
5222               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5223                    Int32Regs:$r),
5224               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5225               []>;
5226 def SUST_B_2D_ARRAY_B64_TRAP
5227   : NVPTXInst<(outs),
5228               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5229                    Int64Regs:$r),
5230               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5231               []>;
5232 def SUST_B_2D_ARRAY_V2B8_TRAP
5233   : NVPTXInst<(outs),
5234               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5235                    Int16Regs:$r, Int16Regs:$g),
5236               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5237               "\\{$r, $g\\};",
5238               []>;
5239 def SUST_B_2D_ARRAY_V2B16_TRAP
5240   : NVPTXInst<(outs),
5241               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5242                    Int16Regs:$r, Int16Regs:$g),
5243              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5244              "\\{$r, $g\\};",
5245               []>;
5246 def SUST_B_2D_ARRAY_V2B32_TRAP
5247   : NVPTXInst<(outs),
5248               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5249                    Int32Regs:$r, Int32Regs:$g),
5250              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5251              "\\{$r, $g\\};",
5252               []>;
5253 def SUST_B_2D_ARRAY_V2B64_TRAP
5254   : NVPTXInst<(outs),
5255               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5256                    Int64Regs:$r, Int64Regs:$g),
5257              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5258              "\\{$r, $g\\};",
5259               []>;
5260 def SUST_B_2D_ARRAY_V4B8_TRAP
5261   : NVPTXInst<(outs),
5262               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5263                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5264       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5265       "\\{$r, $g, $b, $a\\};",
5266               []>;
5267 def SUST_B_2D_ARRAY_V4B16_TRAP
5268   : NVPTXInst<(outs),
5269               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5270                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5271      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5272      "\\{$r, $g, $b, $a\\};",
5273               []>;
5274 def SUST_B_2D_ARRAY_V4B32_TRAP
5275   : NVPTXInst<(outs),
5276               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5277                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5278      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5279      "\\{$r, $g, $b, $a\\};",
5280               []>;
5281
5282
// sust.b.3d records, .trap variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $x, $y and $z (Int32Regs) and one, two or four data registers, and
// declares an empty pattern list ([]). The b8 forms use Int16Regs data
// operands, the same class as the b16 forms.
// The address tuple is printed as {$x, $y, $z, $z}: $z appears twice in
// every record of this family.
// NOTE(review): presumably the repeated $z only pads the tuple to four
// elements for the 3d form -- confirm against the PTX ISA before changing.
5283 def SUST_B_3D_B8_TRAP
5284   : NVPTXInst<(outs),
5285               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5286                    Int16Regs:$r),
5287               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5288               []>;
5289 def SUST_B_3D_B16_TRAP
5290   : NVPTXInst<(outs),
5291               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5292                    Int16Regs:$r),
5293               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5294               []>;
5295 def SUST_B_3D_B32_TRAP
5296   : NVPTXInst<(outs),
5297               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5298                    Int32Regs:$r),
5299               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5300               []>;
5301 def SUST_B_3D_B64_TRAP
5302   : NVPTXInst<(outs),
5303               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5304                    Int64Regs:$r),
5305               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5306               []>;
5307 def SUST_B_3D_V2B8_TRAP
5308   : NVPTXInst<(outs),
5309               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5310                    Int16Regs:$r, Int16Regs:$g),
5311               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5312               "\\{$r, $g\\};",
5313               []>;
5314 def SUST_B_3D_V2B16_TRAP
5315   : NVPTXInst<(outs),
5316               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5317                    Int16Regs:$r, Int16Regs:$g),
5318               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5319               "\\{$r, $g\\};",
5320               []>;
5321 def SUST_B_3D_V2B32_TRAP
5322   : NVPTXInst<(outs),
5323               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5324                    Int32Regs:$r, Int32Regs:$g),
5325               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5326               "\\{$r, $g\\};",
5327               []>;
5328 def SUST_B_3D_V2B64_TRAP
5329   : NVPTXInst<(outs),
5330               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5331                    Int64Regs:$r, Int64Regs:$g),
5332               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5333               "\\{$r, $g\\};",
5334               []>;
5335 def SUST_B_3D_V4B8_TRAP
5336   : NVPTXInst<(outs),
5337               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5338                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5339          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5340          "\\{$r, $g, $b, $a\\};",
5341               []>;
5342 def SUST_B_3D_V4B16_TRAP
5343   : NVPTXInst<(outs),
5344               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5345                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5346         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5347         "\\{$r, $g, $b, $a\\};",
5348               []>;
5349 def SUST_B_3D_V4B32_TRAP
5350   : NVPTXInst<(outs),
5351               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5352                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5353         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5354         "\\{$r, $g, $b, $a\\};",
5355               []>;
5356
5357
5358 // .zero variant of the sust.b records
// sust.b.1d records, .zero variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $x (Int32Regs, printed as {$x}) and one, two or four data registers
// ($r, $g, $b, $a), and declares an empty pattern list ([]).
// The b8 forms use Int16Regs data operands, the same class as the b16 forms.
5359 def SUST_B_1D_B8_ZERO
5360   : NVPTXInst<(outs),
5361               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5362               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5363               []>;
5364 def SUST_B_1D_B16_ZERO
5365   : NVPTXInst<(outs),
5366               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5367               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5368               []>;
5369 def SUST_B_1D_B32_ZERO
5370   : NVPTXInst<(outs),
5371               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5372               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5373               []>;
5374 def SUST_B_1D_B64_ZERO
5375   : NVPTXInst<(outs),
5376               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5377               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5378               []>;
5379 def SUST_B_1D_V2B8_ZERO
5380   : NVPTXInst<(outs),
5381               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5382               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5383               []>;
5384 def SUST_B_1D_V2B16_ZERO
5385   : NVPTXInst<(outs),
5386               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5387               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5388               []>;
5389 def SUST_B_1D_V2B32_ZERO
5390   : NVPTXInst<(outs),
5391               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5392               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5393               []>;
5394 def SUST_B_1D_V2B64_ZERO
5395   : NVPTXInst<(outs),
5396               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5397               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5398               []>;
5399 def SUST_B_1D_V4B8_ZERO
5400   : NVPTXInst<(outs),
5401               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5402                    Int16Regs:$b, Int16Regs:$a),
5403               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5404               []>;
5405 def SUST_B_1D_V4B16_ZERO
5406   : NVPTXInst<(outs),
5407               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5408                    Int16Regs:$b, Int16Regs:$a),
5409               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5410               []>;
5411 def SUST_B_1D_V4B32_ZERO
5412   : NVPTXInst<(outs),
5413               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5414                    Int32Regs:$b, Int32Regs:$a),
5415               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5416               []>;
5417
5418
// sust.b.a1d records, .zero variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $idx and $x (Int32Regs, printed as {$idx, $x}) and one, two or four data
// registers ($r, $g, $b, $a), and declares an empty pattern list ([]).
// The b8 forms use Int16Regs data operands, the same class as the b16 forms.
5419 def SUST_B_1D_ARRAY_B8_ZERO
5420   : NVPTXInst<(outs),
5421               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5422               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5423               []>;
5424 def SUST_B_1D_ARRAY_B16_ZERO
5425   : NVPTXInst<(outs),
5426               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5427               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5428               []>;
5429 def SUST_B_1D_ARRAY_B32_ZERO
5430   : NVPTXInst<(outs),
5431               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5432               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5433               []>;
5434 def SUST_B_1D_ARRAY_B64_ZERO
5435   : NVPTXInst<(outs),
5436               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5437               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5438               []>;
5439 def SUST_B_1D_ARRAY_V2B8_ZERO
5440   : NVPTXInst<(outs),
5441               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5442                    Int16Regs:$g),
5443               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5444               []>;
5445 def SUST_B_1D_ARRAY_V2B16_ZERO
5446   : NVPTXInst<(outs),
5447               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5448                    Int16Regs:$g),
5449               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5450               []>;
5451 def SUST_B_1D_ARRAY_V2B32_ZERO
5452   : NVPTXInst<(outs),
5453               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5454                    Int32Regs:$g),
5455               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5456               []>;
5457 def SUST_B_1D_ARRAY_V2B64_ZERO
5458   : NVPTXInst<(outs),
5459               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5460                    Int64Regs:$g),
5461               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5462               []>;
5463 def SUST_B_1D_ARRAY_V4B8_ZERO
5464   : NVPTXInst<(outs),
5465               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5466                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5467               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5468               "\\{$r, $g, $b, $a\\};",
5469               []>;
5470 def SUST_B_1D_ARRAY_V4B16_ZERO
5471   : NVPTXInst<(outs),
5472               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5473                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5474              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5475              "\\{$r, $g, $b, $a\\};",
5476               []>;
5477 def SUST_B_1D_ARRAY_V4B32_ZERO
5478   : NVPTXInst<(outs),
5479               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5480                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5481              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5482              "\\{$r, $g, $b, $a\\};",
5483               []>;
5484
5485
// sust.b.2d records, .zero variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $x and $y (Int32Regs, printed as {$x, $y}) and one, two or four data
// registers ($r, $g, $b, $a), and declares an empty pattern list ([]).
// The b8 forms use Int16Regs data operands, the same class as the b16 forms.
5486 def SUST_B_2D_B8_ZERO
5487   : NVPTXInst<(outs),
5488               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5489               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5490               []>;
5491 def SUST_B_2D_B16_ZERO
5492   : NVPTXInst<(outs),
5493               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5494               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5495               []>;
5496 def SUST_B_2D_B32_ZERO
5497   : NVPTXInst<(outs),
5498               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5499               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5500               []>;
5501 def SUST_B_2D_B64_ZERO
5502   : NVPTXInst<(outs),
5503               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5504               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5505               []>;
5506 def SUST_B_2D_V2B8_ZERO
5507   : NVPTXInst<(outs),
5508               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5509                    Int16Regs:$g),
5510               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5511               []>;
5512 def SUST_B_2D_V2B16_ZERO
5513   : NVPTXInst<(outs),
5514               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5515                    Int16Regs:$g),
5516               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5517               []>;
5518 def SUST_B_2D_V2B32_ZERO
5519   : NVPTXInst<(outs),
5520               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5521                    Int32Regs:$g),
5522               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5523               []>;
5524 def SUST_B_2D_V2B64_ZERO
5525   : NVPTXInst<(outs),
5526               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5527                    Int64Regs:$g),
5528               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5529               []>;
5530 def SUST_B_2D_V4B8_ZERO
5531   : NVPTXInst<(outs),
5532               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5533                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5534               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5535               "\\{$r, $g, $b, $a\\};",
5536               []>;
5537 def SUST_B_2D_V4B16_ZERO
5538   : NVPTXInst<(outs),
5539               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5540                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5541              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5542              "\\{$r, $g, $b, $a\\};",
5543               []>;
5544 def SUST_B_2D_V4B32_ZERO
5545   : NVPTXInst<(outs),
5546               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5547                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5548              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5549              "\\{$r, $g, $b, $a\\};",
5550               []>;
5551
5552
// sust.b.a2d records, .zero variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $idx, $x and $y (Int32Regs) and one, two or four data registers, and
// declares an empty pattern list ([]). The b8 forms use Int16Regs data
// operands, the same class as the b16 forms.
// The address tuple is printed as {$idx, $x, $y, $y}: $y appears twice in
// every record of this family.
// NOTE(review): presumably the repeated $y only pads the tuple to four
// elements for the a2d form -- confirm against the PTX ISA before changing.
5553 def SUST_B_2D_ARRAY_B8_ZERO
5554   : NVPTXInst<(outs),
5555               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5556                    Int16Regs:$r),
5557               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5558               []>;
5559 def SUST_B_2D_ARRAY_B16_ZERO
5560   : NVPTXInst<(outs),
5561               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5562                    Int16Regs:$r),
5563               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5564               []>;
5565 def SUST_B_2D_ARRAY_B32_ZERO
5566   : NVPTXInst<(outs),
5567               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5568                    Int32Regs:$r),
5569               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5570               []>;
5571 def SUST_B_2D_ARRAY_B64_ZERO
5572   : NVPTXInst<(outs),
5573               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5574                    Int64Regs:$r),
5575               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5576               []>;
5577 def SUST_B_2D_ARRAY_V2B8_ZERO
5578   : NVPTXInst<(outs),
5579               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5580                    Int16Regs:$r, Int16Regs:$g),
5581               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5582               "\\{$r, $g\\};",
5583               []>;
5584 def SUST_B_2D_ARRAY_V2B16_ZERO
5585   : NVPTXInst<(outs),
5586               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5587                    Int16Regs:$r, Int16Regs:$g),
5588              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5589              "\\{$r, $g\\};",
5590               []>;
5591 def SUST_B_2D_ARRAY_V2B32_ZERO
5592   : NVPTXInst<(outs),
5593               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5594                    Int32Regs:$r, Int32Regs:$g),
5595              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5596              "\\{$r, $g\\};",
5597               []>;
5598 def SUST_B_2D_ARRAY_V2B64_ZERO
5599   : NVPTXInst<(outs),
5600               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5601                    Int64Regs:$r, Int64Regs:$g),
5602              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5603              "\\{$r, $g\\};",
5604               []>;
5605 def SUST_B_2D_ARRAY_V4B8_ZERO
5606   : NVPTXInst<(outs),
5607               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5608                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5609       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5610       "\\{$r, $g, $b, $a\\};",
5611               []>;
5612 def SUST_B_2D_ARRAY_V4B16_ZERO
5613   : NVPTXInst<(outs),
5614               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5615                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5616      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5617      "\\{$r, $g, $b, $a\\};",
5618               []>;
5619 def SUST_B_2D_ARRAY_V4B32_ZERO
5620   : NVPTXInst<(outs),
5621               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5622                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5623      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5624      "\\{$r, $g, $b, $a\\};",
5625               []>;
5626
5627
// sust.b.3d records, .zero variant: scalar and v2 forms for b8/b16/b32/b64,
// v4 forms for b8/b16/b32. Each record has no outputs, takes $s (Int64Regs),
// $x, $y and $z (Int32Regs) and one, two or four data registers, and
// declares an empty pattern list ([]). The b8 forms use Int16Regs data
// operands, the same class as the b16 forms.
// The address tuple is printed as {$x, $y, $z, $z}: $z appears twice in
// every record of this family.
// NOTE(review): presumably the repeated $z only pads the tuple to four
// elements for the 3d form -- confirm against the PTX ISA before changing.
5628 def SUST_B_3D_B8_ZERO
5629   : NVPTXInst<(outs),
5630               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5631                    Int16Regs:$r),
5632               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5633               []>;
5634 def SUST_B_3D_B16_ZERO
5635   : NVPTXInst<(outs),
5636               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5637                    Int16Regs:$r),
5638               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5639               []>;
5640 def SUST_B_3D_B32_ZERO
5641   : NVPTXInst<(outs),
5642               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5643                    Int32Regs:$r),
5644               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5645               []>;
5646 def SUST_B_3D_B64_ZERO
5647   : NVPTXInst<(outs),
5648               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5649                    Int64Regs:$r),
5650               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5651               []>;
5652 def SUST_B_3D_V2B8_ZERO
5653   : NVPTXInst<(outs),
5654               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5655                    Int16Regs:$r, Int16Regs:$g),
5656               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5657               "\\{$r, $g\\};",
5658               []>;
5659 def SUST_B_3D_V2B16_ZERO
5660   : NVPTXInst<(outs),
5661               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5662                    Int16Regs:$r, Int16Regs:$g),
5663               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5664               "\\{$r, $g\\};",
5665               []>;
5666 def SUST_B_3D_V2B32_ZERO
5667   : NVPTXInst<(outs),
5668               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5669                    Int32Regs:$r, Int32Regs:$g),
5670               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5671               "\\{$r, $g\\};",
5672               []>;
5673 def SUST_B_3D_V2B64_ZERO
5674   : NVPTXInst<(outs),
5675               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5676                    Int64Regs:$r, Int64Regs:$g),
5677               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5678               "\\{$r, $g\\};",
5679               []>;
5680 def SUST_B_3D_V4B8_ZERO
5681   : NVPTXInst<(outs),
5682               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5683                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5684          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5685          "\\{$r, $g, $b, $a\\};",
5686               []>;
5687 def SUST_B_3D_V4B16_ZERO
5688   : NVPTXInst<(outs),
5689               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5690                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5691         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5692         "\\{$r, $g, $b, $a\\};",
5693               []>;
5694 def SUST_B_3D_V4B32_ZERO
5695   : NVPTXInst<(outs),
5696               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5697                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5698         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5699         "\\{$r, $g, $b, $a\\};",
5700               []>;
5701
5702
5703
5704 // Formatted stores (sust.p records)
5705
// sust.p.1d records, .trap variant: scalar, v2 and v4 forms for b8/b16/b32
// (no b64 form is defined in this group). Each record has no outputs, takes
// $s (Int64Regs), $x (Int32Regs, printed as {$x}) and one, two or four data
// registers ($r, $g, $b, $a), and declares an empty pattern list ([]).
// The b8 forms use Int16Regs data operands, the same class as the b16 forms.
5706 def SUST_P_1D_B8_TRAP
5707   : NVPTXInst<(outs),
5708               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5709               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5710               []>;
5711 def SUST_P_1D_B16_TRAP
5712   : NVPTXInst<(outs),
5713               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5714               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5715               []>;
5716 def SUST_P_1D_B32_TRAP
5717   : NVPTXInst<(outs),
5718               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5719               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5720               []>;
5721 def SUST_P_1D_V2B8_TRAP
5722   : NVPTXInst<(outs),
5723               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5724               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5725               []>;
5726 def SUST_P_1D_V2B16_TRAP
5727   : NVPTXInst<(outs),
5728               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5729               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5730               []>;
5731 def SUST_P_1D_V2B32_TRAP
5732   : NVPTXInst<(outs),
5733               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5734               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5735               []>;
5736 def SUST_P_1D_V4B8_TRAP
5737   : NVPTXInst<(outs),
5738               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5739                    Int16Regs:$b, Int16Regs:$a),
5740               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5741               []>;
5742 def SUST_P_1D_V4B16_TRAP
5743   : NVPTXInst<(outs),
5744               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5745                    Int16Regs:$b, Int16Regs:$a),
5746               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5747               []>;
5748 def SUST_P_1D_V4B32_TRAP
5749   : NVPTXInst<(outs),
5750               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5751                    Int32Regs:$b, Int32Regs:$a),
5752               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5753               []>;
5754
5755
// sust.p.a1d records, .trap variant: scalar, v2 and v4 forms for b8/b16/b32
// (no b64 form is defined in this group). Each record has no outputs, takes
// $s (Int64Regs), $idx and $x (Int32Regs, printed as {$idx, $x}) and one,
// two or four data registers ($r, $g, $b, $a), and declares an empty pattern
// list ([]). The b8 forms use Int16Regs data operands, the same class as the
// b16 forms.
5756 def SUST_P_1D_ARRAY_B8_TRAP
5757   : NVPTXInst<(outs),
5758               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5759               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5760               []>;
5761 def SUST_P_1D_ARRAY_B16_TRAP
5762   : NVPTXInst<(outs),
5763               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5764               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5765               []>;
5766 def SUST_P_1D_ARRAY_B32_TRAP
5767   : NVPTXInst<(outs),
5768               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5769               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5770               []>;
5771 def SUST_P_1D_ARRAY_V2B8_TRAP
5772   : NVPTXInst<(outs),
5773               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5774                    Int16Regs:$g),
5775               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5776               []>;
5777 def SUST_P_1D_ARRAY_V2B16_TRAP
5778   : NVPTXInst<(outs),
5779               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5780                    Int16Regs:$g),
5781               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5782               []>;
5783 def SUST_P_1D_ARRAY_V2B32_TRAP
5784   : NVPTXInst<(outs),
5785               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5786                    Int32Regs:$g),
5787               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5788               []>;
5789 def SUST_P_1D_ARRAY_V4B8_TRAP
5790   : NVPTXInst<(outs),
5791               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5792                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5793               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5794               "\\{$r, $g, $b, $a\\};",
5795               []>;
5796 def SUST_P_1D_ARRAY_V4B16_TRAP
5797   : NVPTXInst<(outs),
5798               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5799                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5800              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5801              "\\{$r, $g, $b, $a\\};",
5802               []>;
5803 def SUST_P_1D_ARRAY_V4B32_TRAP
5804   : NVPTXInst<(outs),
5805               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5806                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5807              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5808              "\\{$r, $g, $b, $a\\};",
5809               []>;
5810
5811
5812 def SUST_P_2D_B8_TRAP
5813   : NVPTXInst<(outs),
5814               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5815               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5816               []>;
5817 def SUST_P_2D_B16_TRAP
5818   : NVPTXInst<(outs),
5819               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5820               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5821               []>;
5822 def SUST_P_2D_B32_TRAP
5823   : NVPTXInst<(outs),
5824               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5825               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5826               []>;
// Emits PTX `sust.p.2d.v2.b8.trap`. No results; the address is printed as
// [$s, {$x, $y}] and the data as {$r, $g}. The b8 data operands are carried
// in Int16Regs. The selection pattern list is left empty here.
5827 def SUST_P_2D_V2B8_TRAP
5828   : NVPTXInst<(outs),
5829               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5830                    Int16Regs:$g),
5831               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5832               []>;
// Emits PTX `sust.p.2d.v2.b16.trap`. No results; the address is printed as
// [$s, {$x, $y}] and the two Int16Regs data operands as {$r, $g}.
// The selection pattern list is left empty here.
5833 def SUST_P_2D_V2B16_TRAP
5834   : NVPTXInst<(outs),
5835               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5836                    Int16Regs:$g),
5837               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5838               []>;
// Emits PTX `sust.p.2d.v2.b32.trap`. No results; the address is printed as
// [$s, {$x, $y}] and the two Int32Regs data operands as {$r, $g}.
// The selection pattern list is left empty here.
5839 def SUST_P_2D_V2B32_TRAP
5840   : NVPTXInst<(outs),
5841               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5842                    Int32Regs:$g),
5843               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5844               []>;
// Emits PTX `sust.p.2d.v4.b8.trap`. No results; the address is printed as
// [$s, {$x, $y}] and the data as {$r, $g, $b, $a}. The b8 data operands are
// carried in Int16Regs. The selection pattern list is left empty here.
5845 def SUST_P_2D_V4B8_TRAP
5846   : NVPTXInst<(outs),
5847               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5848                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5849               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5850               "\\{$r, $g, $b, $a\\};",
5851               []>;
// Emits PTX `sust.p.2d.v4.b16.trap`. No results; the address is printed as
// [$s, {$x, $y}] and the four Int16Regs data operands as {$r, $g, $b, $a}.
// The selection pattern list is left empty here.
5852 def SUST_P_2D_V4B16_TRAP
5853   : NVPTXInst<(outs),
5854               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5855                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5856              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5857              "\\{$r, $g, $b, $a\\};",
5858               []>;
// Emits PTX `sust.p.2d.v4.b32.trap`. No results; the address is printed as
// [$s, {$x, $y}] and the four Int32Regs data operands as {$r, $g, $b, $a}.
// The selection pattern list is left empty here.
5859 def SUST_P_2D_V4B32_TRAP
5860   : NVPTXInst<(outs),
5861               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5862                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5863              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5864              "\\{$r, $g, $b, $a\\};",
5865               []>;
5866
5867
// Emits PTX `sust.p.a2d.b8.trap`. No results; the b8 data operand $r is carried
// in Int16Regs. The coordinate vector is printed with four elements,
// {$idx, $x, $y, $y}: $y appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5868 def SUST_P_2D_ARRAY_B8_TRAP
5869   : NVPTXInst<(outs),
5870               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5871                    Int16Regs:$r),
5872               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5873               []>;
// Emits PTX `sust.p.a2d.b16.trap`. No results; one Int16Regs data operand $r.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y}:
// $y appears twice although there are only three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5874 def SUST_P_2D_ARRAY_B16_TRAP
5875   : NVPTXInst<(outs),
5876               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5877                    Int16Regs:$r),
5878               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5879               []>;
// Emits PTX `sust.p.a2d.b32.trap`. No results; one Int32Regs data operand $r.
// The coordinate vector is printed with four elements, {$idx, $x, $y, $y}:
// $y appears twice although there are only three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5880 def SUST_P_2D_ARRAY_B32_TRAP
5881   : NVPTXInst<(outs),
5882               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5883                    Int32Regs:$r),
5884               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5885               []>;
// Emits PTX `sust.p.a2d.v2.b8.trap`. No results; the b8 data operands $r, $g
// are carried in Int16Regs. The coordinate vector is printed with four
// elements, {$idx, $x, $y, $y}: $y appears twice although there are only
// three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5886 def SUST_P_2D_ARRAY_V2B8_TRAP
5887   : NVPTXInst<(outs),
5888               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5889                    Int16Regs:$r, Int16Regs:$g),
5890               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5891               "\\{$r, $g\\};",
5892               []>;
// Emits PTX `sust.p.a2d.v2.b16.trap`. No results; two Int16Regs data operands
// $r, $g. The coordinate vector is printed with four elements,
// {$idx, $x, $y, $y}: $y appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5893 def SUST_P_2D_ARRAY_V2B16_TRAP
5894   : NVPTXInst<(outs),
5895               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5896                    Int16Regs:$r, Int16Regs:$g),
5897              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5898              "\\{$r, $g\\};",
5899               []>;
// Emits PTX `sust.p.a2d.v2.b32.trap`. No results; two Int32Regs data operands
// $r, $g. The coordinate vector is printed with four elements,
// {$idx, $x, $y, $y}: $y appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5900 def SUST_P_2D_ARRAY_V2B32_TRAP
5901   : NVPTXInst<(outs),
5902               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5903                    Int32Regs:$r, Int32Regs:$g),
5904              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5905              "\\{$r, $g\\};",
5906               []>;
// Emits PTX `sust.p.a2d.v4.b8.trap`. No results; the b8 data operands
// $r, $g, $b, $a are carried in Int16Regs. The coordinate vector is printed
// with four elements, {$idx, $x, $y, $y}: $y appears twice although there are
// only three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5907 def SUST_P_2D_ARRAY_V4B8_TRAP
5908   : NVPTXInst<(outs),
5909               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5910                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5911       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5912       "\\{$r, $g, $b, $a\\};",
5913               []>;
// Emits PTX `sust.p.a2d.v4.b16.trap`. No results; four Int16Regs data operands
// $r, $g, $b, $a. The coordinate vector is printed with four elements,
// {$idx, $x, $y, $y}: $y appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5914 def SUST_P_2D_ARRAY_V4B16_TRAP
5915   : NVPTXInst<(outs),
5916               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5917                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5918      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5919      "\\{$r, $g, $b, $a\\};",
5920               []>;
// Emits PTX `sust.p.a2d.v4.b32.trap`. No results; four Int32Regs data operands
// $r, $g, $b, $a. The coordinate vector is printed with four elements,
// {$idx, $x, $y, $y}: $y appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// a2d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5921 def SUST_P_2D_ARRAY_V4B32_TRAP
5922   : NVPTXInst<(outs),
5923               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5924                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5925      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5926      "\\{$r, $g, $b, $a\\};",
5927               []>;
5928
5929
// Emits PTX `sust.p.3d.b8.trap`. No results; the b8 data operand $r is carried
// in Int16Regs. The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}: $z appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5930 def SUST_P_3D_B8_TRAP
5931   : NVPTXInst<(outs),
5932               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5933                    Int16Regs:$r),
5934               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5935               []>;
// Emits PTX `sust.p.3d.b16.trap`. No results; one Int16Regs data operand $r.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z}:
// $z appears twice although there are only three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5936 def SUST_P_3D_B16_TRAP
5937   : NVPTXInst<(outs),
5938               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5939                    Int16Regs:$r),
5940               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5941               []>;
// Emits PTX `sust.p.3d.b32.trap`. No results; one Int32Regs data operand $r.
// The coordinate vector is printed with four elements, {$x, $y, $z, $z}:
// $z appears twice although there are only three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5942 def SUST_P_3D_B32_TRAP
5943   : NVPTXInst<(outs),
5944               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5945                    Int32Regs:$r),
5946               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5947               []>;
// Emits PTX `sust.p.3d.v2.b8.trap`. No results; the b8 data operands $r, $g
// are carried in Int16Regs. The coordinate vector is printed with four
// elements, {$x, $y, $z, $z}: $z appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5948 def SUST_P_3D_V2B8_TRAP
5949   : NVPTXInst<(outs),
5950               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5951                    Int16Regs:$r, Int16Regs:$g),
5952               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5953               "\\{$r, $g\\};",
5954               []>;
// Emits PTX `sust.p.3d.v2.b16.trap`. No results; two Int16Regs data operands
// $r, $g. The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}: $z appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5955 def SUST_P_3D_V2B16_TRAP
5956   : NVPTXInst<(outs),
5957               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5958                    Int16Regs:$r, Int16Regs:$g),
5959               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5960               "\\{$r, $g\\};",
5961               []>;
// Emits PTX `sust.p.3d.v2.b32.trap`. No results; two Int32Regs data operands
// $r, $g. The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}: $z appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5962 def SUST_P_3D_V2B32_TRAP
5963   : NVPTXInst<(outs),
5964               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5965                    Int32Regs:$r, Int32Regs:$g),
5966               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5967               "\\{$r, $g\\};",
5968               []>;
// Emits PTX `sust.p.3d.v4.b8.trap`. No results; the b8 data operands
// $r, $g, $b, $a are carried in Int16Regs. The coordinate vector is printed
// with four elements, {$x, $y, $z, $z}: $z appears twice although there are
// only three coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5969 def SUST_P_3D_V4B8_TRAP
5970   : NVPTXInst<(outs),
5971               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5972                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5973          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5974          "\\{$r, $g, $b, $a\\};",
5975               []>;
// Emits PTX `sust.p.3d.v4.b16.trap`. No results; four Int16Regs data operands
// $r, $g, $b, $a. The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}: $z appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5976 def SUST_P_3D_V4B16_TRAP
5977   : NVPTXInst<(outs),
5978               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5979                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5980         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5981         "\\{$r, $g, $b, $a\\};",
5982               []>;
// Emits PTX `sust.p.3d.v4.b32.trap`. No results; four Int32Regs data operands
// $r, $g, $b, $a. The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}: $z appears twice although there are only three
// coordinate operands.
// NOTE(review): presumably the fourth element is padding that PTX ignores for
// 3d surfaces -- confirm against the PTX ISA before "fixing" the repeat.
5983 def SUST_P_3D_V4B32_TRAP
5984   : NVPTXInst<(outs),
5985               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5986                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5987         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5988         "\\{$r, $g, $b, $a\\};",
5989               []>;
5990 }
5991
5992 // Surface store instruction patterns
5993 // I'm not sure why we can't just include these in the instruction definitions,
5994 // but TableGen complains of type errors :(
5995
5996 // .clamp variant
// Selection patterns for the 1D `.clamp` surface stores. Each
// int_nvvm_sust_b_1d_<ty>_clamp intrinsic is rewritten to the
// SUST_B_1D_*_CLAMP instruction of the same element type; $s, $x and the
// data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs. v2 forms carry $r, $g; v4 forms carry
// $r, $g, $b, $a (there is no v4i64 form in this group).
5997 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5998            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5999           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6000
6001 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6002            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6003           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6004
6005 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6006            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6007           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6008
6009 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6010            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6011           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6012
6013 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6014            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6015           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6016            Int16Regs:$r, Int16Regs:$g)>;
6017
6018 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6019            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6020           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6021            Int16Regs:$r, Int16Regs:$g)>;
6022
6023 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6024            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6025           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6026            Int32Regs:$r, Int32Regs:$g)>;
6027
6028 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6029            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6030           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6031            Int64Regs:$r, Int64Regs:$g)>;
6032
6033 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6034            Int64Regs:$s, Int32Regs:$x,
6035            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6036           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6037            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6038
6039 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6040            Int64Regs:$s, Int32Regs:$x,
6041            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6042           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6043            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6044
6045 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6046            Int64Regs:$s, Int32Regs:$x,
6047            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6048           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6049            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6050
6051
6052
// Selection patterns for the 1D-array `.clamp` surface stores. Each
// int_nvvm_sust_b_1d_array_<ty>_clamp intrinsic is rewritten to the
// SUST_B_1D_ARRAY_*_CLAMP instruction of the same element type; $s, $l, $x
// and the data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the intrinsic definitions.
6053 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6054            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6055           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6056            Int16Regs:$r)>;
6057
6058 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6059            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6060           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6061            Int16Regs:$r)>;
6062
6063 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6064            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6065           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6066            Int32Regs:$r)>;
6067
6068 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6069            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6070           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6071            Int64Regs:$r)>;
6072
6073 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6074           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6075           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6076            Int16Regs:$r, Int16Regs:$g)>;
6077
6078 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6079           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6080           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6081            Int16Regs:$r, Int16Regs:$g)>;
6082
6083 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6084           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6085           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6086            Int32Regs:$r, Int32Regs:$g)>;
6087
6088 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6089           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6090           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6091            Int64Regs:$r, Int64Regs:$g)>;
6092
6093 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6094            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6095            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6096           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6097            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6098
6099 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6100            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6101            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6102           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6103            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6104
6105 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6106            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6107            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6108           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6110
6111
6112
// Selection patterns for the 2D `.clamp` surface stores. Each
// int_nvvm_sust_b_2d_<ty>_clamp intrinsic is rewritten to the
// SUST_B_2D_*_CLAMP instruction of the same element type; $s, $x, $y and the
// data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
6113 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6114            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6115           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6116            Int16Regs:$r)>;
6117
6118 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6119            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6120           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6121            Int16Regs:$r)>;
6122
6123 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6124            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6125           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6126            Int32Regs:$r)>;
6127
6128 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6129            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6130           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6131            Int64Regs:$r)>;
6132
6133 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6134           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6135           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6136            Int16Regs:$r, Int16Regs:$g)>;
6137
6138 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6139           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6140           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6141            Int16Regs:$r, Int16Regs:$g)>;
6142
6143 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6144           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6145           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6146            Int32Regs:$r, Int32Regs:$g)>;
6147
6148 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6149           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6150           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6151            Int64Regs:$r, Int64Regs:$g)>;
6152
6153 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6154            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6155            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6156           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6157            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6158
6159 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6160            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6161            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6162           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6163            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6164
6165 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6166            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6167            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6168           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6169            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6170
6171
6172
// Selection patterns for the 2D-array `.clamp` surface stores. Each
// int_nvvm_sust_b_2d_array_<ty>_clamp intrinsic is rewritten to the
// SUST_B_2D_ARRAY_*_CLAMP instruction of the same element type; $s, $l, $x,
// $y and the data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the intrinsic definitions.
6173 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6174           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6175           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6176            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6177            Int16Regs:$r)>;
6178
6179 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6180           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6181           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6182            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6183            Int16Regs:$r)>;
6184
6185 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6186           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6187           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6188            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6189            Int32Regs:$r)>;
6190
6191 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6192           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6193           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6194            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6195            Int64Regs:$r)>;
6196
6197 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6198            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6199            Int16Regs:$r, Int16Regs:$g),
6200           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6201            Int32Regs:$x, Int32Regs:$y,
6202            Int16Regs:$r, Int16Regs:$g)>;
6203
6204 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6205            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6206            Int16Regs:$r, Int16Regs:$g),
6207           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6208            Int32Regs:$x, Int32Regs:$y,
6209            Int16Regs:$r, Int16Regs:$g)>;
6210
6211 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6212            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6213            Int32Regs:$g),
6214           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6215            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6216
6217 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6218            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6219            Int64Regs:$g),
6220           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6221            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6222
6223 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6224            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6225            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6226           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6227            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6228            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6229
6230 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6231            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6232            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6233           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6234            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6235            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6236
6237 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6238            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6239            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6240           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6241            Int32Regs:$x, Int32Regs:$y,
6242            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6243
6244
6245
// Selection patterns for the 3D `.clamp` surface stores. Each
// int_nvvm_sust_b_3d_<ty>_clamp intrinsic is rewritten to the
// SUST_B_3D_*_CLAMP instruction of the same element type; $s, $x, $y, $z and
// the data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
6246 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6247            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6248            Int16Regs:$r),
6249           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6250            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6251            Int16Regs:$r)>;
6252
6253 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6254            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6255            Int16Regs:$r),
6256           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6257            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6258            Int16Regs:$r)>;
6259
6260 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6261            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6262            Int32Regs:$r),
6263           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6264            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6265            Int32Regs:$r)>;
6266
6267 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6268            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6269            Int64Regs:$r),
6270           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6271            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6272            Int64Regs:$r)>;
6273
6274 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6275            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6276            Int16Regs:$r, Int16Regs:$g),
6277           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6278            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6279            Int16Regs:$r, Int16Regs:$g)>;
6280
6281 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6282            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6283            Int16Regs:$r, Int16Regs:$g),
6284           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6285            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6286            Int16Regs:$r, Int16Regs:$g)>;
6287
6288 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6289            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6290            Int32Regs:$r, Int32Regs:$g),
6291           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6292            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6293            Int32Regs:$r, Int32Regs:$g)>;
6294
6295 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6296            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6297            Int64Regs:$r, Int64Regs:$g),
6298           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6299            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6300            Int64Regs:$r, Int64Regs:$g)>;
6301
6302 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6303            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6304            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6305           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6306            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6307            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6308
6309 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6310            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6311            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6312           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6313            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6314            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6315
6316 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6317            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6318            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6319           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6320            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6321            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6322
6323
6324 // .trap variant
// Selection patterns for the 1D `.trap` surface stores. Each
// int_nvvm_sust_b_1d_<ty>_trap intrinsic is rewritten to the
// SUST_B_1D_*_TRAP instruction of the same element type; $s, $x and the data
// operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
6325 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6326            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6327           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6328
6329 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6330            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6331           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6332
6333 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6334            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6335           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6336
6337 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6338            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6339           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6340
6341 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6342            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6343           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6344            Int16Regs:$r, Int16Regs:$g)>;
6345
6346 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6347            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6348           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6349            Int16Regs:$r, Int16Regs:$g)>;
6350
6351 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6352            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6353           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6354            Int32Regs:$r, Int32Regs:$g)>;
6355
6356 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6357            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6358           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6359            Int64Regs:$r, Int64Regs:$g)>;
6360
6361 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6362            Int64Regs:$s, Int32Regs:$x,
6363            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6364           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6365            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6366
6367 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6368            Int64Regs:$s, Int32Regs:$x,
6369            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6370           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6371            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6372
6373 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6374            Int64Regs:$s, Int32Regs:$x,
6375            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6376           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6377            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6378
6379
6380
// Selection patterns for the 1D-array `.trap` surface stores. Each
// int_nvvm_sust_b_1d_array_<ty>_trap intrinsic is rewritten to the
// SUST_B_1D_ARRAY_*_TRAP instruction of the same element type; $s, $l, $x
// and the data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the intrinsic definitions.
6381 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6382            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6383           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6384            Int16Regs:$r)>;
6385
6386 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6387            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6388           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6389            Int16Regs:$r)>;
6390
6391 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6392            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6393           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6394            Int32Regs:$r)>;
6395
6396 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6397            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6398           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6399            Int64Regs:$r)>;
6400
6401 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6402           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6403           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6404            Int16Regs:$r, Int16Regs:$g)>;
6405
6406 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6407           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6408           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6409            Int16Regs:$r, Int16Regs:$g)>;
6410
6411 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6412           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6413           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6414            Int32Regs:$r, Int32Regs:$g)>;
6415
6416 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6417           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6418           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6419            Int64Regs:$r, Int64Regs:$g)>;
6420
6421 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6422            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6423            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6424           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6425            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6426
6427 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6428            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6429            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6430           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6431            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6432
6433 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6434            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6435            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6436           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6437            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6438
6439
6440
// Selection patterns for the 2D `.trap` surface stores. Each
// int_nvvm_sust_b_2d_<ty>_trap intrinsic is rewritten to the
// SUST_B_2D_*_TRAP instruction of the same element type; $s, $x, $y and the
// data operands are forwarded unchanged and in the same order.
// Register classes by element type: i8 and i16 use Int16Regs, i32 uses
// Int32Regs, i64 uses Int64Regs.
6441 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6442            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6443           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6444            Int16Regs:$r)>;
6445
6446 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6447            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6448           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6449            Int16Regs:$r)>;
6450
6451 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6452            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6453           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6454            Int32Regs:$r)>;
6455
6456 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6457            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6458           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6459            Int64Regs:$r)>;
6460
6461 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6462           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6463           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6464            Int16Regs:$r, Int16Regs:$g)>;
6465
6466 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6467           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6468           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6469            Int16Regs:$r, Int16Regs:$g)>;
6470
6471 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6472           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6473           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6474            Int32Regs:$r, Int32Regs:$g)>;
6475
6476 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6477           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6478           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6479            Int64Regs:$r, Int64Regs:$g)>;
6480
6481 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6482            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6483            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6484           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6485            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6486
6487 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6488            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6489            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6490           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6491            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6492
6493 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6494            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6495            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6496           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6497            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6498 // sust_b_2d_array *_trap: each intrinsic selects the SUST_B_2D_ARRAY_*_TRAP
6499 // instruction of matching width; $s, $l, $x, $y and the value operand(s) are
6500 // forwarded in order. NOTE(review): $l is presumably the array layer; confirm.
6501 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6502           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6503           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6504            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6505            Int16Regs:$r)>;
6506
6507 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6508           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6509           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6510            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6511            Int16Regs:$r)>;
6512
6513 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6514           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6515           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6516            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6517            Int32Regs:$r)>;
6518
6519 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6520           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6521           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6522            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6523            Int64Regs:$r)>;
6524 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6525 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6526            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6527            Int16Regs:$r, Int16Regs:$g),
6528           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6529            Int32Regs:$x, Int32Regs:$y,
6530            Int16Regs:$r, Int16Regs:$g)>;
6531
6532 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6533            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6534            Int16Regs:$r, Int16Regs:$g),
6535           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6536            Int32Regs:$x, Int32Regs:$y,
6537            Int16Regs:$r, Int16Regs:$g)>;
6538
6539 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6540            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6541            Int32Regs:$g),
6542           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6543            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6544
6545 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6546            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6547            Int64Regs:$g),
6548           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6549            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6550 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6551 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6552            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6553            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6554           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6555            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6556            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6557
6558 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6559            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6560            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6561           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6562            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6563            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6564
6565 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6566            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6567            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6568           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6569            Int32Regs:$x, Int32Regs:$y,
6570            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6571 // sust_b_3d *_trap: each intrinsic selects the SUST_B_3D_*_TRAP instruction of
6572 // matching width; $s (Int64Regs), $x, $y, $z (Int32Regs) and the value
6573 // operand(s) are forwarded in order. Scalar i8 and i16 both use Int16Regs.
6574 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6575            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6576            Int16Regs:$r),
6577           (SUST_B_3D_B8_TRAP Int64Regs:$s,
6578            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6579            Int16Regs:$r)>;
6580
6581 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6582            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6583            Int16Regs:$r),
6584           (SUST_B_3D_B16_TRAP Int64Regs:$s,
6585            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6586            Int16Regs:$r)>;
6587
6588 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6589            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6590            Int32Regs:$r),
6591           (SUST_B_3D_B32_TRAP Int64Regs:$s,
6592            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6593            Int32Regs:$r)>;
6594
6595 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6596            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6597            Int64Regs:$r),
6598           (SUST_B_3D_B64_TRAP Int64Regs:$s,
6599            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6600            Int64Regs:$r)>;
6601 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6602 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6603            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6604            Int16Regs:$r, Int16Regs:$g),
6605           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6606            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6607            Int16Regs:$r, Int16Regs:$g)>;
6608
6609 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6610            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6611            Int16Regs:$r, Int16Regs:$g),
6612           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6613            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6614            Int16Regs:$r, Int16Regs:$g)>;
6615
6616 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6617            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6618            Int32Regs:$r, Int32Regs:$g),
6619           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6620            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6621            Int32Regs:$r, Int32Regs:$g)>;
6622
6623 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6624            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6625            Int64Regs:$r, Int64Regs:$g),
6626           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6627            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6628            Int64Regs:$r, Int64Regs:$g)>;
6629 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6630 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6631            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6632            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6633           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6634            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6635            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6636
6637 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6638            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6639            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6640           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6641            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6643
6644 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6645            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6646            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6647           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6648            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6650 // Patterns for the int_nvvm_sust_b_*_zero intrinsics start here.
6651 // sust_b_1d *_zero: each selects the SUST_B_1D_*_ZERO instruction of matching
6652 // width, forwarding $s, $x and the value operand(s) in order (.zero variant).
6653 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6654            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6655           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6656
6657 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6658            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6659           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6660
6661 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6662            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6663           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6664
6665 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6666            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6667           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6668 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6669 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6670            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6671           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6672            Int16Regs:$r, Int16Regs:$g)>;
6673
6674 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6675            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6676           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6677            Int16Regs:$r, Int16Regs:$g)>;
6678
6679 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6680            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6681           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6682            Int32Regs:$r, Int32Regs:$g)>;
6683
6684 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6685            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6686           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6687            Int64Regs:$r, Int64Regs:$g)>;
6688 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6689 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6690            Int64Regs:$s, Int32Regs:$x,
6691            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6692           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6693            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6694
6695 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6696            Int64Regs:$s, Int32Regs:$x,
6697            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6698           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6699            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6700
6701 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6702            Int64Regs:$s, Int32Regs:$x,
6703            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6704           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6705            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6706 // sust_b_1d_array *_zero: each intrinsic selects the SUST_B_1D_ARRAY_*_ZERO
6707 // instruction of matching width; $s, $l, $x and the value operand(s) are
6708 // forwarded in order. NOTE(review): $l is presumably the array layer; confirm.
6709 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6710            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6711           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6712            Int16Regs:$r)>;
6713
6714 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6715            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6716           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6717            Int16Regs:$r)>;
6718
6719 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6720            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6721           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6722            Int32Regs:$r)>;
6723
6724 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6725            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6726           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6727            Int64Regs:$r)>;
6728 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6729 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6730           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6731           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6732            Int16Regs:$r, Int16Regs:$g)>;
6733
6734 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6735           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6736           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6737            Int16Regs:$r, Int16Regs:$g)>;
6738
6739 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6740           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6741           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6742            Int32Regs:$r, Int32Regs:$g)>;
6743
6744 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6745           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6746           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6747            Int64Regs:$r, Int64Regs:$g)>;
6748 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6749 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6750            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6751            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6752           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6753            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6754
6755 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6756            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6757            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6758           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6759            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6760
6761 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6762            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6763            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6764           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6765            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6766 // sust_b_2d *_zero: each intrinsic selects the SUST_B_2D_*_ZERO instruction of
6767 // matching width; $s (Int64Regs), $x, $y (Int32Regs) and the value operand(s)
6768 // are forwarded in order. Scalar i8 and i16 both use Int16Regs.
6769 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6770            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6771           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6772            Int16Regs:$r)>;
6773
6774 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6775            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6776           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6777            Int16Regs:$r)>;
6778
6779 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6780            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6781           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6782            Int32Regs:$r)>;
6783
6784 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6785            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6786           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6787            Int64Regs:$r)>;
6788 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6789 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6790           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6791           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6792            Int16Regs:$r, Int16Regs:$g)>;
6793
6794 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6795           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6796           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6797            Int16Regs:$r, Int16Regs:$g)>;
6798
6799 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6800           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6801           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6802            Int32Regs:$r, Int32Regs:$g)>;
6803
6804 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6805           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6806           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6807            Int64Regs:$r, Int64Regs:$g)>;
6808 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6809 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6810            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6811            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6812           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6813            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6814
6815 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6816            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6817            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6818           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6819            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6820
6821 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6822            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6823            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6824           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6826 // sust_b_2d_array *_zero: each intrinsic selects the SUST_B_2D_ARRAY_*_ZERO
6827 // instruction of matching width; $s, $l, $x, $y and the value operand(s) are
6828 // forwarded in order. NOTE(review): $l is presumably the array layer; confirm.
6829 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6830           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6831           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6832            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6833            Int16Regs:$r)>;
6834
6835 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6836           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6837           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6838            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6839            Int16Regs:$r)>;
6840
6841 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6842           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6843           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6844            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6845            Int32Regs:$r)>;
6846
6847 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6848           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6849           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6850            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6851            Int64Regs:$r)>;
6852 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6853 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6854            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6855            Int16Regs:$r, Int16Regs:$g),
6856           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6857            Int32Regs:$x, Int32Regs:$y,
6858            Int16Regs:$r, Int16Regs:$g)>;
6859
6860 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6861            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6862            Int16Regs:$r, Int16Regs:$g),
6863           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6864            Int32Regs:$x, Int32Regs:$y,
6865            Int16Regs:$r, Int16Regs:$g)>;
6866
6867 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6868            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6869            Int32Regs:$g),
6870           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6871            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6872
6873 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6874            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6875            Int64Regs:$g),
6876           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6877            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6878 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6879 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6880            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6881            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6882           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6883            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6884            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6885
6886 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6887            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6888            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6889           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6890            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6891            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6892
6893 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6894            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6895            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6896           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6897            Int32Regs:$x, Int32Regs:$y,
6898            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6899 // sust_b_3d *_zero: each intrinsic selects the SUST_B_3D_*_ZERO instruction of
6900 // matching width; $s (Int64Regs), $x, $y, $z (Int32Regs) and the value
6901 // operand(s) are forwarded in order. Scalar i8 and i16 both use Int16Regs.
6902 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6903            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6904            Int16Regs:$r),
6905           (SUST_B_3D_B8_ZERO Int64Regs:$s,
6906            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6907            Int16Regs:$r)>;
6908
6909 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6910            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6911            Int16Regs:$r),
6912           (SUST_B_3D_B16_ZERO Int64Regs:$s,
6913            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6914            Int16Regs:$r)>;
6915
6916 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6917            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6918            Int32Regs:$r),
6919           (SUST_B_3D_B32_ZERO Int64Regs:$s,
6920            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6921            Int32Regs:$r)>;
6922
6923 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6924            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6925            Int64Regs:$r),
6926           (SUST_B_3D_B64_ZERO Int64Regs:$s,
6927            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6928            Int64Regs:$r)>;
6929 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6930 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6931            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6932            Int16Regs:$r, Int16Regs:$g),
6933           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6934            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6935            Int16Regs:$r, Int16Regs:$g)>;
6936
6937 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6938            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6939            Int16Regs:$r, Int16Regs:$g),
6940           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6941            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6942            Int16Regs:$r, Int16Regs:$g)>;
6943
6944 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6945            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6946            Int32Regs:$r, Int32Regs:$g),
6947           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6948            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6949            Int32Regs:$r, Int32Regs:$g)>;
6950
6951 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6952            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6953            Int64Regs:$r, Int64Regs:$g),
6954           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6955            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6956            Int64Regs:$r, Int64Regs:$g)>;
6957 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
6958 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6959            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6960            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6961           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6962            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6963            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6964
6965 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6966            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6967            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6968           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6969            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6970            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6971
6972 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6973            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6974            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6975           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6976            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6977            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6978 // Patterns for the int_nvvm_sust_p_*_trap intrinsics start here.
6979 // sust_p_1d *_trap: each intrinsic selects the SUST_P_1D_*_TRAP instruction
6980 // of matching width, forwarding $s, $x and the value operand(s) in order.
6981 // This group has 8/16/32-bit forms only; no i64 or v2i64 patterns appear.
6982 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6983            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6984           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6985
6986 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6987            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6988           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6989
6990 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6991            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6992           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6993 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
6994 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6995            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6996           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6997            Int16Regs:$r, Int16Regs:$g)>;
6998
6999 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7000            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7001           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7002            Int16Regs:$r, Int16Regs:$g)>;
7003
7004 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7005            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7006           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7007            Int32Regs:$r, Int32Regs:$g)>;
7008 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
7009 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7010            Int64Regs:$s, Int32Regs:$x,
7011            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7012           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7013            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7014
7015 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7016            Int64Regs:$s, Int32Regs:$x,
7017            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7018           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7019            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7020
7021 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7022            Int64Regs:$s, Int32Regs:$x,
7023            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7024           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7025            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7026 // sust_p_1d_array *_trap: each intrinsic selects SUST_P_1D_ARRAY_*_TRAP of
7027 // matching width; $s, $l, $x and the value operand(s) are forwarded in order.
7028 // 8/16/32-bit forms only. NOTE(review): $l presumably the array layer; confirm.
7029 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7030            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7031           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7032            Int16Regs:$r)>;
7033
7034 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7035            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7036           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7037            Int16Regs:$r)>;
7038
7039 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7040            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7041           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7042            Int32Regs:$r)>;
7043 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
7044 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7045           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7046           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7047            Int16Regs:$r, Int16Regs:$g)>;
7048
7049 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7050           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7051           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7052            Int16Regs:$r, Int16Regs:$g)>;
7053
7054 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7055           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7056           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7057            Int32Regs:$r, Int32Regs:$g)>;
7058 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
7059 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7060            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7061            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7062           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7063            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7064
7065 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7066            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7067            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7068           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7069            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7070
7071 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7072            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7073            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7074           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7075            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7076 // sust_p_2d *_trap: each intrinsic selects the SUST_P_2D_*_TRAP instruction of
7077 // matching width; $s (Int64Regs), $x, $y (Int32Regs) and the value operand(s)
7078 // are forwarded in order. 8/16/32-bit forms only; i8 and i16 use Int16Regs.
7079 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7080            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7081           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7082            Int16Regs:$r)>;
7083
7084 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7085            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7086           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7087            Int16Regs:$r)>;
7088
7089 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7090            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7091           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7092            Int32Regs:$r)>;
7093 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
7094 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7095           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7096           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7097            Int16Regs:$r, Int16Regs:$g)>;
7098
7099 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7100           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7101           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7102            Int16Regs:$r, Int16Regs:$g)>;
7103
7104 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7105           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7106           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7107            Int32Regs:$r, Int32Regs:$g)>;
7108 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
7109 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7110            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7111            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7112           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7113            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7114
7115 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7116            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7117            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7118           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7119            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7120
7121 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7122            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7123            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7124           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7125            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7126 // sust_p_2d_array *_trap: each intrinsic selects SUST_P_2D_ARRAY_*_TRAP of
7127 // matching width; $s, $l, $x, $y and the value operand(s) forwarded in order.
7128 // 8/16/32-bit forms only. NOTE(review): $l presumably the array layer; confirm.
7129 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7130           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7131           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7132            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7133            Int16Regs:$r)>;
7134
7135 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7136           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7137           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7138            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7139            Int16Regs:$r)>;
7140
7141 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7142           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7143           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7144            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7145            Int32Regs:$r)>;
7146 // v2 forms: two value operands ($r, $g); v2i8 and v2i16 both use Int16Regs.
7147 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7148            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7149            Int16Regs:$r, Int16Regs:$g),
7150           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7151            Int32Regs:$x, Int32Regs:$y,
7152            Int16Regs:$r, Int16Regs:$g)>;
7153
7154 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7155            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7156            Int16Regs:$r, Int16Regs:$g),
7157           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7158            Int32Regs:$x, Int32Regs:$y,
7159            Int16Regs:$r, Int16Regs:$g)>;
7160
7161 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7162            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7163            Int32Regs:$g),
7164           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7165            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7166 // v4 forms: four value operands ($r, $g, $b, $a); v4i8/v4i16 use Int16Regs.
7167 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7168            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7169            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7170           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7171            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7172            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7173
7174 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7175            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7176            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7177           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7178            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7179            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7180
7181 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7182            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7183            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7184           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7185            Int32Regs:$x, Int32Regs:$y,
7186            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7187
7188
7189
     // Selection patterns for the 3D sust_p "trap" intrinsics. Every
     // int_nvvm_sust_p_3d_<T>_trap call maps one-to-one onto the
     // SUST_P_3D_<B>_TRAP instruction of the same element width and component
     // count, with the operands forwarded in source order: $s (Int64Regs),
     // then $x, $y, $z (Int32Regs), then the data operands.
     // As with the other sust_p groups, i8 and i16 data travel in Int16Regs and
     // i32 data in Int32Regs.
7190 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7191            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7192            Int16Regs:$r),
7193           (SUST_P_3D_B8_TRAP Int64Regs:$s,
7194            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7195            Int16Regs:$r)>;
7196
7197 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7198            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7199            Int16Regs:$r),
7200           (SUST_P_3D_B16_TRAP Int64Regs:$s,
7201            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7202            Int16Regs:$r)>;
7203
7204 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7205            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7206            Int32Regs:$r),
7207           (SUST_P_3D_B32_TRAP Int64Regs:$s,
7208            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7209            Int32Regs:$r)>;
7210
     // Two-component variants: data operands are $r and $g.
7211 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7212            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7213            Int16Regs:$r, Int16Regs:$g),
7214           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7215            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7216            Int16Regs:$r, Int16Regs:$g)>;
7217
7218 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7219            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7220            Int16Regs:$r, Int16Regs:$g),
7221           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7222            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7223            Int16Regs:$r, Int16Regs:$g)>;
7224
7225 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7226            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7227            Int32Regs:$r, Int32Regs:$g),
7228           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7229            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7230            Int32Regs:$r, Int32Regs:$g)>;
7231
     // Four-component variants: data operands are $r, $g, $b and $a.
7232 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7233            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7234            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7235           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7236            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7237            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7238
7239 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7240            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7241            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7242           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7243            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7244            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7245
7246 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7247            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7248            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7249           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7250            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7251            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7252
7253 //-----------------------------------
7254 // Read Special Registers
7255 //-----------------------------------
7256
     // Instruction that reads a PTX special register into a 64-bit register.
     //   regname - register name as written in PTX, without the leading '%'
     //             (the '%' is added by the asm string below).
     //   intop   - operand-less intrinsic this instruction is selected for.
     // Emits "mov.u64 $d, %<regname>;" with the result in the Int64Regs output $d.
7257 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7258   : NVPTXInst<(outs Int64Regs:$d), (ins),
7259               !strconcat("mov.u64 \t$d, %", regname, ";"),
7260               [(set Int64Regs:$d, (intop))]>;
7261
     // Instruction that reads a PTX special register into a 32-bit register.
     //   regname - register name as written in PTX, without the leading '%'
     //             (the '%' is added by the asm string below).
     //   intop   - operand-less intrinsic this instruction is selected for.
     // Emits "mov.u32 $d, %<regname>;" with the result in the Int32Regs output $d.
7262 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7263   : NVPTXInst<(outs Int32Regs:$d), (ins),
7264               !strconcat("mov.u32 \t$d, %", regname, ";"),
7265               [(set Int32Regs:$d, (intop))]>;
7266
7267 // TODO: Add vector-read versions of the special registers.
7268
     // One instruction per PTX special register. Each def binds the register
     // name printed in the emitted `mov` to the int_nvvm_read_ptx_sreg_*
     // intrinsic it implements. All of them are 32-bit reads except clock64,
     // which uses the 64-bit class.
7269 def INT_PTX_SREG_TID_X :
7270     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7271 def INT_PTX_SREG_TID_Y :
7272     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7273 def INT_PTX_SREG_TID_Z :
7274     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7275 def INT_PTX_SREG_TID_W :
7276     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7277
7278 def INT_PTX_SREG_NTID_X :
7279     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7280 def INT_PTX_SREG_NTID_Y :
7281     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7282 def INT_PTX_SREG_NTID_Z :
7283     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7284 def INT_PTX_SREG_NTID_W :
7285     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
7286
7287 def INT_PTX_SREG_LANEID :
7288     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7289 def INT_PTX_SREG_WARPID :
7290     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7291 def INT_PTX_SREG_NWARPID :
7292     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
7293
7294 def INT_PTX_SREG_CTAID_X :
7295     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7296 def INT_PTX_SREG_CTAID_Y :
7297     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7298 def INT_PTX_SREG_CTAID_Z :
7299     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7300 def INT_PTX_SREG_CTAID_W :
7301     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7302
7303 def INT_PTX_SREG_NCTAID_X :
7304     PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7305 def INT_PTX_SREG_NCTAID_Y :
7306     PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7307 def INT_PTX_SREG_NCTAID_Z :
7308     PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7309 def INT_PTX_SREG_NCTAID_W :
7310     PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
7311
7312 def INT_PTX_SREG_SMID :
7313     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7314 def INT_PTX_SREG_NSMID :
7315     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7316 def INT_PTX_SREG_GRIDID :
7317     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
7318
7319 def INT_PTX_SREG_LANEMASK_EQ :
7320     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7321 def INT_PTX_SREG_LANEMASK_LE :
7322     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7323 def INT_PTX_SREG_LANEMASK_LT :
7324     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7325 def INT_PTX_SREG_LANEMASK_GE :
7326     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7327 def INT_PTX_SREG_LANEMASK_GT :
7328     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7329
     // clock is read as 32 bits, clock64 as 64 bits.
7330 def INT_PTX_SREG_CLOCK :
7331     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7332 def INT_PTX_SREG_CLOCK64 :
7333     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
7334
7335 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7336 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7337 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7338 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7339
7340 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
7341 // prefixes the name with '%', and WARP_SZ is written as a bare constant.
7342 def INT_PTX_SREG_WARPSIZE :
7343     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7344               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7345
7346 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7347 // In addition to target-independent fields provided by WMMA_REGS, it adds
7348 // the fields commonly used to implement specific PTX instruction -- register
7349 // types and names, constraints, parts of assembly, etc.
     // The template argument `r` is an existing WMMA_REGS record; its geom, frag
     // and ptx_elt_type are passed straight through to the WMMA_REGS base.
7350 class WMMA_REGINFO<WMMA_REGS r>
7351       : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7352   // NVPTX register types used to carry fragment data.
       // f16 uses Float16x2Regs and f32 uses Float32Regs; s32 and all the
       // narrower integer/bit element types (s8, u8, s4, u4, b1) use Int32Regs.
       // There is no catch-all arm, so per the TableGen reference an element
       // type not listed here is reported as an error instead of getting a
       // default class.
7353   NVPTXRegClass regclass = !cond(
7354     !eq(ptx_elt_type, "f16") : Float16x2Regs,
7355     !eq(ptx_elt_type, "f32") : Float32Regs,
7356     !eq(ptx_elt_type, "s32") : Int32Regs,
7357     !eq(ptx_elt_type, "s8") : Int32Regs,
7358     !eq(ptx_elt_type, "u8") : Int32Regs,
7359     !eq(ptx_elt_type, "s4") : Int32Regs,
7360     !eq(ptx_elt_type, "u4") : Int32Regs,
7361     !eq(ptx_elt_type, "b1") : Int32Regs);
7362
7363   // Instruction input/output arguments for the fragment.
       // One `regclass` entry per element of `regs` (`regs` is not defined in
       // this class; presumably it is inherited from WMMA_REGS).
7364   list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
7365
7366   // List of register names for the fragment -- ["ra0", "ra1",...]
       // i.e. "r" # frag followed by a running index, one name per ptx_regs entry.
7367   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7368
7369   // Generates "{{$ra0, $ra1,.... $raN-1}}" (shown for frag "a") for use in
       // asm string construction: the first name is emitted directly, then the
       // fold appends ", $<name>" for each remaining name.
7370   string regstring = "{{$" # !head(reg_names)
7371                            # !foldl("", !tail(reg_names), a, b,
7372                                     !strconcat(a, ", $", b))
7373                      # "}}";
7374
7375   // Predicates for particular fragment variant. Technically those are
7376   // per-instruction predicates, but currently all fragments that can be used in
7377   // a given instruction are subject to the same constraints, so an instruction
7378   // can use predicates from any of its fragments. If/when this is no
7379   // longer the case, we can concat all per-fragment predicates to enforce that
7380   // all fragments of the instruction are viable.
       // The arms are tested top to bottom and the first true one is used. Note
       // that the last two arms key on geometry alone, not on the element type.
7381   list<Predicate> Predicates = !cond(
7382     // fp16 -> fp16/fp32 @ m16n16k16
7383     !and(!eq(geom, "m16n16k16"),
7384          !or(!eq(ptx_elt_type, "f16"),
7385              !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
7386
7387     // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7388     !and(!or(!eq(geom, "m8n32k16"),
7389              !eq(geom, "m32n8k16")),
7390          !or(!eq(ptx_elt_type, "f16"),
7391              !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7392
7393     // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7394     !and(!or(!eq(geom,"m16n16k16"),
7395              !eq(geom,"m8n32k16"),
7396              !eq(geom,"m32n8k16")),
7397          !or(!eq(ptx_elt_type, "u8"),
7398              !eq(ptx_elt_type, "s8"),
7399              !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
7400
7401     // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
7402     !or(!eq(geom,"m8n8k128"),
7403         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
7404
         // Any element type @ m8n8k4.
7405     !eq(geom, "m8n8k4") : [hasSM70, hasPTX64]);
7406
7407   // template DAGs for instruction inputs/output.
       // Both pair ptx_regs[i] with reg_names[i]; they differ only in the dag
       // operator (outs vs. ins), so a user picks whichever role the fragment
       // plays in its instruction.
7408   dag Outs = !dag(outs, ptx_regs, reg_names);
7409   dag Ins = !dag(ins, ptx_regs, reg_names);
7410 }
7411
7412 // Convert dag of arguments into a dag to match given intrinsic.
     // `Ins` is an (ins ...) operand dag. The result, `ret`, has the same
     // arguments but with the `ins` operator replaced by `Intr` and the memory
     // operand classes rewritten as imem -> ADDRvar, MEMri64 -> ADDRri64 and
     // MEMri -> ADDRri. Anything else in the dag is left as is.
7413 class BuildPatternI<Intrinsic Intr, dag Ins> {
7414   // Build a dag pattern that matches the intrinsic call.
       // The !subst chain is applied to each element in turn; the innermost
       // substitution (ins -> Intr) runs first.
7415   dag ret = !foreach(tmp, Ins,
7416                           !subst(imem, ADDRvar,
7417                           !subst(MEMri64, ADDRri64,
7418                           !subst(MEMri, ADDRri,
7419                           !subst(ins, Intr, tmp)))));
7420 }
7421
7422 // Same as above, but uses PatFrag instead of an Intrinsic.
     // `Ins` is an (ins ...) operand dag. The result, `ret`, has the same
     // arguments but with the `ins` operator replaced by the PatFrag `Intr` and
     // the memory operand classes rewritten as imem -> ADDRvar,
     // MEMri64 -> ADDRri64 and MEMri -> ADDRri.
7423 class BuildPatternPF<PatFrag Intr, dag Ins> {
7424   // Build a dag pattern that matches through the given PatFrag.
7425   dag ret = !foreach(tmp, Ins,
7426                           !subst(imem, ADDRvar,
7427                           !subst(MEMri64, ADDRri64,
7428                           !subst(MEMri, ADDRri,
7429                           !subst(ins, Intr, tmp)))));
7430 }
7431
7432 // Common WMMA-related fields used for building patterns for all MMA instructions.
     //   _Intr - name of the intrinsic record, resolved below with !cast.
     //   _Args - list of (ins ...) dags that are concatenated into `Args`.
     // The NVPTXInst base is given placeholder values ((outs), (ins), "?", []);
     // the subclasses in this file override OutOperandList, InOperandList and
     // AsmString with `let`.
7433 class WMMA_INSTR<string _Intr, list<dag> _Args>
7434   : NVPTXInst<(outs), (ins), "?", []> {
7435   Intrinsic Intr = !cast<Intrinsic>(_Intr);
7436   // Concatenate all arguments into a single dag.
7437   dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7438   // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
       // NOTE(review): Intr is already declared as an Intrinsic, so the extra
       // !cast here looks redundant.
7439   dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
7440 }
7441
7442 //
7443 // wmma.load.[a|b|c].sync<aligned>.[row|col].<geom>[|.global|.shared].<type>
     //
     // <geom> and <type> are Frag.geom and Frag.ptx_elt_type; <aligned> is
     // whatever the MmaCode operand $ptx prints for its `aligned` modifier.
     //   Frag       - fragment being loaded; supplies outputs, predicates, names.
     //   Layout     - layout string placed in the mnemonic.
     //   Space      - address-space suffix; ".shared" and ".global" select the
     //                matching AS_match predicate, any other value (the
     //                instantiation below passes "") selects the generic one.
     //   WithStride - when set, an extra Int32Regs:$ldm operand is added.
     //   SrcOp      - operand class used for the source address.
7444 //
7445
7446 class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7447                 DAGOperand SrcOp>
7448   : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7449                               [!con((ins SrcOp:$src),
7450                                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7451     Requires<Frag.Predicates> {
7452   // Load/store intrinsics are overloaded on pointer's address space.
7453   // To match the right intrinsic, we need to build AS-constrained PatFrag.
7454   // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7455   dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7456   // Build PatFrag that only matches particular address space.
       // The fragment body is PFOperands with `ops` replaced by the intrinsic;
       // the final `1:` arm of the !cond is the default (generic) case.
7457   PatFrag IntrFrag = PatFrag<PFOperands,
7458                              !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7459                              !cond(!eq(Space, ".shared"): AS_match.shared,
7460                                    !eq(Space, ".global"): AS_match.global,
7461                                    1: AS_match.generic)>;
7462   // Build AS-constrained pattern.
       // This replaces the unconstrained pattern pre-built in WMMA_INSTR.
7463   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7464
7465   let OutOperandList = Frag.Outs;
       // $ptx is an extra trailing operand that is not part of the intrinsic's
       // arguments; it is only referenced by "${ptx:aligned}" in the asm string.
7466   let InOperandList = !con(Args, (ins MmaCode:$ptx));
7467   let AsmString = "wmma.load."
7468                   # Frag.frag
7469                   # ".sync"
7470                   # "${ptx:aligned}"
7471                   # "." # Layout
7472                   # "." # Frag.geom
7473                   # Space
7474                   # "." # Frag.ptx_elt_type # " \t"
7475                   # Frag.regstring
7476                   # ", [$src]"
7477                   # !if(WithStride, ", $ldm", "")
7478                   # ";";
7479 }
7480
7481 //
7482 // wmma.store.d.sync<aligned>.[row|col].<geom>[|.global|.shared].<type>
     //
     // <geom> and <type> are Frag.geom and Frag.ptx_elt_type; <aligned> is
     // whatever the MmaCode operand $ptx prints for its `aligned` modifier.
     //   Frag       - fragment being stored; supplies inputs, predicates, names.
     //   Layout     - layout string placed in the mnemonic.
     //   Space      - address-space suffix; ".shared" and ".global" select the
     //                matching AS_match predicate, any other value (the
     //                instantiation below passes "") selects the generic one.
     //   WithStride - when set, an extra Int32Regs:$ldm operand is added.
     //   DstOp      - operand class used for the destination address.
7483 //
7484 class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7485                    bit WithStride, DAGOperand DstOp>
7486   : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7487                [!con((ins DstOp:$dst),
7488                      Frag.Ins,
7489                      !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7490     Requires<Frag.Predicates> {
7491
7492   // Load/store intrinsics are overloaded on pointer's address space.
7493   // To match the right intrinsic, we need to build AS-constrained PatFrag.
7494   // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
       // It is $dst, then one `node` per fragment register (named like
       // Frag.reg_names), then $ldm when WithStride is set.
7495   dag PFOperands = !con((ops node:$dst),
7496                         !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
7497                         !if(WithStride, (ops node:$ldm), (ops)));
7498   // Build PatFrag that only matches particular address space.
       // The fragment body is PFOperands with `ops` replaced by the intrinsic;
       // the final `1:` arm of the !cond is the default (generic) case.
7499   PatFrag IntrFrag = PatFrag<PFOperands,
7500                              !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7501                              !cond(!eq(Space, ".shared"): AS_match.shared,
7502                                    !eq(Space, ".global"): AS_match.global,
7503                                    1: AS_match.generic)>;
7504   // Build AS-constrained pattern.
       // This replaces the unconstrained pattern pre-built in WMMA_INSTR.
7505   let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7506
       // $ptx is an extra trailing operand that is not part of the intrinsic's
       // arguments; it is only referenced by "${ptx:aligned}" in the asm string.
7507   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
       // A store produces no register results.
7508   let OutOperandList = (outs);
7509   let AsmString = "wmma.store.d.sync"
7510                   # "${ptx:aligned}"
7511                   # "." # Layout
7512                   # "." # Frag.geom
7513                   # Space
7514                   # "." # Frag.ptx_elt_type
7515                   # " \t[$dst],"
7516                   # Frag.regstring
7517                   # !if(WithStride, ", $ldm", "")
7518                   # ";";
7519 }
7520
7521 // Create all load/store variants
     // Every record defined inside the defset is collected in the list MMA_LDSTs.
     // The loops enumerate layout x stride x address space x address operand
     // class, and for each combination every fragment in all_ld_ops / all_st_ops.
     // The innermost `foreach _` acts as a filter: the def is emitted once per
     // element of NVVM_MMA_SUPPORTED<...>.ret (presumably an empty list for
     // unsupported combinations -- confirm against its definition).
7522 defset list<WMMA_INSTR> MMA_LDSTs  = {
7523   foreach layout = ["row", "col"] in {
7524     foreach stride = [0, 1] in {
           // "" is the generic address space (no suffix in the mnemonic).
7525       foreach space = [".global", ".shared", ""] in {
7526         foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
7527           foreach frag = NVVM_MMA_OPS.all_ld_ops in
7528             foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7529               def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7530           foreach frag = NVVM_MMA_OPS.all_st_ops in
7531             foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7532               def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7533         } // addr
7534       } // space
7535     } // stride
7536   } // layout
7537 } // defset
7538
7539 // WMMA.MMA
     // Matrix multiply-accumulate instruction built from four fragments.
     //   FragA, FragB, FragC - input fragments; their Ins dags become the
     //                         instruction's arguments, in that order.
     //   FragD               - result fragment; its Outs dag is the output list.
     //   ALayout, BLayout    - layout strings placed in the mnemonic.
     //   Satfinite           - non-zero adds ".satfinite" (wmma.mma form only).
7540 class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7541                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7542                string ALayout, string BLayout, int Satfinite>
7543   : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
7544                              [FragA.Ins, FragB.Ins, FragC.Ins]>,
7545     // Requires does not seem to have effect on Instruction w/o Patterns.
7546     // We set it here anyways and propagate to the Pat<> we construct below.
7547     Requires<FragA.Predicates> {
7548   let OutOperandList = FragD.Outs;
       // $ptx is an extra trailing operand that is not part of the intrinsic's
       // arguments; it is only referenced by "${ptx:aligned}" in the asm string.
7549   let InOperandList  = !con(Args, (ins MmaCode:$ptx));
       // Type suffix of the mnemonic:
       //   m8n8k4      -> .<D>.f16.f16.<C>
       //   s32 result  -> .s32.<A>.<B>.s32
       //   otherwise   -> .<D>.<C>
       // NOTE(review): this tests FragD.geom while AsmString below tests
       // FragA.geom; that presumably relies on all four fragments sharing one
       // geometry -- confirm.
7550   string TypeList = !cond(
7551     !eq(FragD.geom, "m8n8k4") : "." # FragD.ptx_elt_type
7552                                 # ".f16.f16."
7553                                 # FragC.ptx_elt_type,
7554     !eq(FragD.ptx_elt_type, "s32") : ".s32"
7555                                      # "." # FragA.ptx_elt_type
7556                                      # "." # FragB.ptx_elt_type
7557                                      # ".s32",
7558     1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
7559   );
       // m8n8k4 is printed as "mma.sync.aligned.m8n8k4..." with a literal
       // ".aligned" and no satfinite suffix; every other geometry is printed as
       // "wmma.mma...", with ".xor.popc" inserted when FragA's element type is b1.
       // In both forms the register groups are listed in the order D, A, B, C.
7560   let AsmString = !if(!eq(FragA.geom, "m8n8k4"),
7561      "mma.sync.aligned.m8n8k4"
7562         # "." # ALayout
7563         # "." # BLayout
7564         # TypeList # "\n\t\t"
7565         # FragD.regstring # ",\n\t\t"
7566         # FragA.regstring # ",\n\t\t"
7567         # FragB.regstring # ",\n\t\t"
7568         # FragC.regstring # ";",
7569      "wmma.mma"
7570         # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
7571         # ".sync"
7572         # "${ptx:aligned}"
7573         # "." # ALayout
7574         # "." # BLayout
7575         # "." # FragA.geom
7576         # TypeList
7577         # !if(Satfinite, ".satfinite", "") # "\n\t\t"
7578         # FragD.regstring # ",\n\t\t"
7579         # FragA.regstring # ",\n\t\t"
7580         # FragB.regstring # ",\n\t\t"
7581         # FragC.regstring # ";");
7582 }
7583
     // Create all MMA variants. Every record defined inside the defset is
     // collected in the list MMAs. The loops enumerate A layout x B layout x
     // satfinite x every entry of NVVM_MMA_OPS.all_mma_ops; each entry `op` is
     // indexed as [0..3] to supply the A, B, C and D fragments respectively.
     // The innermost `foreach _` acts as a filter: the def is emitted once per
     // element of NVVM_MMA_SUPPORTED<...>.ret (presumably an empty list for
     // unsupported combinations -- confirm against its definition).
7584 defset list<WMMA_INSTR> MMAs  = {
7585   foreach layout_a = ["row", "col"] in {
7586     foreach layout_b = ["row", "col"] in {
7587       foreach satf = [0, 1] in {
7588         foreach op = NVVM_MMA_OPS.all_mma_ops in {
7589           foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
7590             def : WMMA_MMA<WMMA_REGINFO<op[0]>,
7591                            WMMA_REGINFO<op[1]>,
7592                            WMMA_REGINFO<op[2]>,
7593                            WMMA_REGINFO<op[3]>,
7594                            layout_a, layout_b, satf>;
7595           }
7596         } // op
7597       } // satf
7598     } // layout_b
7599   } // layout_a
7600 } // defset
7601
7602
7603 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7604 // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7605 // the instruction record.
     //
     // Selection pattern for one WMMA_INSTR record `wi`: the source side is the
     // pre-built wi.IntrinsicPattern; the result side is (wi <Args...> <version>),
     // i.e. wi.Args with `ins` swapped for the instruction itself, followed by
     // ptx.version to fill the trailing MmaCode:$ptx operand. The pattern carries
     // the same predicates as the instruction.
7606 class WMMA_PAT<WMMA_INSTR wi>
7607       : Pat<wi.IntrinsicPattern,
7608             !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7609                  (wi ptx.version))>,
7610         Requires<wi.Predicates>;
7611
7612 // Build intrinsic->instruction patterns for all MMA instructions.
     // One anonymous WMMA_PAT is created per record collected in the MMAs and
     // MMA_LDSTs defsets.
7613 foreach mma = !listconcat(MMAs, MMA_LDSTs) in
7614   def : WMMA_PAT<mma>;