1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 def immFloat0 : PatLeaf<(fpimm), [{
11 float f = (float)N->getValueAPF().convertToFloat();
15 def immFloat1 : PatLeaf<(fpimm), [{
16 float f = (float)N->getValueAPF().convertToFloat();
20 def immDouble0 : PatLeaf<(fpimm), [{
21 double d = (double)N->getValueAPF().convertToDouble();
25 def immDouble1 : PatLeaf<(fpimm), [{
26 double d = (double)N->getValueAPF().convertToDouble();
32 //-----------------------------------
33 // Synchronization and shuffle functions
34 //-----------------------------------
35 let isConvergent = 1 in {
36 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
38 [(int_nvvm_barrier0)]>;
39 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
41 [(int_nvvm_barrier_n Int32Regs:$src1)]>;
// Selects the int_nvvm_barrier intrinsic and prints it as a two-operand
// bar.sync; both operands are taken from 32-bit integer registers.
def INT_BARRIER
    : NVPTXInst<(outs),
                (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
45 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
47 ".reg .pred \t%p1; \n\t",
48 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
49 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
51 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
52 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
54 ".reg .pred \t%p1; \n\t",
55 ".reg .pred \t%p2; \n\t",
56 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
57 "bar.red.and.pred \t%p2, 0, %p1; \n\t",
58 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
60 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
61 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
63 ".reg .pred \t%p1; \n\t",
64 ".reg .pred \t%p2; \n\t",
65 "setp.ne.u32 \t%p1, $pred, 0; \n\t",
66 "bar.red.or.pred \t%p2, 0, %p1; \n\t",
67 "selp.u32 \t$dst, 1, 0, %p2; \n\t",
69 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Selects int_nvvm_bar_sync when its operand is an immediate and prints it as
// a single-operand bar.sync.
def INT_BAR_SYNC
    : NVPTXInst<(outs),
                (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
// bar.warp.sync for int_nvvm_bar_warp_sync.  The _I form matches an immediate
// operand, the _R form a 32-bit register operand.  Both records are guarded by
// the hasPTX60 and hasSM30 predicates.
let Predicates = [hasPTX60, hasSM30] in {
  def INT_BAR_WARP_SYNC_I
      : NVPTXInst<(outs),
                  (ins i32imm:$i),
                  "bar.warp.sync \t$i;",
                  [(int_nvvm_bar_warp_sync imm:$i)]>;

  def INT_BAR_WARP_SYNC_R
      : NVPTXInst<(outs),
                  (ins Int32Regs:$i),
                  "bar.warp.sync \t$i;",
                  [(int_nvvm_bar_warp_sync Int32Regs:$i)]>;
}
// Single-operand barrier.sync for int_nvvm_barrier_sync.  The _I form matches
// an immediate operand, the _R form a 32-bit register operand.  Both records
// are guarded by the hasPTX60 and hasSM30 predicates.
let Predicates = [hasPTX60, hasSM30] in {
  def INT_BARRIER_SYNC_I
      : NVPTXInst<(outs),
                  (ins i32imm:$i),
                  "barrier.sync \t$i;",
                  [(int_nvvm_barrier_sync imm:$i)]>;

  def INT_BARRIER_SYNC_R
      : NVPTXInst<(outs),
                  (ins Int32Regs:$i),
                  "barrier.sync \t$i;",
                  [(int_nvvm_barrier_sync Int32Regs:$i)]>;
}
// Two-operand barrier.sync for int_nvvm_barrier_sync_cnt.  The suffix encodes
// the operand kinds in ($id, $cnt) order: R = 32-bit register, I = immediate.
// All four records are guarded by the hasPTX60 and hasSM30 predicates.
let Predicates = [hasPTX60, hasSM30] in {
  def INT_BARRIER_SYNC_CNT_RR
      : NVPTXInst<(outs),
                  (ins Int32Regs:$id, Int32Regs:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>;

  def INT_BARRIER_SYNC_CNT_RI
      : NVPTXInst<(outs),
                  (ins Int32Regs:$id, i32imm:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>;

  def INT_BARRIER_SYNC_CNT_IR
      : NVPTXInst<(outs),
                  (ins i32imm:$id, Int32Regs:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>;

  def INT_BARRIER_SYNC_CNT_II
      : NVPTXInst<(outs),
                  (ins i32imm:$id, i32imm:$cnt),
                  "barrier.sync \t$id, $cnt;",
                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>;
}
106 // shfl.{up,down,bfly,idx}.b32
107 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
108 // The last two parameters to shfl can be regs or imms. ptxas is smart
109 // enough to inline constant registers, so strictly speaking we don't need to
110 // handle immediates here. But it's easy enough, and it makes our ptx more
113 (outs regclass:$dst),
114 (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
115 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
116 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
118 def imm1 : NVPTXInst<
119 (outs regclass:$dst),
120 (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
121 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
122 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
124 def imm2 : NVPTXInst<
125 (outs regclass:$dst),
126 (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
127 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
128 [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
130 def imm3 : NVPTXInst<
131 (outs regclass:$dst),
132 (ins regclass:$src, i32imm:$offset, i32imm:$mask),
133 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
134 [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
// Instantiate the SHFL multiclass once per shuffle mode ("down", "up", "bfly",
// "idx") and per register class (Int32Regs, Float32Regs), pairing each with
// its matching int_nvvm_shfl_* intrinsic.
defm INT_SHFL_DOWN_I32
    : SHFL<Int32Regs,   "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
    : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;

defm INT_SHFL_UP_I32
    : SHFL<Int32Regs,   "up",   int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
    : SHFL<Float32Regs, "up",   int_nvvm_shfl_up_f32>;

defm INT_SHFL_BFLY_I32
    : SHFL<Int32Regs,   "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
    : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;

defm INT_SHFL_IDX_I32
    : SHFL<Int32Regs,   "idx",  int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
    : SHFL<Float32Regs, "idx",  int_nvvm_shfl_idx_f32>;
146 multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
147 // Threadmask and the last two parameters to shfl.sync can be regs or imms.
148 // ptxas is smart enough to inline constant registers, so strictly speaking we
149 // don't need to handle immediates here. But it's easy enough, and it makes
150 // our ptx more readable.
152 (outs regclass:$dst),
153 (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
154 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
155 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
156 Int32Regs:$offset, Int32Regs:$mask))]>;
159 (outs regclass:$dst),
160 (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
161 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
162 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
163 Int32Regs:$offset, imm:$mask))]>;
166 (outs regclass:$dst),
167 (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
168 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
169 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
170 imm:$offset, Int32Regs:$mask))]>;
173 (outs regclass:$dst),
174 (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
175 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
176 [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
177 imm:$offset, imm:$mask))]>;
180 (outs regclass:$dst),
181 (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
182 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
183 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
184 Int32Regs:$offset, Int32Regs:$mask))]>;
187 (outs regclass:$dst),
188 (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
189 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
190 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
191 Int32Regs:$offset, imm:$mask))]>;
194 (outs regclass:$dst),
195 (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
196 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
197 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
198 imm:$offset, Int32Regs:$mask))]>;
201 (outs regclass:$dst),
202 (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
203 !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
204 [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
205 imm:$offset, imm:$mask))]>;
// Instantiate the SHFL_SYNC multiclass once per shuffle mode and per register
// class, pairing each with its matching int_nvvm_shfl_sync_* intrinsic.
//
// These do not have to be convergent on sm_70, so a non-convergent variant of
// this intrinsic may eventually be worth implementing.
defm INT_SHFL_SYNC_DOWN_I32
    : SHFL_SYNC<Int32Regs,   "down", int_nvvm_shfl_sync_down_i32>;
defm INT_SHFL_SYNC_DOWN_F32
    : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;

defm INT_SHFL_SYNC_UP_I32
    : SHFL_SYNC<Int32Regs,   "up",   int_nvvm_shfl_sync_up_i32>;
defm INT_SHFL_SYNC_UP_F32
    : SHFL_SYNC<Float32Regs, "up",   int_nvvm_shfl_sync_up_f32>;

defm INT_SHFL_SYNC_BFLY_I32
    : SHFL_SYNC<Int32Regs,   "bfly", int_nvvm_shfl_sync_bfly_i32>;
defm INT_SHFL_SYNC_BFLY_F32
    : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;

defm INT_SHFL_SYNC_IDX_I32
    : SHFL_SYNC<Int32Regs,   "idx",  int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32
    : SHFL_SYNC<Float32Regs, "idx",  int_nvvm_shfl_sync_idx_f32>;
220 // vote.{all,any,uni,ballot}
221 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
222 def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
223 "vote." # mode # " \t$dest, $pred;",
224 [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
225 Requires<[hasPTX60, hasSM30]>;
// Instantiate VOTE for each mode.  The all/any/uni forms write an Int1Regs
// destination; the ballot form writes an Int32Regs destination.
defm VOTE_ALL
    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY
    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI
    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT
    : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
233 // vote.sync.{all,any,uni,ballot}
234 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
235 def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
236 "vote.sync." # mode # " \t$dest, $pred, $mask;",
237 [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
238 Requires<[hasPTX60, hasSM30]>;
239 def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
240 "vote.sync." # mode #" \t$dest, $pred, $mask;",
241 [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
242 Requires<[hasPTX60, hasSM30]>;
// Instantiate VOTE_SYNC for each mode.  The all/any/uni forms write an
// Int1Regs destination; the ballot form writes an Int32Regs destination.
defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
250 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
252 def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
253 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
254 [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
255 Requires<[hasPTX60, hasSM70]>;
256 def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
257 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
258 [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
259 Requires<[hasPTX60, hasSM70]>;
260 def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
261 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
262 [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
263 Requires<[hasPTX60, hasSM70]>;
264 def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
265 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
266 [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
267 Requires<[hasPTX60, hasSM70]>;
270 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
272 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
275 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
277 def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
278 (ins i32imm:$mask, ImmOp:$value),
279 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
280 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
281 Requires<[hasPTX60, hasSM70]>;
282 def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
283 (ins Int32Regs:$mask, ImmOp:$value),
284 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
285 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
286 Requires<[hasPTX60, hasSM70]>;
287 def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
288 (ins i32imm:$mask, regclass:$value),
289 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
290 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
291 Requires<[hasPTX60, hasSM70]>;
292 def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
293 (ins Int32Regs:$mask, regclass:$value),
294 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
295 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
296 Requires<[hasPTX60, hasSM70]>;
298 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
300 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
303 } // isConvergent = 1
305 //-----------------------------------
306 // Explicit Memory Fence Functions
307 //-----------------------------------
308 class MEMBAR<string StrOp, Intrinsic IntOP> :
309 NVPTXInst<(outs), (ins),
// One MEMBAR record per membar variant (.cta, .gl, .sys), each tied to the
// corresponding int_nvvm_membar_* intrinsic.
def INT_MEMBAR_CTA
    : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
    : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
    : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
317 //-----------------------------------
319 //-----------------------------------
// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
// f32: fold fmin(1.0, fmax(0.0, a)) into a single CVT_f32_f32 with CvtSAT.
// The intrinsics are matched in every operand order, hence four patterns.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f
              immFloat1,
              (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f
              immFloat1,
              (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f
              (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
              immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f
              (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
              immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// f64: fold fmin(1.0, fmax(0.0, a)) into a single CVT_f64_f64 with CvtSAT.
// The intrinsics are matched in every operand order, hence four patterns.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d
              immDouble1,
              (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d
              immDouble1,
              (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d
              (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
              immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d
              (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
              immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
354 // We need a full string for OpcStr here because we need to deal with case like
356 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
357 NVPTXRegClass src_regclass, Intrinsic IntOP>
358 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
360 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
362 // We need a full string for OpcStr here because we need to deal with the case
363 // like INT_PTX_NATIVE_POWR_F.
364 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
365 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
366 : NVPTXInst<(outs t_regclass:$dst),
367 (ins s0_regclass:$src0, s1_regclass:$src1),
369 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
371 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
372 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
373 NVPTXRegClass s2_regclass, Intrinsic IntOP>
374 : NVPTXInst<(outs t_regclass:$dst),
375 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
377 [(set t_regclass:$dst,
378 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
// prmt.b32 for int_nvvm_prmt: three 32-bit integer sources, one 32-bit
// integer result.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_prmt>;
// Two-operand min / max for the int_nvvm_fmin_* / int_nvvm_fmax_* intrinsics.
// f32 has a plain and an .ftz form; f64 has the plain form only.

// f32 min
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_f>;

// f32 max
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_f>;

// f64 min / max
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fmax_d>;
// mul.hi for the int_nvvm_mulhi_* intrinsics, in signed and unsigned flavours
// for both 32-bit and 64-bit integer registers.

// 32-bit
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_mulhi_ui>;

// 64-bit
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs,
               int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs,
               int_nvvm_mulhi_ull>;
// Floating-point mul with an explicit rounding-mode suffix (rn, rz, rm, rp)
// for the int_nvvm_mul_* intrinsics.  Each f32 mode has an .ftz and a plain
// form; f64 has the plain form only.

// f32, .rn
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rn_f>;

// f32, .rz
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rz_f>;

// f32, .rm
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rm_f>;

// f32, .rp
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_mul_rp_f>;

// f64
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_mul_rp_d>;
// mul24.lo for int_nvvm_mul24_i (signed) and int_nvvm_mul24_ui (unsigned),
// operating on 32-bit integer registers.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_mul24_ui>;
// Floating-point div for the int_nvvm_div_* intrinsics: an approximate f32
// form plus explicitly rounded forms (rn, rz, rm, rp).  Each f32 variant has
// an .ftz and a plain form; f64 has the plain rounded forms only.

// f32, .approx
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_f>;

// f32, .rn
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rn_f>;

// f32, .rz
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rz_f>;

// f32, .rm
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rm_f>;

// f32, .rp
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_rp_f>;

// f64
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_div_rp_d>;
// Three-operand sad for int_nvvm_sad_i (signed) and int_nvvm_sad_ui
// (unsigned), operating on 32-bit integer registers.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs,
               int_nvvm_sad_ui>;
// floor and ceil have no dedicated instruction records here; each intrinsic is
// rewritten to a same-type CVT carrying the matching conversion mode.

// floor -> CvtRMI (CvtRMI_FTZ for the ftz intrinsic)
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil -> CvtRPI (CvtRPI_FTZ for the ftz intrinsic)
def : Pat<(int_nvvm_ceil_ftz_f  Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f      Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d      Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
// abs for the int_nvvm_fabs_* intrinsics: f32 with and without .ftz, and f64.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_fabs_d>;
// round, trunc and saturate are each rewritten to a same-type CVT carrying the
// matching conversion mode; the ftz intrinsics use the *_FTZ mode.

// round -> CvtRNI
def : Pat<(int_nvvm_round_ftz_f    Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f        Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d        Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

// trunc -> CvtRZI
def : Pat<(int_nvvm_trunc_ftz_f    Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f        Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d        Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

// saturate -> CvtSAT
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// ex2.approx / lg2.approx for the int_nvvm_ex2_approx_* and
// int_nvvm_lg2_approx_* intrinsics.
// NOTE(review): the _D records print a ".f64" form of ex2.approx / lg2.approx;
// confirm against the PTX ISA that these forms are accepted by ptxas.

// ex2
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_ex2_approx_d>;

// lg2
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_lg2_approx_d>;
// sin.approx / cos.approx for the int_nvvm_sin_approx_* and
// int_nvvm_cos_approx_* intrinsics; f32 only, with and without .ftz.

// sin
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sin_approx_f>;

// cos
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_cos_approx_f>;
// Three-operand fma with an explicit rounding-mode suffix (rn, rz, rm, rp) for
// the int_nvvm_fma_* intrinsics.  Each f32 mode has an .ftz and a plain form;
// f64 has the plain form only.

// f32, .rn
def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;

// f32, .rz
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;

// f32, .rm
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;

// f32, .rp
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

// f64
def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;
// rcp for the int_nvvm_rcp_* intrinsics: explicitly rounded forms (rn, rz, rm,
// rp) for f32 (with and without .ftz) and f64, plus an approximate .ftz f64
// form.

// f32, .rn
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rn_f>;

// f32, .rz
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rz_f>;

// f32, .rm
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rm_f>;

// f32, .rp
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rcp_rp_f>;

// f64, rounded
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_rcp_rp_d>;

// f64, approximate
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_rcp_approx_ftz_d>;
// sqrt for the int_nvvm_sqrt_* intrinsics: explicitly rounded forms (rn, rz,
// rm, rp) for f32 (with and without .ftz) and f64, plus approximate f32 forms.

// f32, .rn
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rn_f>;

// f32, .rz
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rz_f>;

// f32, .rm
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rm_f>;

// f32, .rp
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_rp_f>;

// f32, .approx
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_sqrt_approx_f>;

// f64
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_sqrt_rp_d>;
// nvvm_sqrt intrinsic.
// int_nvvm_sqrt_f is mapped onto one of the concrete f32 sqrt records above.
// The patterns are listed from most to least constrained: the doF32FTZ and
// do_SQRTF32_RN predicates pick the .ftz and .rn forms respectively, and the
// final pattern carries no predicate at all.
let Predicates = [doF32FTZ, do_SQRTF32_RN] in
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>;

let Predicates = [do_SQRTF32_RN] in
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>;

let Predicates = [doF32FTZ] in
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>;

def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
// rsqrt.approx for the int_nvvm_rsqrt_approx_* intrinsics: f32 with and
// without .ftz, and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs,
               int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs,
               int_nvvm_rsqrt_approx_d>;
// Floating-point add with an explicit rounding-mode suffix (rn, rz, rm, rp)
// for the int_nvvm_add_* intrinsics.  Each f32 mode has an .ftz and a plain
// form; f64 has the plain form only.

// f32, .rn
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rn_f>;

// f32, .rz
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rz_f>;

// f32, .rm
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rm_f>;

// f32, .rp
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_add_rp_f>;

// f64
def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_add_rp_d>;
// int_nvvm_d2f_<mode>[_ftz]: each intrinsic on a Float64Regs operand is
// rewritten to CVT_f32_f64 with the conversion-mode operand of the same name
// (CvtRN, CvtRZ, CvtRM, CvtRP, or the corresponding *_FTZ value).
749 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
750 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
751 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
752 (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
753 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
754 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
755 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
756 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
757 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
758 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
759 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
760 (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
761 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
762 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
763 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
764 (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
// int_nvvm_d2i_<mode>  -> CVT_s32_f64 and int_nvvm_d2ui_<mode> -> CVT_u32_f64.
// The mode operand is the "I"-suffixed value matching the intrinsic suffix
// (CvtRNI, CvtRZI, CvtRMI, CvtRPI).
766 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
767 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
768 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
769 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
770 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
771 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
772 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
773 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
775 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
776 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
777 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
778 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
779 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
780 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
781 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
782 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
// int_nvvm_i2d_<mode>  -> CVT_f64_s32 and int_nvvm_ui2d_<mode> -> CVT_f64_u32,
// both on an Int32Regs operand, with mode operand CvtRN / CvtRZ / CvtRM / CvtRP.
784 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
785 (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
786 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
787 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
788 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
789 (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
790 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
791 (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
793 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
794 (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
795 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
796 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
797 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
798 (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
799 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
800 (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
// int_nvvm_f2i_<mode>[_ftz]  -> CVT_s32_f32 and
// int_nvvm_f2ui_<mode>[_ftz] -> CVT_u32_f32, both on a Float32Regs operand.
// The mode operand mirrors the intrinsic suffix: CvtR{N,Z,M,P}I, or
// CvtR{N,Z,M,P}I_FTZ for the _ftz intrinsics.
802 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
803 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
804 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
805 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
806 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
807 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
808 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
809 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
810 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
811 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
812 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
813 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
814 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
815 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
816 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
817 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
819 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
820 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
821 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
822 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
823 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
824 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
825 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
826 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
827 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
828 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
829 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
830 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
831 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
832 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
833 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
834 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_i2f_<mode>  -> CVT_f32_s32 and int_nvvm_ui2f_<mode> -> CVT_f32_u32,
// both on an Int32Regs operand, with mode operand CvtRN / CvtRZ / CvtRM / CvtRP.
836 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
837 (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
838 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
839 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
840 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
841 (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
842 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
843 (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
845 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
846 (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
847 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
848 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
849 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
850 (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
851 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
852 (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
// Moves between one 64-bit register and a pair of 32-bit registers.
//   INT_NVVM_LOHI_I2D: mov.b64 of the pair {$src0, $src1} into a Float64Regs
//                      $dst (int_nvvm_lohi_i2d).
//   INT_NVVM_D2I_LO:   mov.b64 of $src0 into {$dst, %temp}; $dst is the first
//                      element of the pair (int_nvvm_d2i_lo).
//   INT_NVVM_D2I_HI:   mov.b64 of $src0 into {%temp, $dst}; $dst is the second
//                      element of the pair (int_nvvm_d2i_hi).
// %temp is a .b32 register declared inside the emitted asm text.
// NOTE(review): the embedded numbering skips 858/861 and 864/867, so the lines
// that open and close the asm-string expression of the two D2I defs appear to
// be missing from this listing -- restore them from the upstream file.
854 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
855 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
857 def INT_NVVM_D2I_LO : F_MATH_1<
859 ".reg .b32 %temp; \n\t",
860 "mov.b64 \t{$dst, %temp}, $src0;\n\t",
862 Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
863 def INT_NVVM_D2I_HI : F_MATH_1<
865 ".reg .b32 %temp; \n\t",
866 "mov.b64 \t{%temp, $dst}, $src0;\n\t",
868 Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
// int_nvvm_f2ll_<mode>[_ftz]  -> CVT_s64_f32 and
// int_nvvm_f2ull_<mode>[_ftz] -> CVT_u64_f32, both on a Float32Regs operand.
// The mode operand mirrors the intrinsic suffix: CvtR{N,Z,M,P}I, or
// CvtR{N,Z,M,P}I_FTZ for the _ftz intrinsics.
870 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
871 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
872 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
873 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
874 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
875 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
876 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
877 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
878 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
879 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
880 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
881 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
882 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
883 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
884 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
885 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
887 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
888 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
889 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
890 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
891 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
892 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
893 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
894 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
895 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
896 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
897 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
898 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
899 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
900 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
901 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
902 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
// int_nvvm_d2ll_<mode>  -> CVT_s64_f64 and int_nvvm_d2ull_<mode> -> CVT_u64_f64,
// both on a Float64Regs operand, with mode operand CvtRNI / CvtRZI / CvtRMI /
// CvtRPI.
904 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
905 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
906 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
907 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
908 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
909 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
910 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
911 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
913 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
914 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
915 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
916 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
917 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
918 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
919 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
920 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
// Conversions from an Int64Regs operand, mode operand CvtRN/CvtRZ/CvtRM/CvtRP:
//   int_nvvm_ll2f_<mode>  -> CVT_f32_s64
//   int_nvvm_ull2f_<mode> -> CVT_f32_u64
//   int_nvvm_ll2d_<mode>  -> CVT_f64_s64
//   int_nvvm_ull2d_<mode> -> CVT_f64_u64
922 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
923 (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
924 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
925 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
926 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
927 (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
928 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
929 (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
931 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
932 (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
933 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
934 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
935 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
936 (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
937 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
938 (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
940 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
941 (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
942 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
943 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
944 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
945 (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
946 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
947 (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
949 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
950 (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
951 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
952 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
953 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
954 (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
955 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
956 (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
// int_nvvm_f2h_rn[_ftz]: the Float32Regs operand goes through CVT_f16_f32 with
// CvtRN (or CvtRN_FTZ), and that result is wrapped in BITCONVERT_16_F2I.
// NOTE(review): BITCONVERT_16_F2I is defined outside this section; presumably
// it reinterprets the f16 result as a 16-bit integer -- confirm.
959 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
960 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
961 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
962 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
// Bitcast intrinsics emitted as plain register moves:
//   int_nvvm_bitcast_f2i / _i2f   -> mov.b32 between Float32Regs and Int32Regs
//   int_nvvm_bitcast_ll2d / _d2ll -> mov.b64 between Int64Regs and Float64Regs
// In each def the first register class belongs to the type the intrinsic name
// converts *to*, the second to the type it converts *from*.
968 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
969 Float32Regs, int_nvvm_bitcast_f2i>;
970 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
971 Int32Regs, int_nvvm_bitcast_i2f>;
973 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
974 Int64Regs, int_nvvm_bitcast_ll2d>;
975 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
976 Float64Regs, int_nvvm_bitcast_d2ll>;
// Base class for every fns.b32 instruction form.
//   ins      - input operand dag; it must name its operands $mask, $base and
//              $offset because the asm string refers to them by those names.
//   Operands - dag whose result is assigned to the Int32Regs $dst in the
//              selection pattern.
// All derived instructions are gated by Requires<[hasPTX60, hasSM30]>.
982 class INT_FNS_MBO<dag ins, dag Operands>
983 : NVPTXInst<(outs Int32Regs:$dst), ins,
984 "fns.b32 \t$dst, $mask, $base, $offset;",
985 [(set Int32Regs:$dst, Operands )]>,
986 Requires<[hasPTX60, hasSM30]>;
// The eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the operand kind of $mask, $base and $offset in that order:
// 'r' = Int32Regs, 'i' = i32imm (matched with imm in the pattern).
988 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
989 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
990 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
991 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
992 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
993 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
994 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
995 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
996 def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
997 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
998 def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
999 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
1000 def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
1001 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
1002 def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
1003 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1005 //-----------------------------------
// NOTE(review): embedded line 1006, between these two rules, is absent from
// this listing; presumably it held the title of the section that starts here.
1007 //-----------------------------------
// Address-space-checked PatFrag classes. Each wraps an arbitrary fragment
// (ops, frag) with predicate code that returns
// ChkMemSDNodeAddressSpace(N, <space>) for one fixed address space:
//   ATOMIC_GLOBAL_CHK  - llvm::ADDRESS_SPACE_GLOBAL
//   ATOMIC_SHARED_CHK  - llvm::ADDRESS_SPACE_SHARED
//   ATOMIC_GENERIC_CHK - llvm::ADDRESS_SPACE_GENERIC
// NOTE(review): the embedded numbering skips the line after each return
// statement (1012, 1016, 1020), so the terminators of the three code blocks
// appear to be missing from this listing -- restore them from upstream.
1009 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1010 : PatFrag<ops, frag, [{
1011 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
1013 class ATOMIC_SHARED_CHK <dag ops, dag frag>
1014 : PatFrag<ops, frag, [{
1015 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
1017 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1018 : PatFrag<ops, frag, [{
1019 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass - register class of $addr
//   regclass - register class of $dst and of the register form of $b
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//              build the mnemonic
//   IntOp    - fragment matched as (IntOp addr, b)
//   IMMType  - operand type of $b in the immediate form
//   IMM      - node used to match $b in the immediate form's pattern
//   Pred     - predicate list (not referenced on any line visible here)
// Defines two records: `reg` ($b in a register) and `imm` ($b an immediate).
// The trailing "" in the `imm` !strconcat contributes nothing to the string.
// NOTE(review): the embedded numbering skips 1028 and 1032-1033; the
// continuation after each pattern list and the closing brace appear to be
// missing from this listing -- restore them from upstream.
1022 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1023 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1024 Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1025 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1026 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1027 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1029 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1030 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1031 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs (`p32`) and once with
// Int64Regs (`p64`) as the pointer register class; every other argument is
// forwarded unchanged. Pred defaults to the empty list.
// NOTE(review): embedded line 1041 is absent; presumably the closing brace.
1034 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1035 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1036 list<Predicate> Pred = []> {
1037 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1038 IntOp, IMMType, IMM, Pred>;
1039 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1040 IntOp, IMMType, IMM, Pred>;
// Two-operand atomic that negates its value operand before the atomic op.
// The visible asm text declares a scratch register `temp` of type .s<TypeStr>,
// emits neg.s<TypeStr> temp, $b, then emits
// atom<SpaceStr><OpcStr>.u<TypeStr> $dst, [$addr], temp.
// TypeStr is therefore a bare width such as "32" (no leading dot or type
// letter). Only a register form (`reg`) is visible; IMMType is not referenced
// on any line shown here.
// NOTE(review): the embedded numbering skips 1048-1049, 1053 and 1055-1056;
// the asm-string opener/closer, the continuation after the pattern list and
// the closing brace appear to be missing from this listing.
1043 // has 2 operands, neg the second one
1044 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1045 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1046 Operand IMMType, list<Predicate> Pred> {
1047 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1050 ".reg \t.s", TypeStr, " temp; \n\t",
1051 "neg.s", TypeStr, " \ttemp, $b; \n\t",
1052 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1054 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
// Instantiates F_ATOMIC_2_NEG_imp twice, once with Int32Regs (`p32`) and once
// with Int64Regs (`p64`) as the pointer register class; every other argument
// is forwarded unchanged. Pred defaults to the empty list.
// NOTE(review): embedded line 1064 is absent; presumably the closing brace.
1057 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1058 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
1059 list<Predicate> Pred = []> {
1060 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1061 IntOp, IMMType, Pred> ;
1062 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1063 IntOp, IMMType, Pred> ;
// Three-operand atomic (address + two values) for a single pointer width.
// The mnemonic is "atom" followed by SpaceStr, OpcStr and TypeStr; the asm
// operands are $dst, [$addr], $b, $c. Four records cover the operand kinds:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use IMMType in the ins list and are always matched with
// `imm` in the pattern (unlike F_ATOMIC_2_imp, there is no IMM parameter).
// The trailing "" in the `imm2` !strconcat contributes nothing to the string.
// NOTE(review): the embedded numbering skips two lines after every pattern
// list (1074-1075, 1080-1081, 1086-1087, 1092-1093); the continuations and the
// closing brace appear to be missing from this listing.
1067 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1068 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1069 Operand IMMType, list<Predicate> Pred> {
1070 def reg : NVPTXInst<(outs regclass:$dst),
1071 (ins ptrclass:$addr, regclass:$b, regclass:$c),
1072 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1073 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1076 def imm1 : NVPTXInst<(outs regclass:$dst),
1077 (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1078 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1079 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1082 def imm2 : NVPTXInst<(outs regclass:$dst),
1083 (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1084 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1085 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1088 def imm3 : NVPTXInst<(outs regclass:$dst),
1089 (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1090 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1091 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
// Instantiates F_ATOMIC_3_imp twice, once with Int32Regs (`p32`) and once with
// Int64Regs (`p64`) as the pointer register class; every other argument is
// forwarded unchanged. Pred defaults to the empty list.
// NOTE(review): embedded line 1100 is absent; presumably the closing brace.
1094 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1095 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1096 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1097 IntOp, IMMType, Pred>;
1098 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1099 IntOp, IMMType, Pred>;
// Address-space-qualified fragments for atomic add. Suffixes _g / _s / _gen
// select ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK.
// The integer fragments wrap atomic_load_add_32 and atomic_load_add_64; the
// floating-point fragments wrap the int_nvvm_atomic_load_add_f32 and
// int_nvvm_atomic_load_add_f64 intrinsics.
1104 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1105 (atomic_load_add_32 node:$a, node:$b)>;
1106 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1107 (atomic_load_add_32 node:$a, node:$b)>;
1108 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1109 (atomic_load_add_32 node:$a, node:$b)>;
1110 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1111 (atomic_load_add_64 node:$a, node:$b)>;
1112 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1113 (atomic_load_add_64 node:$a, node:$b)>;
1114 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1115 (atomic_load_add_64 node:$a, node:$b)>;
1116 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1117 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1118 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1119 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1120 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1121 (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1122 def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1123 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1124 def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1125 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1126 def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1127 (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
// atom[.space].add instructions via F_ATOMIC_2.
//   _G_   uses ".global" with the *_g fragment,
//   _S_   uses ".shared" with the *_s fragment,
//   _GEN_ uses an empty space string with the *_gen fragment,
//   _GEN_*_USE_G pairs the *_gen fragment with the ".global" space string.
// Integer forms use .u32/.u64 with i32imm/i64imm and `imm`; floating-point
// forms use .f32/.f64 with f32imm/f64imm and `fpimm`. The three f64 forms pass
// [hasAtomAddF64] as their predicate list; the others use the default.
// NOTE(review): no predicate distinguishes a _GEN_ form from its _USE_G twin
// in the lines shown; how one is chosen over the other is not visible here.
1129 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1130 atomic_load_add_32_g, i32imm, imm>;
1131 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1132 atomic_load_add_32_s, i32imm, imm>;
1133 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1134 atomic_load_add_32_gen, i32imm, imm>;
1135 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1136 ".add", atomic_load_add_32_gen, i32imm, imm>;
1138 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1139 atomic_load_add_64_g, i64imm, imm>;
1140 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1141 atomic_load_add_64_s, i64imm, imm>;
1142 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1143 atomic_load_add_64_gen, i64imm, imm>;
1144 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1145 ".add", atomic_load_add_64_gen, i64imm, imm>;
1147 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1148 atomic_load_add_f32_g, f32imm, fpimm>;
1149 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1150 atomic_load_add_f32_s, f32imm, fpimm>;
1151 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1152 atomic_load_add_f32_gen, f32imm, fpimm>;
1154 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1155 atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
1156 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1157 atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
1158 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1159 atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
// Address-space-qualified fragments for atomic subtract. Suffixes _g / _s /
// _gen select ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK around
// atomic_load_sub_32 and atomic_load_sub_64.
1163 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1164 (atomic_load_sub_32 node:$a, node:$b)>;
1165 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1166 (atomic_load_sub_32 node:$a, node:$b)>;
1167 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1168 (atomic_load_sub_32 node:$a, node:$b)>;
1169 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1170 (atomic_load_sub_64 node:$a, node:$b)>;
1171 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1172 (atomic_load_sub_64 node:$a, node:$b)>;
1173 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1174 (atomic_load_sub_64 node:$a, node:$b)>;
// Atomic subtract, instantiated through F_ATOMIC_2_NEG with OpcStr ".add":
// the atomic_load_sub_* fragments are matched and an add-based sequence is
// emitted (see the asm text in F_ATOMIC_2_NEG_imp). TypeStr is passed as a
// bare width ("32" / "64"). Space naming follows the other atomics: _G_ is
// ".global", _S_ is ".shared", _GEN_ is "", and _GEN_*_USE_G pairs the *_gen
// fragment with ".global".
1176 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1177 atomic_load_sub_32_g, i32imm>;
1178 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1179 atomic_load_sub_64_g, i64imm>;
1180 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1181 atomic_load_sub_32_gen, i32imm>;
1182 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1183 ".add", atomic_load_sub_32_gen, i32imm>;
1184 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1185 atomic_load_sub_32_s, i32imm>;
1186 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1187 atomic_load_sub_64_s, i64imm>;
1188 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1189 atomic_load_sub_64_gen, i64imm>;
1190 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1191 ".add", atomic_load_sub_64_gen, i64imm>;
// Address-space-qualified fragments for atomic swap. Suffixes _g / _s / _gen
// select ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK around
// atomic_swap_32 and atomic_swap_64.
1195 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1196 (atomic_swap_32 node:$a, node:$b)>;
1197 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1198 (atomic_swap_32 node:$a, node:$b)>;
1199 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1200 (atomic_swap_32 node:$a, node:$b)>;
1201 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1202 (atomic_swap_64 node:$a, node:$b)>;
1203 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1204 (atomic_swap_64 node:$a, node:$b)>;
1205 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1206 (atomic_swap_64 node:$a, node:$b)>;
// atom[.space].exch instructions via F_ATOMIC_2, typed .b32 / .b64, matching
// the atomic_swap_* fragments. _G_ is ".global", _S_ is ".shared", _GEN_ is
// the empty space string, and _GEN_*_USE_G pairs the *_gen fragment with
// ".global".
1208 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1209 atomic_swap_32_g, i32imm, imm>;
1210 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1211 atomic_swap_32_s, i32imm, imm>;
1212 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1213 atomic_swap_32_gen, i32imm, imm>;
1214 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1215 ".exch", atomic_swap_32_gen, i32imm, imm>;
1216 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1217 atomic_swap_64_g, i64imm, imm>;
1218 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1219 atomic_swap_64_s, i64imm, imm>;
1220 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1221 atomic_swap_64_gen, i64imm, imm>;
1222 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1223 ".exch", atomic_swap_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic max. Suffixes _g / _s / _gen
// select ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK around
// atomic_load_max_{32,64} and atomic_load_umax_{32,64}.
// The two *_max_*_g defs place the separating comma at the start of the
// continuation line; this is only a layout difference from their siblings.
1227 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1228 , (atomic_load_max_32 node:$a, node:$b)>;
1229 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1230 (atomic_load_max_32 node:$a, node:$b)>;
1231 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1232 (atomic_load_max_32 node:$a, node:$b)>;
1233 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1234 , (atomic_load_max_64 node:$a, node:$b)>;
1235 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1236 (atomic_load_max_64 node:$a, node:$b)>;
1237 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1238 (atomic_load_max_64 node:$a, node:$b)>;
1239 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1240 (atomic_load_umax_32 node:$a, node:$b)>;
1241 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1242 (atomic_load_umax_32 node:$a, node:$b)>;
1243 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1244 (atomic_load_umax_32 node:$a, node:$b)>;
1245 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1246 (atomic_load_umax_64 node:$a, node:$b)>;
1247 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1248 (atomic_load_umax_64 node:$a, node:$b)>;
1249 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1250 (atomic_load_umax_64 node:$a, node:$b)>;
// atom[.space].max instructions via F_ATOMIC_2. The LOAD_MAX forms match the
// atomic_load_max_* fragments and are typed .s32/.s64; the LOAD_UMAX forms
// match atomic_load_umax_* and are typed .u32/.u64. _G_ is ".global", _S_ is
// ".shared", _GEN_ is the empty space string, and _GEN_*_USE_G pairs the *_gen
// fragment with ".global".
1252 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1253 ".max", atomic_load_max_32_g, i32imm, imm>;
1254 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1255 ".max", atomic_load_max_32_s, i32imm, imm>;
1256 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1257 atomic_load_max_32_gen, i32imm, imm>;
1258 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1259 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1260 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1261 ".max", atomic_load_max_64_g, i64imm, imm>;
1262 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1263 ".max", atomic_load_max_64_s, i64imm, imm>;
1264 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1265 atomic_load_max_64_gen, i64imm, imm>;
1266 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1267 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
1268 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1269 ".max", atomic_load_umax_32_g, i32imm, imm>;
1270 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1271 ".max", atomic_load_umax_32_s, i32imm, imm>;
1272 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1273 atomic_load_umax_32_gen, i32imm, imm>;
1274 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1275 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1276 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1277 ".max", atomic_load_umax_64_g, i64imm, imm>;
1278 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1279 ".max", atomic_load_umax_64_s, i64imm, imm>;
1280 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1281 atomic_load_umax_64_gen, i64imm, imm>;
1282 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1283 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
// Address-space-qualified fragments for atomic min. Suffixes _g / _s / _gen
// select ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK around
// atomic_load_min_{32,64} and atomic_load_umin_{32,64}.
1287 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1288 (atomic_load_min_32 node:$a, node:$b)>;
1289 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1290 (atomic_load_min_32 node:$a, node:$b)>;
1291 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1292 (atomic_load_min_32 node:$a, node:$b)>;
1293 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1294 (atomic_load_min_64 node:$a, node:$b)>;
1295 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1296 (atomic_load_min_64 node:$a, node:$b)>;
1297 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1298 (atomic_load_min_64 node:$a, node:$b)>;
1299 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1300 (atomic_load_umin_32 node:$a, node:$b)>;
1301 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1302 (atomic_load_umin_32 node:$a, node:$b)>;
1303 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1304 (atomic_load_umin_32 node:$a, node:$b)>;
1305 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1306 (atomic_load_umin_64 node:$a, node:$b)>;
1307 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1308 (atomic_load_umin_64 node:$a, node:$b)>;
1309 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1310 (atomic_load_umin_64 node:$a, node:$b)>;
// atom[.space].min instructions via F_ATOMIC_2. The LOAD_MIN forms match the
// atomic_load_min_* fragments and are typed .s32/.s64; the LOAD_UMIN forms
// match atomic_load_umin_* and are typed .u32/.u64. _G_ is ".global", _S_ is
// ".shared", _GEN_ is the empty space string, and _GEN_*_USE_G pairs the *_gen
// fragment with ".global".
1312 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1313 ".min", atomic_load_min_32_g, i32imm, imm>;
1314 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1315 ".min", atomic_load_min_32_s, i32imm, imm>;
1316 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1317 atomic_load_min_32_gen, i32imm, imm>;
1318 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1319 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1320 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1321 ".min", atomic_load_min_64_g, i64imm, imm>;
1322 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1323 ".min", atomic_load_min_64_s, i64imm, imm>;
1324 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1325 atomic_load_min_64_gen, i64imm, imm>;
1326 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1327 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1328 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1329 ".min", atomic_load_umin_32_g, i32imm, imm>;
1330 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1331 ".min", atomic_load_umin_32_s, i32imm, imm>;
1332 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1333 atomic_load_umin_32_gen, i32imm, imm>;
1334 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1335 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1336 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1337 ".min", atomic_load_umin_64_g, i64imm, imm>;
1338 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1339 ".min", atomic_load_umin_64_s, i64imm, imm>;
1340 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1341 atomic_load_umin_64_gen, i64imm, imm>;
1342 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1343 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
// Address-space-qualified fragments around the int_nvvm_atomic_load_inc_32 and
// int_nvvm_atomic_load_dec_32 intrinsics. Suffixes _g / _s / _gen select
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK. Only 32-bit
// fragments are defined in this group.
1345 // atom_inc atom_dec
1347 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1348 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1349 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1350 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1351 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1352 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1353 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1354 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1355 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1356 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1357 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1358 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
// atom[.space].inc.u32 and atom[.space].dec.u32 instructions via F_ATOMIC_2,
// matching the atomic_load_inc_32_* / atomic_load_dec_32_* fragments. _G_ is
// ".global", _S_ is ".shared", _GEN_ is the empty space string, and
// _GEN_32_USE_G pairs the *_gen fragment with ".global".
1360 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1361 atomic_load_inc_32_g, i32imm, imm>;
1362 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1363 atomic_load_inc_32_s, i32imm, imm>;
1364 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1365 atomic_load_inc_32_gen, i32imm, imm>;
1366 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1367 ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1368 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1369 atomic_load_dec_32_g, i32imm, imm>;
1370 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1371 atomic_load_dec_32_s, i32imm, imm>;
1372 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1373 atomic_load_dec_32_gen, i32imm, imm>;
1374 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1375 ".dec", atomic_load_dec_32_gen, i32imm, imm>;
// Address-space-qualified fragments for atomic AND. Suffixes _g / _s / _gen
// select ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK around
// atomic_load_and_32 and atomic_load_and_64.
1379 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1380 (atomic_load_and_32 node:$a, node:$b)>;
1381 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1382 (atomic_load_and_32 node:$a, node:$b)>;
1383 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1384 (atomic_load_and_32 node:$a, node:$b)>;
1385 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1386 (atomic_load_and_64 node:$a, node:$b)>;
1387 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1388 (atomic_load_and_64 node:$a, node:$b)>;
1389 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1390 (atomic_load_and_64 node:$a, node:$b)>;
// Atomic AND instructions, 32- and 64-bit. Each defm instantiates
// F_ATOMIC_2 with: register class, space string (".global", ".shared", or ""
// for GEN), type string (".b32" / ".b64"), operation string ".and", the
// matching atomic_load_and_* fragment, and the immediate operand type / node.
// The 32-bit group uses Int32Regs + i32imm; the 64-bit group uses
// Int64Regs + i64imm. The *_USE_G variants pair the generic (_gen) fragment
// with the ".global" space string.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk.
1392 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1393 atomic_load_and_32_g, i32imm, imm>;
1394 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1395 atomic_load_and_32_s, i32imm, imm>;
1396 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1397 atomic_load_and_32_gen, i32imm, imm>;
1398 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1399 ".and", atomic_load_and_32_gen, i32imm, imm>;
// 64-bit variants.
1400 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1401 atomic_load_and_64_g, i64imm, imm>;
1402 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1403 atomic_load_and_64_s, i64imm, imm>;
1404 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1405 atomic_load_and_64_gen, i64imm, imm>;
1406 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1407 ".and", atomic_load_and_64_gen, i64imm, imm>;
// Pattern-fragment style definitions for atomic OR, wrapping the
// atomic_load_or_32 / atomic_load_or_64 nodes. One definition per width
// (32, 64) and per *_CHK class (_g, _s, _gen).
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they restrict the match by pointer address space -- confirm.
1411 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1412 (atomic_load_or_32 node:$a, node:$b)>;
1413 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1414 (atomic_load_or_32 node:$a, node:$b)>;
1415 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1416 (atomic_load_or_32 node:$a, node:$b)>;
1417 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1418 (atomic_load_or_64 node:$a, node:$b)>;
1419 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1420 (atomic_load_or_64 node:$a, node:$b)>;
1421 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1422 (atomic_load_or_64 node:$a, node:$b)>;
// Atomic OR instructions, 32- and 64-bit. Each defm instantiates
// F_ATOMIC_2 with: register class, space string (".global", ".shared", or ""
// for GEN), type string (".b32" / ".b64"), operation string ".or", the
// matching atomic_load_or_* fragment, and the immediate operand type / node.
// The *_USE_G variants pair the generic (_gen) fragment with ".global".
// Note the listing order within each width is G, GEN, GEN_USE_G, S here,
// whereas the AND/XOR groups list S before GEN.
// NOTE(review): F_ATOMIC_2 is defined outside this chunk.
1424 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1425 atomic_load_or_32_g, i32imm, imm>;
1426 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1427 atomic_load_or_32_gen, i32imm, imm>;
1428 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1429 ".or", atomic_load_or_32_gen, i32imm, imm>;
1430 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1431 atomic_load_or_32_s, i32imm, imm>;
// 64-bit variants.
1432 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1433 atomic_load_or_64_g, i64imm, imm>;
1434 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1435 atomic_load_or_64_gen, i64imm, imm>;
1436 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1437 ".or", atomic_load_or_64_gen, i64imm, imm>;
1438 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1439 atomic_load_or_64_s, i64imm, imm>;
// Pattern-fragment style definitions for atomic XOR, wrapping the
// atomic_load_xor_32 / atomic_load_xor_64 nodes. One definition per width
// (32, 64) and per *_CHK class (_g, _s, _gen).
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they restrict the match by pointer address space -- confirm.
1443 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1444 (atomic_load_xor_32 node:$a, node:$b)>;
1445 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1446 (atomic_load_xor_32 node:$a, node:$b)>;
1447 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1448 (atomic_load_xor_32 node:$a, node:$b)>;
1449 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1450 (atomic_load_xor_64 node:$a, node:$b)>;
1451 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1452 (atomic_load_xor_64 node:$a, node:$b)>;
1453 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1454 (atomic_load_xor_64 node:$a, node:$b)>;
// Atomic XOR instructions, 32- and 64-bit. Each defm instantiates
// F_ATOMIC_2 with: register class, space string (".global", ".shared", or ""
// for GEN), type string (".b32" / ".b64"), operation string ".xor", the
// matching atomic_load_xor_* fragment, and the immediate operand type / node.
// The *_USE_G variants pair the generic (_gen) fragment with ".global".
// NOTE(review): F_ATOMIC_2 is defined outside this chunk.
1456 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1457 atomic_load_xor_32_g, i32imm, imm>;
1458 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1459 atomic_load_xor_32_s, i32imm, imm>;
1460 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1461 atomic_load_xor_32_gen, i32imm, imm>;
1462 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1463 ".xor", atomic_load_xor_32_gen, i32imm, imm>;
// 64-bit variants.
1464 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1465 atomic_load_xor_64_g, i64imm, imm>;
1466 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1467 atomic_load_xor_64_s, i64imm, imm>;
1468 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1469 atomic_load_xor_64_gen, i64imm, imm>;
1470 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1471 ".xor", atomic_load_xor_64_gen, i64imm, imm>;
// Pattern-fragment style definitions for atomic compare-and-swap, wrapping
// the atomic_cmp_swap_32 / atomic_cmp_swap_64 nodes. These take three
// operands ($a, $b, $c) instead of the two used by the other atomics in
// this file section. One definition per width (32, 64) and per *_CHK class
// (_g, _s, _gen).
// NOTE(review): the ATOMIC_*_CHK classes are defined outside this chunk;
// presumably they restrict the match by pointer address space -- confirm.
1475 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1476 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1477 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1478 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1479 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1480 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1481 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1482 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1483 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1484 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1485 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1486 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
// Atomic compare-and-swap instructions, 32- and 64-bit. These instantiate
// F_ATOMIC_3 (not F_ATOMIC_2) with: register class, space string
// (".global", ".shared", or "" for GEN), type string (".b32" / ".b64"),
// operation string ".cas", the matching atomic_cmp_swap_* fragment, and the
// immediate operand type. Unlike the F_ATOMIC_2 uses above, no separate
// `imm` node argument is passed.
// The *_USE_G variants pair the generic (_gen) fragment with ".global".
// NOTE(review): F_ATOMIC_3 is defined outside this chunk.
1488 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1489 atomic_cmp_swap_32_g, i32imm>;
1490 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1491 atomic_cmp_swap_32_s, i32imm>;
1492 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1493 atomic_cmp_swap_32_gen, i32imm>;
1494 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1495 ".cas", atomic_cmp_swap_32_gen, i32imm>;
// 64-bit variants.
1496 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1497 atomic_cmp_swap_64_g, i64imm>;
1498 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1499 atomic_cmp_swap_64_s, i64imm>;
1500 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1501 atomic_cmp_swap_64_gen, i64imm>;
1502 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1503 ".cas", atomic_cmp_swap_64_gen, i64imm>;
1505 // Support for scoped atomic operations. Matches
1506 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
1507 // and converts it into the appropriate instruction.
1508 // NOTE: not all possible combinations are implemented
1509 // 'space' is limited to generic as it's the only one needed to support CUDA.
1510 // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
1511 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1512 dag ins, dag Operands>
1513 : NVPTXInst<(outs regclass:$result), ins,
1515 [(set regclass:$result, Operands)]>,
1518 // Define instruction variants for all addressing modes.
1519 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1520 NVPTXRegClass regclass, Operand ImmType,
1521 SDNode Imm, ValueType ImmTy,
1522 list<Predicate> Preds> {
1523 let AddedComplexity = 1 in {
1524 def : ATOM23_impl<AsmStr, regclass, Preds,
1525 (ins Int32Regs:$src, regclass:$b),
1526 (Intr Int32Regs:$src, regclass:$b)>;
1527 def : ATOM23_impl<AsmStr, regclass, Preds,
1528 (ins Int64Regs:$src, regclass:$b),
1529 (Intr Int64Regs:$src, regclass:$b)>;
1531 // tablegen can't infer argument types from Intrinsic (though it can
1532 // from Instruction) so we have to enforce specific type on
1533 // immediates via explicit cast to ImmTy.
1534 def : ATOM23_impl<AsmStr, regclass, Preds,
1535 (ins Int32Regs:$src, ImmType:$b),
1536 (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1537 def : ATOM23_impl<AsmStr, regclass, Preds,
1538 (ins Int64Regs:$src, ImmType:$b),
1539 (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1542 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1543 NVPTXRegClass regclass, Operand ImmType,
1544 SDNode Imm, ValueType ImmTy,
1545 list<Predicate> Preds> {
1546 // Variants for register/immediate permutations of $b and $c
1547 let AddedComplexity = 2 in {
1548 def : ATOM23_impl<AsmStr, regclass, Preds,
1549 (ins Int32Regs:$src, regclass:$b, regclass:$c),
1550 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1551 def : ATOM23_impl<AsmStr, regclass, Preds,
1552 (ins Int64Regs:$src, regclass:$b, regclass:$c),
1553 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1555 let AddedComplexity = 1 in {
1556 def : ATOM23_impl<AsmStr, regclass, Preds,
1557 (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1558 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1559 def : ATOM23_impl<AsmStr, regclass, Preds,
1560 (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1561 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1562 def : ATOM23_impl<AsmStr, regclass, Preds,
1563 (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1564 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1565 def : ATOM23_impl<AsmStr, regclass, Preds,
1566 (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1567 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1569 def : ATOM23_impl<AsmStr, regclass, Preds,
1570 (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1571 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1572 def : ATOM23_impl<AsmStr, regclass, Preds,
1573 (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1574 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1577 // Constructs intrinsic name and instruction asm strings.
1578 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1579 string ScopeStr, string SpaceStr,
1580 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1581 ValueType ImmTy, list<Predicate> Preds> {
1582 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1583 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1584 # "." # OpStr # "." # TypeStr
1585 # " \t$result, [$src], $b;",
1587 "int_nvvm_atomic_" # OpStr
1588 # "_" # SpaceStr # "_" # IntTypeStr
1589 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1590 regclass, ImmType, Imm, ImmTy, Preds>;
1592 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1593 string ScopeStr, string SpaceStr,
1594 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1595 ValueType ImmTy, list<Predicate> Preds> {
1596 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1597 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1598 # "." # OpStr # "." # TypeStr
1599 # " \t$result, [$src], $b, $c;",
1601 "int_nvvm_atomic_" # OpStr
1602 # "_" # SpaceStr # "_" # IntTypeStr
1603 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1604 regclass, ImmType, Imm, ImmTy, Preds>;
1607 // Constructs variants for different address spaces.
1608 // For now we only need variants for generic space pointers.
1609 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1610 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1611 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1612 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1613 regclass, ImmType, Imm, ImmTy, Preds>;
1615 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1616 string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1617 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1618 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1619 regclass, ImmType, Imm, ImmTy, Preds>;
1622 // Constructs variants for different scopes of atomic op.
1623 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1624 NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1625 ValueType ImmTy, list<Predicate> Preds> {
1626 // .gpu scope is default and is currently covered by existing
1627 // atomics w/o explicitly specified scope.
1628 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1629 regclass, ImmType, Imm, ImmTy,
1630 !listconcat(Preds,[hasAtomScope])>;
1631 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1632 regclass, ImmType, Imm, ImmTy,
1633 !listconcat(Preds,[hasAtomScope])>;
1635 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1636 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1637 list<Predicate> Preds> {
1638 // No need to define ".gpu"-scoped atomics. They do the same thing
1639 // as the regular, non-scoped atomics defined elsewhere.
1640 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1641 regclass, ImmType, Imm, ImmTy,
1642 !listconcat(Preds,[hasAtomScope])>;
1643 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1644 regclass, ImmType, Imm, ImmTy,
1645 !listconcat(Preds,[hasAtomScope])>;
1649 multiclass ATOM2_add_impl<string OpStr> {
1650 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1651 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1652 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1653 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1655 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1659 // atom.{and,or,xor}
1660 multiclass ATOM2_bitwise_impl<string OpStr> {
1661 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1662 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1663 [hasAtomBitwise64]>;
1667 multiclass ATOM2_exch_impl<string OpStr> {
1668 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1669 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1673 multiclass ATOM2_minmax_impl<string OpStr> {
1674 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1675 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1676 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1678 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1683 multiclass ATOM2_incdec_impl<string OpStr> {
1684 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1688 multiclass ATOM3_cas_impl<string OpStr> {
1689 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1690 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
// Top-level instantiations of the scoped-atomic multiclasses, one per
// operation. The string argument is passed as OpStr to the per-family
// multiclass (ATOM2_add_impl, ATOM2_bitwise_impl, ATOM2_exch_impl,
// ATOM2_minmax_impl, ATOM2_incdec_impl, ATOM3_cas_impl). "cas" is the only
// one using an ATOM3_* multiclass; every other operation uses ATOM2_*.
// Entries are listed alphabetically by operation name.
1693 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1694 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1695 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1696 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1697 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1698 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1699 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1700 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1701 defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1702 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1704 //-----------------------------------
1705 // Support for ldu on sm_20 or later
1706 //-----------------------------------
1708 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
1709 // read-only in a kernel.
1713 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1714 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1715 !strconcat("ldu.global.", TyStr),
1716 []>, Requires<[hasLDU]>;
1717 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1718 !strconcat("ldu.global.", TyStr),
1719 []>, Requires<[hasLDU]>;
1720 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1721 !strconcat("ldu.global.", TyStr),
1722 []>, Requires<[hasLDU]>;
1723 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1724 !strconcat("ldu.global.", TyStr),
1725 []>, Requires<[hasLDU]>;
1726 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1727 !strconcat("ldu.global.", TyStr),
1728 []>, Requires<[hasLDU]>;
// Scalar ldu instantiations of the LDU_G multiclass, one per value type.
// The string argument (TyStr) is appended to "ldu.global." by LDU_G, so it
// carries both the PTX type suffix and the operand list. The second
// argument is the register class of $result.
// Points visible in the table below:
//  - i8 loads use the "u8" type but land in Int16Regs.
//  - f16 uses "b16" and f16x2 uses "b32" rather than a floating type suffix.
//  - p32 / p64 use the same strings and register classes as i32 / i64.
1731 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1732 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1733 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1734 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1735 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1736 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1737 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1738 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1739 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1740 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1744 // Elementized vector ldu
1745 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1746 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1747 (ins Int32Regs:$src),
1748 !strconcat("ldu.global.", TyStr), []>;
1749 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1750 (ins Int64Regs:$src),
1751 !strconcat("ldu.global.", TyStr), []>;
1752 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1754 !strconcat("ldu.global.", TyStr), []>;
1755 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1757 !strconcat("ldu.global.", TyStr), []>;
1758 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1760 !strconcat("ldu.global.", TyStr), []>;
1763 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1764 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1765 regclass:$dst4), (ins Int32Regs:$src),
1766 !strconcat("ldu.global.", TyStr), []>;
1767 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1768 regclass:$dst4), (ins Int64Regs:$src),
1769 !strconcat("ldu.global.", TyStr), []>;
1770 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1771 regclass:$dst4), (ins MEMri:$src),
1772 !strconcat("ldu.global.", TyStr), []>;
1773 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1774 regclass:$dst4), (ins MEMri64:$src),
1775 !strconcat("ldu.global.", TyStr), []>;
1776 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1777 regclass:$dst4), (ins imemAny:$src),
1778 !strconcat("ldu.global.", TyStr), []>;
// Elementized vector ldu instantiations. The string argument (TyStr) is
// appended to "ldu.global." by VLDU_G_ELE_V2 / VLDU_G_ELE_V4 and names the
// vector width (v2 / v4), the element type, and the destination registers
// ($dst1.. $dstN) followed by the [$src] address operand. The second
// argument is the register class used for every destination element.
// As with the scalar forms, 8-bit elements use Int16Regs, and f16 / f16x2
// elements use the "b16" / "b32" type suffixes.
// NOTE(review): the doubled braces "{{" / "}}" presumably print as literal
// single braces in the emitted asm -- confirm against the asm-string rules.
1781 defm INT_PTX_LDU_G_v2i8_ELE
1782 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1783 defm INT_PTX_LDU_G_v2i16_ELE
1784 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1785 defm INT_PTX_LDU_G_v2i32_ELE
1786 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1787 defm INT_PTX_LDU_G_v2f16_ELE
1788 : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1789 defm INT_PTX_LDU_G_v2f16x2_ELE
1790 : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1791 defm INT_PTX_LDU_G_v2f32_ELE
1792 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1793 defm INT_PTX_LDU_G_v2i64_ELE
1794 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1795 defm INT_PTX_LDU_G_v2f64_ELE
1796 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// Four-element forms start here.
1797 defm INT_PTX_LDU_G_v4i8_ELE
1798 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1799 defm INT_PTX_LDU_G_v4i16_ELE
1800 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1802 defm INT_PTX_LDU_G_v4i32_ELE
1803 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1805 defm INT_PTX_LDU_G_v4f16_ELE
1806 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1808 defm INT_PTX_LDU_G_v4f16x2_ELE
1809 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1811 defm INT_PTX_LDU_G_v4f32_ELE
1812 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1816 //-----------------------------------
1817 // Support for ldg on sm_35 or later
1818 //-----------------------------------
1820 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
1821 // non-coherent texture cache, and therefore the values read must be read-only
1822 // during the lifetime of the kernel.
1824 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1825 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1826 !strconcat("ld.global.nc.", TyStr),
1827 []>, Requires<[hasLDG]>;
1828 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1829 !strconcat("ld.global.nc.", TyStr),
1830 []>, Requires<[hasLDG]>;
1831 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1832 !strconcat("ld.global.nc.", TyStr),
1833 []>, Requires<[hasLDG]>;
1834 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1835 !strconcat("ld.global.nc.", TyStr),
1836 []>, Requires<[hasLDG]>;
1837 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1838 !strconcat("ld.global.nc.", TyStr),
1839 []>, Requires<[hasLDG]>;
// Scalar ldg instantiations of the LDG_G multiclass, one per value type.
// The string argument (TyStr) is appended to "ld.global.nc." by LDG_G, so
// it carries both the PTX type suffix and the operand list. The second
// argument is the register class of $result.
// Points visible in the table below:
//  - i8 loads use the "u8" type but land in Int16Regs.
//  - f16 uses "b16" and f16x2 uses "b32" rather than a floating type suffix.
//  - p32 / p64 use the same strings and register classes as i32 / i64.
1842 defm INT_PTX_LDG_GLOBAL_i8
1843 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1844 defm INT_PTX_LDG_GLOBAL_i16
1845 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1846 defm INT_PTX_LDG_GLOBAL_i32
1847 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1848 defm INT_PTX_LDG_GLOBAL_i64
1849 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1850 defm INT_PTX_LDG_GLOBAL_f16
1851 : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
1852 defm INT_PTX_LDG_GLOBAL_f16x2
1853 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
1854 defm INT_PTX_LDG_GLOBAL_f32
1855 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1856 defm INT_PTX_LDG_GLOBAL_f64
1857 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1858 defm INT_PTX_LDG_GLOBAL_p32
1859 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1860 defm INT_PTX_LDG_GLOBAL_p64
1861 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1865 // Elementized vector ldg
1866 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1867 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1868 (ins Int32Regs:$src),
1869 !strconcat("ld.global.nc.", TyStr), []>;
1870 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1871 (ins Int64Regs:$src),
1872 !strconcat("ld.global.nc.", TyStr), []>;
1873 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1875 !strconcat("ld.global.nc.", TyStr), []>;
1876 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1878 !strconcat("ld.global.nc.", TyStr), []>;
1879 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1881 !strconcat("ld.global.nc.", TyStr), []>;
1884 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1885 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1886 regclass:$dst4), (ins Int32Regs:$src),
1887 !strconcat("ld.global.nc.", TyStr), []>;
1888 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1889 regclass:$dst4), (ins Int64Regs:$src),
1890 !strconcat("ld.global.nc.", TyStr), []>;
1891 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1892 regclass:$dst4), (ins MEMri:$src),
1893 !strconcat("ld.global.nc.", TyStr), []>;
1894 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1895 regclass:$dst4), (ins MEMri64:$src),
1896 !strconcat("ld.global.nc.", TyStr), []>;
1897 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1898 regclass:$dst4), (ins imemAny:$src),
1899 !strconcat("ld.global.nc.", TyStr), []>;
1902 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Elementized vector ldg instantiations. The string argument (TyStr) is
// appended to "ld.global.nc." by VLDG_G_ELE_V2 / VLDG_G_ELE_V4 and names
// the vector width (v2 / v4), the element type, and the destination
// registers ($dst1.. $dstN) followed by the [$src] address operand. The
// second argument is the register class used for every destination element.
// 8-bit elements use Int16Regs; f16 / f16x2 elements use the "b16" / "b32"
// type suffixes. There are no v4 forms for 64-bit element types here.
// NOTE(review): the doubled braces "{{" / "}}" presumably print as literal
// single braces in the emitted asm -- confirm against the asm-string rules.
1903 defm INT_PTX_LDG_G_v2i8_ELE
1904 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1905 defm INT_PTX_LDG_G_v2i16_ELE
1906 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1907 defm INT_PTX_LDG_G_v2i32_ELE
1908 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1909 defm INT_PTX_LDG_G_v2f16_ELE
1910 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
1911 defm INT_PTX_LDG_G_v2f16x2_ELE
1912 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
1913 defm INT_PTX_LDG_G_v2f32_ELE
1914 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1915 defm INT_PTX_LDG_G_v2i64_ELE
1916 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1917 defm INT_PTX_LDG_G_v2f64_ELE
1918 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// Four-element forms.
1919 defm INT_PTX_LDG_G_v4i8_ELE
1920 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1921 defm INT_PTX_LDG_G_v4i16_ELE
1922 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1923 defm INT_PTX_LDG_G_v4i32_ELE
1924 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1925 defm INT_PTX_LDG_G_v4f16_ELE
1926 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
1927 defm INT_PTX_LDG_G_v4f16x2_ELE
1928 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
1929 defm INT_PTX_LDG_G_v4f32_ELE
1930 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1933 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1934 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1935 !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
1936 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1937 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1938 !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
1939 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1940 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
1941 "{{ .reg .b64 %tmp;\n\t"
1942 #" cvt.u64.u32 \t%tmp, $src;\n\t"
1943 #" cvta." # Str # ".u64 \t$result, %tmp; }}",
1944 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
1945 Requires<[useShortPtr]>;
1948 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1949 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1950 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
1951 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1952 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1953 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
1954 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1955 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
1956 "{{ .reg .b64 %tmp;\n\t"
1957 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
1958 #" cvt.u32.u64 \t$result, %tmp; }}",
1959 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
1960 Requires<[useShortPtr]>;
// Address-space conversion instructions, one defm per named space
// (local, shared, global, const) and per direction.
//  - NG_TO_G builds "cvta.<Str>..." instructions and is paired with the
//    int_nvvm_ptr_<space>_to_gen intrinsics.
//  - G_TO_NG builds "cvta.to.<Str>..." instructions and is paired with the
//    int_nvvm_ptr_gen_to_<space> intrinsics.
// Note the asm string uses "const" while the intrinsic names spell out
// "constant".
1963 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1964 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1965 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1966 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
// Generic -> specific-space direction.
1968 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1969 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1970 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1971 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1974 // nvvm.ptr.gen.to.param
// Selects int_nvvm_ptr_gen_to_param as a plain register-to-register move:
// the emitted asm is "mov.u32" for the Int32Regs form and "mov.u64" for the
// Int64Regs form (the _64 variant). No cvta instruction is emitted for this
// conversion, unlike the cvta_* / cvta_to_* definitions for other spaces.
1975 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1976 (ins Int32Regs:$src),
1977 "mov.u32 \t$result, $src;",
1978 [(set Int32Regs:$result,
1979 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
// 64-bit pointer variant.
1980 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1981 (ins Int64Regs:$src),
1982 "mov.u64 \t$result, $src;",
1983 [(set Int64Regs:$result,
1984 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1987 // nvvm.move intrinsics
1988 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1989 "mov.b16 \t$r, $s;",
1991 (int_nvvm_move_i16 Int16Regs:$s))]>;
1992 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1993 "mov.b32 \t$r, $s;",
1995 (int_nvvm_move_i32 Int32Regs:$s))]>;
1996 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1997 "mov.b64 \t$r, $s;",
1999 (int_nvvm_move_i64 Int64Regs:$s))]>;
// Floating-point nvvm.move instructions: int_nvvm_move_float and
// int_nvvm_move_double are selected to a typed register move ("mov.f32" on
// Float32Regs, "mov.f64" on Float64Regs) from $s into $r.
2000 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2001 "mov.f32 \t$r, $s;",
2002 [(set Float32Regs:$r,
2003 (int_nvvm_move_float Float32Regs:$s))]>;
2004 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2005 "mov.f64 \t$r, $s;",
2006 [(set Float64Regs:$r,
2007 (int_nvvm_move_double Float64Regs:$s))]>;
2008 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2009 "mov.u32 \t$r, $s;",
2011 (int_nvvm_move_ptr Int32Regs:$s))]>;
2012 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2013 "mov.u64 \t$r, $s;",
2015 (int_nvvm_move_ptr Int64Regs:$s))]>;
2017 // @TODO: Are these actually needed, or will we always just see symbols
2018 // copied to registers first?
2019 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2020 "mov.u32 \t$r, $s;",
2022 (int_nvvm_move_ptr texternalsym:$s))]>;
2023 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2024 "mov.u64 \t$r, $s;",
2026 (int_nvvm_move_ptr texternalsym:$s))]>;*/
2029 // MoveParam %r1, param
2030 // ptr_local_to_gen %r2, %r1
2031 // ptr_gen_to_local %r3, %r2
2035 // @TODO: Revisit this. There is a type
2036 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2037 // instructions are not currently defined. However, we can use the ptr
2038 // variants and the asm printer will do the right thing.
// Fold the round trip gen_to_local(local_to_gen(MoveParam sym)) into a
// single pointer move of the symbol itself. The result type picks the
// instruction: i64 -> nvvm_move_ptr64, i32 -> nvvm_move_ptr32. The
// texternalsym operand is passed straight to the register-move instruction.
2039 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2040 (MoveParam texternalsym:$src)))),
2041 (nvvm_move_ptr64 texternalsym:$src)>;
2042 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2043 (MoveParam texternalsym:$src)))),
2044 (nvvm_move_ptr32 texternalsym:$src)>;
2047 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2048 "mov.u64 \t$result, $src;", []>;
2050 //-----------------------------------
2051 // Compiler Error Warn
2052 // - Just ignore them in codegen
2053 //-----------------------------------
// Selection targets for int_nvvm_compiler_warn / int_nvvm_compiler_error.
// Each takes one register operand ($a, 32- or 64-bit) and produces no
// outputs. The asm string is only a "//"-prefixed marker naming the
// intrinsic and does not reference $a, so the operand is not printed.
// NOTE(review): "//" presumably starts a comment in the emitted PTX, which
// would make these no-ops in the output -- confirm.
2055 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2056 "// llvm.nvvm.compiler.warn()",
2057 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2058 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2059 "// llvm.nvvm.compiler.warn()",
2060 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2061 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2062 "// llvm.nvvm.compiler.error()",
2063 [(int_nvvm_compiler_error Int32Regs:$a)]>;
2064 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2065 "// llvm.nvvm.compiler.error()",
2066 [(int_nvvm_compiler_error Int64Regs:$a)]>;
// isspacep instructions: one definition per address space (const, global,
// local, shared) and per pointer width (32 / 64). Each takes the address in
// $a and writes a predicate result to $d (Int1Regs), selected from the
// corresponding int_nvvm_isspacep_<space> intrinsic. The 32- and 64-bit
// forms of a space share the same asm string.
// Only the const variants carry Requires<[hasPTX31]>; the others have no
// Requires clause in this block.
2071 def ISSPACEP_CONST_32
2072 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2073 "isspacep.const \t$d, $a;",
2074 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2075 Requires<[hasPTX31]>;
2076 def ISSPACEP_CONST_64
2077 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2078 "isspacep.const \t$d, $a;",
2079 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2080 Requires<[hasPTX31]>;
2081 def ISSPACEP_GLOBAL_32
2082 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2083 "isspacep.global \t$d, $a;",
2084 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2085 def ISSPACEP_GLOBAL_64
2086 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2087 "isspacep.global \t$d, $a;",
2088 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2089 def ISSPACEP_LOCAL_32
2090 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2091 "isspacep.local \t$d, $a;",
2092 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2093 def ISSPACEP_LOCAL_64
2094 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2095 "isspacep.local \t$d, $a;",
2096 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2097 def ISSPACEP_SHARED_32
2098 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2099 "isspacep.shared \t$d, $a;",
2100 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2101 def ISSPACEP_SHARED_64
2102 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2103 "isspacep.shared \t$d, $a;",
2104 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2107 // Special register reads
// Copies a special register ($r, SpecialRegs) into a 32-bit integer register
// with mov.b32. The pattern list is empty, so this instruction is only
// produced by explicit Pat<> records that name it as their result.
2108 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2109 (ins SpecialRegs:$r),
2110 "mov.b32 \t$d, $r;", []>;
// One pattern per environment register: int_nvvm_read_ptx_sreg_envregN is
// selected to MOV_SPECIAL applied to the matching ENVREGN operand, for
// N = 0..31.
2112 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2113 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2114 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2115 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2116 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2117 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2118 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2119 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2120 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2121 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2122 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2123 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2124 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2125 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2126 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2127 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2128 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2129 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2130 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2131 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2132 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2133 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2134 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2135 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2136 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2137 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2138 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2139 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2140 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2141 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2142 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2143 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2146 // rotate builtin support
// Hardware 32-bit rotate with an immediate amount. Matches
// int_nvvm_rotate_b32 and prints shf.l.wrap.b32 with $src supplied as both
// funnel-shift inputs. Only selectable when hasHWROT32 holds.
2148 def ROTATE_B32_HW_IMM
2149 : NVPTXInst<(outs Int32Regs:$dst),
2150 (ins Int32Regs:$src, i32imm:$amt),
2151 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2152 [(set Int32Regs:$dst,
2153 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2154 Requires<[hasHWROT32]> ;
// Hardware 32-bit rotate with the amount in a register. Same assembly text
// as ROTATE_B32_HW_IMM (shf.l.wrap.b32 with $src as both funnel inputs), but
// $amt comes from Int32Regs. Only selectable when hasHWROT32 holds.
2156 def ROTATE_B32_HW_REG
2157 : NVPTXInst<(outs Int32Regs:$dst),
2158 (ins Int32Regs:$src, Int32Regs:$amt),
2159 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2160 [(set Int32Regs:$dst,
2161 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2162 Requires<[hasHWROT32]> ;
// Software fallback (noHWROT32) for int_nvvm_rotate_b32 with an immediate
// amount: selects ROT32imm_sw with the amount and SUB_FRM_32 of the amount.
// NOTE(review): SUB_FRM_32 is defined elsewhere; presumably it yields
// 32 - amt -- confirm against its definition.
2164 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2165 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2166 Requires<[noHWROT32]> ;
// Software fallback (noHWROT32) for int_nvvm_rotate_b32 with the amount in a
// register: selects ROTL32reg_sw on the same two operands.
2168 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2169 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2170 Requires<[noHWROT32]> ;
// Helpers that extract one 32-bit half of a 64-bit register; both are marked
// hasSideEffects = 0. Each unpacks $src with a vector-destination mov.b64
// inside a braced scope that declares a scratch register %dummy for the half
// that is not wanted.
2172 let hasSideEffects = 0 in {
// GET_LO_INT64: $dst receives the first element of the unpacked pair.
2173 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2174 !strconcat("{{\n\t",
2175 ".reg .b32 %dummy;\n\t",
2176 "mov.b64 \t{$dst,%dummy}, $src;\n\t",
// GET_HI_INT64: $dst receives the second element of the unpacked pair.
2180 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2181 !strconcat("{{\n\t",
2182 ".reg .b32 %dummy;\n\t",
2183 "mov.b64 \t{%dummy,$dst}, $src;\n\t",
// Packs two 32-bit registers into one 64-bit register with a vector-source
// mov.b64; $lo is the first vector element and $hi the second. No selection
// pattern is attached, and the record is marked hasSideEffects = 0.
2188 let hasSideEffects = 0 in {
2190 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2191 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
// int_nvvm_swap_lo_hi_b64: split $src into its two 32-bit halves and repack
// them with PACK_TWO_INT32, passing the GET_HI_INT64 result first and the
// GET_LO_INT64 result second so the halves trade places.
2194 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2195 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2196 (GET_LO_INT64 Int64Regs:$src))> ;
2198 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// 32-bit funnel-shift instructions in wrap mode, all marked
// hasSideEffects = 0 and gated on hasHWROT32. None has a selection pattern;
// they are referenced by name from Pat<> records. The four records are the
// cross product of direction (shf.l / shf.r) and amount kind (immediate /
// register).
2200 let hasSideEffects = 0 in {
// Left funnel shift, immediate amount.
2201 def SHF_L_WRAP_B32_IMM
2202 : NVPTXInst<(outs Int32Regs:$dst),
2203 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2204 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2205 Requires<[hasHWROT32]>;
// Left funnel shift, register amount.
2207 def SHF_L_WRAP_B32_REG
2208 : NVPTXInst<(outs Int32Regs:$dst),
2209 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2210 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2211 Requires<[hasHWROT32]>;
// Right funnel shift, immediate amount.
2213 def SHF_R_WRAP_B32_IMM
2214 : NVPTXInst<(outs Int32Regs:$dst),
2215 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2216 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2217 Requires<[hasHWROT32]>;
// Right funnel shift, register amount.
2219 def SHF_R_WRAP_B32_REG
2220 : NVPTXInst<(outs Int32Regs:$dst),
2221 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2222 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2223 Requires<[hasHWROT32]>;
2226 // HW version of rotate 64
// int_nvvm_rotate_b64 with an immediate amount on hasHWROT32 targets. The
// result is built from two left funnel shifts over the 32-bit halves of $src:
// one fed (hi, lo) and one fed (lo, hi), both with the same amount.
// NOTE(review): shf.*.wrap uses only the low 5 bits of the amount, so amounts
// in 32..63 look like they would not exchange the halves -- confirm that
// callers restrict the range.
2227 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2229 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2230 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2231 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2232 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2233 Requires<[hasHWROT32]>;
// int_nvvm_rotate_b64 with the amount in a register on hasHWROT32 targets.
// Same shape as the immediate form: two left funnel shifts over the halves of
// $src, fed (hi, lo) and (lo, hi) respectively.
// NOTE(review): shf.*.wrap uses only the low 5 bits of the amount, so amounts
// in 32..63 look like they would not exchange the halves -- confirm.
2235 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2237 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2238 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2239 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2240 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2241 Requires<[hasHWROT32]>;
// int_nvvm_rotate_right_b64 with an immediate amount on hasHWROT32 targets.
// Built from two right funnel shifts over the halves of $src: one fed
// (lo, hi) and one fed (hi, lo), both with the same amount.
// NOTE(review): shf.*.wrap uses only the low 5 bits of the amount, so amounts
// in 32..63 look like they would not exchange the halves -- confirm.
2244 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2246 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2247 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2248 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2249 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2250 Requires<[hasHWROT32]>;
// int_nvvm_rotate_right_b64 with the amount in a register on hasHWROT32
// targets. Same shape as the immediate form: two right funnel shifts over the
// halves of $src, fed (lo, hi) and (hi, lo) respectively.
// NOTE(review): shf.*.wrap uses only the low 5 bits of the amount, so amounts
// in 32..63 look like they would not exchange the halves -- confirm.
2252 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2254 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2255 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2256 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2257 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2258 Requires<[hasHWROT32]>;
2260 // SW version of rotate 64
// Software fallback (noHWROT32) for int_nvvm_rotate_b64 with an immediate
// amount: ROT64imm_sw receives the amount followed by SUB_FRM_32 of it.
// NOTE(review): this 64-bit rotate uses SUB_FRM_32 while the rotate-right
// form uses SUB_FRM_64; both helpers are defined elsewhere -- confirm the
// intended complement.
2261 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2262 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2263 Requires<[noHWROT32]>;
// Software fallback (noHWROT32) for int_nvvm_rotate_b64 with the amount in a
// register: selects ROTL64reg_sw on the same two operands.
2264 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2265 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2266 Requires<[noHWROT32]>;
// Software fallback (noHWROT32) for int_nvvm_rotate_right_b64 with an
// immediate amount. Reuses ROT64imm_sw with its two amount operands in the
// opposite order from the left-rotate pattern: SUB_FRM_64 of the amount
// first, then the amount itself.
2267 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2268 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2269 Requires<[noHWROT32]>;
// Software fallback (noHWROT32) for int_nvvm_rotate_right_b64 with the amount
// in a register: selects ROTR64reg_sw on the same two operands.
2270 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2271 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2272 Requires<[noHWROT32]>;
2275 //-----------------------------------
2276 // Texture Intrinsics
2277 //-----------------------------------
2279 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2280 // also defined in NVPTXReplaceImageHandles.cpp
2282 // texmode_independent
// Every record in this scope is tagged IsTex = 1 and IsTexModeUnified = 0.
// The instructions take two 64-bit handle operands, $t and $s, printed first
// inside the address brackets (texture and sampler in PTX independent mode).
//
// 1D fetches: four 32-bit results ($r, $g, $b, $a) and one coordinate $x.
// The type pair in the mnemonic is <result type>.<coordinate type>; the s32
// and u32 result forms both use Int32Regs and differ only in the mnemonic.
//   plain    tex.1d        coordinate only
//   _LEVEL   tex.level.1d  adds an explicit $lod operand
//   _GRAD    tex.grad.1d   adds one-element gradient vectors $gradx, $grady
2283 let IsTex = 1, IsTexModeUnified = 0 in {
2284 // Texture fetch instructions using handles
// f32 result, s32 coordinate.
2286 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2287 Float32Regs:$b, Float32Regs:$a),
2288 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2289 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// f32 result, f32 coordinate.
2292 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2293 Float32Regs:$b, Float32Regs:$a),
2294 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2295 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2297 def TEX_1D_F32_F32_LEVEL
2298 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2299 Float32Regs:$b, Float32Regs:$a),
2300 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2301 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2302 "[$t, $s, \\{$x\\}], $lod;",
2304 def TEX_1D_F32_F32_GRAD
2305 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2306 Float32Regs:$b, Float32Regs:$a),
2307 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2308 Float32Regs:$gradx, Float32Regs:$grady),
2309 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2310 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// s32 result, s32 coordinate.
2313 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2314 Int32Regs:$b, Int32Regs:$a),
2315 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2316 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// s32 result, f32 coordinate.
2319 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2320 Int32Regs:$b, Int32Regs:$a),
2321 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2322 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2324 def TEX_1D_S32_F32_LEVEL
2325 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2326 Int32Regs:$b, Int32Regs:$a),
2327 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2329 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2330 "[$t, $s, \\{$x\\}], $lod;",
2332 def TEX_1D_S32_F32_GRAD
2333 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2334 Int32Regs:$b, Int32Regs:$a),
2335 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2336 Float32Regs:$gradx, Float32Regs:$grady),
2337 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2338 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// u32 result, s32 coordinate.
2341 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2342 Int32Regs:$b, Int32Regs:$a),
2343 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2344 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
// u32 result, f32 coordinate.
2347 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2348 Int32Regs:$b, Int32Regs:$a),
2349 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2350 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2352 def TEX_1D_U32_F32_LEVEL
2353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2354 Int32Regs:$b, Int32Regs:$a),
2355 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2357 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2358 "[$t, $s, \\{$x\\}], $lod;",
2360 def TEX_1D_U32_F32_GRAD
2361 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2362 Int32Regs:$b, Int32Regs:$a),
2363 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2364 Float32Regs:$gradx, Float32Regs:$grady),
2365 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2366 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
// 1D array fetches (tex.a1d). Same layout as the 1D family, plus a 32-bit
// integer operand $l that is printed ahead of $x in the coordinate vector
// {$l, $x}. $l is always Int32Regs, even when $x is f32.
// Variants: plain, _LEVEL (adds $lod) and _GRAD (adds $gradx, $grady), for
// f32, s32 and u32 results.
2369 def TEX_1D_ARRAY_F32_S32
2370 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2371 Float32Regs:$b, Float32Regs:$a),
2372 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2373 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2374 "[$t, $s, \\{$l, $x\\}];",
2376 def TEX_1D_ARRAY_F32_F32
2377 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2378 Float32Regs:$b, Float32Regs:$a),
2379 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2380 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2381 "[$t, $s, \\{$l, $x\\}];",
2383 def TEX_1D_ARRAY_F32_F32_LEVEL
2384 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2385 Float32Regs:$b, Float32Regs:$a),
2386 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2388 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2389 "[$t, $s, \\{$l, $x\\}], $lod;",
2391 def TEX_1D_ARRAY_F32_F32_GRAD
2392 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2393 Float32Regs:$b, Float32Regs:$a),
2394 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2395 Float32Regs:$gradx, Float32Regs:$grady),
2396 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2397 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2399 def TEX_1D_ARRAY_S32_S32
2400 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2401 Int32Regs:$b, Int32Regs:$a),
2402 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2403 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2404 "[$t, $s, \\{$l, $x\\}];",
2406 def TEX_1D_ARRAY_S32_F32
2407 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2408 Int32Regs:$b, Int32Regs:$a),
2409 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2410 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2411 "[$t, $s, \\{$l, $x\\}];",
2413 def TEX_1D_ARRAY_S32_F32_LEVEL
2414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2415 Int32Regs:$b, Int32Regs:$a),
2416 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2418 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2419 "[$t, $s, \\{$l, $x\\}], $lod;",
2421 def TEX_1D_ARRAY_S32_F32_GRAD
2422 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2423 Int32Regs:$b, Int32Regs:$a),
2424 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2425 Float32Regs:$gradx, Float32Regs:$grady),
2426 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2427 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2429 def TEX_1D_ARRAY_U32_S32
2430 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2431 Int32Regs:$b, Int32Regs:$a),
2432 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2433 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2434 "[$t, $s, \\{$l, $x\\}];",
2436 def TEX_1D_ARRAY_U32_F32
2437 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2438 Int32Regs:$b, Int32Regs:$a),
2439 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2440 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2441 "[$t, $s, \\{$l, $x\\}];",
2443 def TEX_1D_ARRAY_U32_F32_LEVEL
2444 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2445 Int32Regs:$b, Int32Regs:$a),
2446 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2448 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2449 "[$t, $s, \\{$l, $x\\}], $lod;",
2451 def TEX_1D_ARRAY_U32_F32_GRAD
2452 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2453 Int32Regs:$b, Int32Regs:$a),
2454 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2455 Float32Regs:$gradx, Float32Regs:$grady),
2456 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2457 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
// 2D fetches (tex.2d): four 32-bit results and a two-element coordinate
// vector {$x, $y}. _LEVEL forms append $lod; _GRAD forms append two
// two-element gradient vectors, {$gradx0, $gradx1} and {$grady0, $grady1}.
// The mnemonic's type pair is <result type>.<coordinate type>.
// f32 result, s32 coordinates.
2461 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2462 Float32Regs:$b, Float32Regs:$a),
2463 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2464 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2465 "[$t, $s, \\{$x, $y\\}];",
// f32 result, f32 coordinates.
2468 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2469 Float32Regs:$b, Float32Regs:$a),
2470 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2471 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2472 "[$t, $s, \\{$x, $y\\}];",
2474 def TEX_2D_F32_F32_LEVEL
2475 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2476 Float32Regs:$b, Float32Regs:$a),
2477 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2479 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2480 "[$t, $s, \\{$x, $y\\}], $lod;",
2482 def TEX_2D_F32_F32_GRAD
2483 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2484 Float32Regs:$b, Float32Regs:$a),
2485 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2486 Float32Regs:$gradx0, Float32Regs:$gradx1,
2487 Float32Regs:$grady0, Float32Regs:$grady1),
2488 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2489 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2490 "\\{$grady0, $grady1\\};",
// s32 result, s32 coordinates.
2493 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2494 Int32Regs:$b, Int32Regs:$a),
2495 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2496 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2497 "[$t, $s, \\{$x, $y\\}];",
// s32 result, f32 coordinates.
2500 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2501 Int32Regs:$b, Int32Regs:$a),
2502 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2503 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2504 "[$t, $s, \\{$x, $y\\}];",
2506 def TEX_2D_S32_F32_LEVEL
2507 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2508 Int32Regs:$b, Int32Regs:$a),
2509 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2511 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2512 "[$t, $s, \\{$x, $y\\}], $lod;",
2514 def TEX_2D_S32_F32_GRAD
2515 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2516 Int32Regs:$b, Int32Regs:$a),
2517 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2518 Float32Regs:$gradx0, Float32Regs:$gradx1,
2519 Float32Regs:$grady0, Float32Regs:$grady1),
2520 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2521 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2522 "\\{$grady0, $grady1\\};",
// u32 result, s32 coordinates.
2525 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2526 Int32Regs:$b, Int32Regs:$a),
2527 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2528 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2529 "[$t, $s, \\{$x, $y\\}];",
// u32 result, f32 coordinates.
2532 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2533 Int32Regs:$b, Int32Regs:$a),
2534 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2535 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2536 "[$t, $s, \\{$x, $y\\}];",
2538 def TEX_2D_U32_F32_LEVEL
2539 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2540 Int32Regs:$b, Int32Regs:$a),
2541 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2543 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2544 "[$t, $s, \\{$x, $y\\}], $lod;",
2546 def TEX_2D_U32_F32_GRAD
2547 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2548 Int32Regs:$b, Int32Regs:$a),
2549 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2550 Float32Regs:$gradx0, Float32Regs:$gradx1,
2551 Float32Regs:$grady0, Float32Regs:$grady1),
2552 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2553 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2554 "\\{$grady0, $grady1\\};",
// 2D array fetches (tex.a2d). The coordinate vector is printed with four
// elements, {$l, $x, $y, $y}: the 32-bit integer operand $l first, then the
// two coordinates, with $y repeated to fill the fourth slot.
// NOTE(review): the repeated $y looks like padding for a four-element vector
// whose last element is unused -- confirm against the PTX tex documentation.
// _LEVEL forms append $lod; _GRAD forms append {$gradx0, $gradx1} and
// {$grady0, $grady1}.
2557 def TEX_2D_ARRAY_F32_S32
2558 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2559 Float32Regs:$b, Float32Regs:$a),
2560 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2562 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2563 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2565 def TEX_2D_ARRAY_F32_F32
2566 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2567 Float32Regs:$b, Float32Regs:$a),
2568 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2570 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2571 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2573 def TEX_2D_ARRAY_F32_F32_LEVEL
2574 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2575 Float32Regs:$b, Float32Regs:$a),
2576 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2577 Float32Regs:$y, Float32Regs:$lod),
2578 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2579 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2581 def TEX_2D_ARRAY_F32_F32_GRAD
2582 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2583 Float32Regs:$b, Float32Regs:$a),
2584 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2585 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2586 Float32Regs:$grady0, Float32Regs:$grady1),
2587 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2588 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2589 "\\{$grady0, $grady1\\};",
2591 def TEX_2D_ARRAY_S32_S32
2592 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2593 Int32Regs:$b, Int32Regs:$a),
2594 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2596 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2597 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2599 def TEX_2D_ARRAY_S32_F32
2600 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2601 Int32Regs:$b, Int32Regs:$a),
2602 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2604 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2605 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2607 def TEX_2D_ARRAY_S32_F32_LEVEL
2608 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2609 Int32Regs:$b, Int32Regs:$a),
2610 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2611 Float32Regs:$y, Float32Regs:$lod),
2612 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2613 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2615 def TEX_2D_ARRAY_S32_F32_GRAD
2616 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2617 Int32Regs:$b, Int32Regs:$a),
2618 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2620 Float32Regs:$gradx0, Float32Regs:$gradx1,
2621 Float32Regs:$grady0, Float32Regs:$grady1),
2622 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2623 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2624 "\\{$grady0, $grady1\\};",
2626 def TEX_2D_ARRAY_U32_S32
2627 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2628 Int32Regs:$b, Int32Regs:$a),
2629 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2631 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2632 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2634 def TEX_2D_ARRAY_U32_F32
2635 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2636 Int32Regs:$b, Int32Regs:$a),
2637 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2639 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2640 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2642 def TEX_2D_ARRAY_U32_F32_LEVEL
2643 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2644 Int32Regs:$b, Int32Regs:$a),
2645 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2646 Float32Regs:$y, Float32Regs:$lod),
2647 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2648 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2650 def TEX_2D_ARRAY_U32_F32_GRAD
2651 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2652 Int32Regs:$b, Int32Regs:$a),
2653 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2655 Float32Regs:$gradx0, Float32Regs:$gradx1,
2656 Float32Regs:$grady0, Float32Regs:$grady1),
2657 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2658 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2659 "\\{$grady0, $grady1\\};",
// 3D fetches (tex.3d). The coordinate vector is printed with four elements,
// {$x, $y, $z, $z}, repeating $z in the fourth slot. _LEVEL forms append
// $lod. _GRAD forms append two gradient vectors that follow the same
// convention: {$gradx0, $gradx1, $gradx2, $gradx2} and
// {$grady0, $grady1, $grady2, $grady2}.
// NOTE(review): the repeated last element looks like padding for a
// four-element vector -- confirm against the PTX tex documentation.
// f32 result, s32 coordinates.
2663 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2664 Float32Regs:$b, Float32Regs:$a),
2665 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2667 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2668 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// f32 result, f32 coordinates.
2671 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2672 Float32Regs:$b, Float32Regs:$a),
2673 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2675 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2676 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2678 def TEX_3D_F32_F32_LEVEL
2679 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2680 Float32Regs:$b, Float32Regs:$a),
2681 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2682 Float32Regs:$z, Float32Regs:$lod),
2683 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2684 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2686 def TEX_3D_F32_F32_GRAD
2687 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2688 Float32Regs:$b, Float32Regs:$a),
2689 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2691 Float32Regs:$gradx0, Float32Regs:$gradx1,
2692 Float32Regs:$gradx2, Float32Regs:$grady0,
2693 Float32Regs:$grady1, Float32Regs:$grady2),
2694 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2695 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2696 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2697 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// s32 result, s32 coordinates.
2700 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2701 Int32Regs:$b, Int32Regs:$a),
2702 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2704 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2705 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// s32 result, f32 coordinates.
2708 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2709 Int32Regs:$b, Int32Regs:$a),
2710 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2712 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2713 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2715 def TEX_3D_S32_F32_LEVEL
2716 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2717 Int32Regs:$b, Int32Regs:$a),
2718 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2719 Float32Regs:$z, Float32Regs:$lod),
2720 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2721 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2723 def TEX_3D_S32_F32_GRAD
2724 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2725 Int32Regs:$b, Int32Regs:$a),
2726 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2728 Float32Regs:$gradx0, Float32Regs:$gradx1,
2729 Float32Regs:$gradx2, Float32Regs:$grady0,
2730 Float32Regs:$grady1, Float32Regs:$grady2),
2731 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2732 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2733 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2734 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// u32 result, s32 coordinates.
2737 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2738 Int32Regs:$b, Int32Regs:$a),
2739 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2741 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2742 "[$t, $s, \\{$x, $y, $z, $z\\}];",
// u32 result, f32 coordinates.
2745 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2746 Int32Regs:$b, Int32Regs:$a),
2747 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2749 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2750 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2752 def TEX_3D_U32_F32_LEVEL
2753 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2754 Int32Regs:$b, Int32Regs:$a),
2755 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2756 Float32Regs:$z, Float32Regs:$lod),
2757 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2758 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2760 def TEX_3D_U32_F32_GRAD
2761 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2762 Int32Regs:$b, Int32Regs:$a),
2763 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2765 Float32Regs:$gradx0, Float32Regs:$gradx1,
2766 Float32Regs:$gradx2, Float32Regs:$grady0,
2767 Float32Regs:$grady1, Float32Regs:$grady2),
2768 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2769 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2770 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2771 "\\{$grady0, $grady1, $grady2, $grady2\\};",
// Cube fetches (tex.cube). Coordinates are always f32 and are printed as the
// four-element vector {$x, $y, $z, $z}, repeating $z in the fourth slot.
// Only plain and _LEVEL (extra $lod) forms appear in this group, for f32,
// s32 and u32 results.
2774 def TEX_CUBE_F32_F32
2775 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2776 Float32Regs:$b, Float32Regs:$a),
2777 (ins Int64Regs:$t, Int64Regs:$s,
2778 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2779 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2780 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2782 def TEX_CUBE_F32_F32_LEVEL
2783 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2784 Float32Regs:$b, Float32Regs:$a),
2785 (ins Int64Regs:$t, Int64Regs:$s,
2786 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2788 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2789 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2791 def TEX_CUBE_S32_F32
2792 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2793 Int32Regs:$b, Int32Regs:$a),
2794 (ins Int64Regs:$t, Int64Regs:$s,
2795 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2796 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2797 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2799 def TEX_CUBE_S32_F32_LEVEL
2800 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2801 Int32Regs:$b, Int32Regs:$a),
2802 (ins Int64Regs:$t, Int64Regs:$s,
2803 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2805 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2806 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2808 def TEX_CUBE_U32_F32
2809 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2810 Int32Regs:$b, Int32Regs:$a),
2811 (ins Int64Regs:$t, Int64Regs:$s,
2812 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2813 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2814 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2816 def TEX_CUBE_U32_F32_LEVEL
2817 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2818 Int32Regs:$b, Int32Regs:$a),
2819 (ins Int64Regs:$t, Int64Regs:$s,
2820 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2822 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2823 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
// Cube array fetches (tex.acube). The 32-bit integer operand $l takes the
// first slot of the coordinate vector, so all four elements are distinct:
// {$l, $x, $y, $z}. Coordinates are always f32. Only plain and _LEVEL
// (extra $lod) forms appear in this group, for f32, s32 and u32 results.
2826 def TEX_CUBE_ARRAY_F32_F32
2827 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2828 Float32Regs:$b, Float32Regs:$a),
2829 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2830 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2831 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2832 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2834 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2835 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2836 Float32Regs:$b, Float32Regs:$a),
2837 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2838 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2840 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2841 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2843 def TEX_CUBE_ARRAY_S32_F32
2844 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2845 Int32Regs:$b, Int32Regs:$a),
2846 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2847 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2848 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2849 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2851 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2852 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2853 Int32Regs:$b, Int32Regs:$a),
2854 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2855 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2857 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2858 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2860 def TEX_CUBE_ARRAY_U32_F32
2861 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2862 Int32Regs:$b, Int32Regs:$a),
2863 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2864 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2865 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2866 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2868 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2869 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2870 Int32Regs:$b, Int32Regs:$a),
2871 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2872 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2874 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2875 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2878 def TLD4_R_2D_F32_F32
2879 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2880 Float32Regs:$v2, Float32Regs:$v3),
2881 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2882 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2883 "[$t, $s, \\{$x, $y\\}];",
2885 def TLD4_G_2D_F32_F32
2886 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2887 Float32Regs:$v2, Float32Regs:$v3),
2888 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2889 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2890 "[$t, $s, \\{$x, $y\\}];",
2892 def TLD4_B_2D_F32_F32
2893 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2894 Float32Regs:$v2, Float32Regs:$v3),
2895 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2896 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2897 "[$t, $s, \\{$x, $y\\}];",
2899 def TLD4_A_2D_F32_F32
2900 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2901 Float32Regs:$v2, Float32Regs:$v3),
2902 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2903 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2904 "[$t, $s, \\{$x, $y\\}];",
2906 def TLD4_R_2D_S32_F32
2907 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2908 Int32Regs:$v2, Int32Regs:$v3),
2909 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2910 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2911 "[$t, $s, \\{$x, $y\\}];",
2913 def TLD4_G_2D_S32_F32
2914 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2915 Int32Regs:$v2, Int32Regs:$v3),
2916 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2917 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2918 "[$t, $s, \\{$x, $y\\}];",
2920 def TLD4_B_2D_S32_F32
2921 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2922 Int32Regs:$v2, Int32Regs:$v3),
2923 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2924 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2925 "[$t, $s, \\{$x, $y\\}];",
2927 def TLD4_A_2D_S32_F32
2928 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2929 Int32Regs:$v2, Int32Regs:$v3),
2930 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2931 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2932 "[$t, $s, \\{$x, $y\\}];",
2934 def TLD4_R_2D_U32_F32
2935 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2936 Int32Regs:$v2, Int32Regs:$v3),
2937 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2938 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
2939 "[$t, $s, \\{$x, $y\\}];",
// PTX tld4.g.2d.v4.u32.f32: four Int32Regs results from [$t, $s, {$x, $y}].
def TLD4_G_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// PTX tld4.b.2d.v4.u32.f32: four Int32Regs results from [$t, $s, {$x, $y}].
def TLD4_B_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
// PTX tld4.a.2d.v4.u32.f32: four Int32Regs results from [$t, $s, {$x, $y}].
def TLD4_A_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
2966 let IsTex = 1, IsTexModeUnified = 1 in {
2967 // Texture fetch instructions using handles
// Unified tex.1d fetch: f32 results, s32 coordinate $x, texture operand $t.
def TEX_UNIFIED_1D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x),
              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified tex.1d fetch: f32 results, f32 coordinate $x, texture operand $t.
def TEX_UNIFIED_1D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x),
              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified tex.level.1d fetch: f32 results, f32 coordinate $x, f32 $lod.
def TEX_UNIFIED_1D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], $lod;",
              []>;
// Unified tex.grad.1d fetch: f32 results, f32 coordinate $x, gradients
// $gradx and $grady.
def TEX_UNIFIED_1D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified tex.1d fetch: s32 results (Int32Regs), s32 coordinate $x.
def TEX_UNIFIED_1D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x),
              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified tex.1d fetch: s32 results (Int32Regs), f32 coordinate $x.
def TEX_UNIFIED_1D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x),
              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified tex.level.1d fetch: s32 results, f32 coordinate $x, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_1D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], $lod;",
              []>;
// Unified tex.grad.1d fetch: s32 results, f32 coordinate $x, gradients
// $gradx and $grady.
def TEX_UNIFIED_1D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified tex.1d fetch: u32 results (Int32Regs), s32 coordinate $x.
def TEX_UNIFIED_1D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x),
              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified tex.1d fetch: u32 results (Int32Regs), f32 coordinate $x.
def TEX_UNIFIED_1D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x),
              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;
// Unified tex.level.1d fetch: u32 results, f32 coordinate $x, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_1D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], $lod;",
              []>;
// Unified tex.grad.1d fetch: u32 results, f32 coordinate $x, gradients
// $gradx and $grady.
def TEX_UNIFIED_1D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified tex.a1d fetch: f32 results, coordinate vector {$l, $x} with s32 $x.
def TEX_UNIFIED_1D_ARRAY_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}];",
              []>;
// Unified tex.a1d fetch: f32 results, coordinate vector {$l, $x} with f32 $x.
def TEX_UNIFIED_1D_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}];",
              []>;
// Unified tex.level.a1d fetch: f32 results, {$l, $x} with f32 $x, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}], $lod;",
              []>;
// Unified tex.grad.a1d fetch: f32 results, {$l, $x} with f32 $x, gradients
// $gradx and $grady.
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified tex.a1d fetch: s32 results, coordinate vector {$l, $x} with s32 $x.
def TEX_UNIFIED_1D_ARRAY_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}];",
              []>;
// Unified tex.a1d fetch: s32 results, coordinate vector {$l, $x} with f32 $x.
def TEX_UNIFIED_1D_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}];",
              []>;
// Unified tex.level.a1d fetch: s32 results, {$l, $x} with f32 $x, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}], $lod;",
              []>;
// Unified tex.grad.a1d fetch: s32 results, {$l, $x} with f32 $x, gradients
// $gradx and $grady.
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified tex.a1d fetch: u32 results, coordinate vector {$l, $x} with s32 $x.
def TEX_UNIFIED_1D_ARRAY_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}];",
              []>;
// Unified tex.a1d fetch: u32 results, coordinate vector {$l, $x} with f32 $x.
def TEX_UNIFIED_1D_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}];",
              []>;
// Unified tex.level.a1d fetch: u32 results, {$l, $x} with f32 $x, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}], $lod;",
              []>;
// Unified tex.grad.a1d fetch: u32 results, {$l, $x} with f32 $x, gradients
// $gradx and $grady.
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
// Unified tex.2d fetch: f32 results, s32 coordinates {$x, $y}.
def TEX_UNIFIED_2D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tex.2d fetch: f32 results, f32 coordinates {$x, $y}.
def TEX_UNIFIED_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tex.level.2d fetch: f32 results, f32 coordinates {$x, $y}, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_2D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}], $lod;",
              []>;
// Unified tex.grad.2d fetch: f32 results, f32 coordinates {$x, $y}, and two
// 2-element gradient vectors {$gradx0, $gradx1} / {$grady0, $grady1}.
def TEX_UNIFIED_2D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Unified tex.2d fetch: s32 results, s32 coordinates {$x, $y}.
def TEX_UNIFIED_2D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tex.2d fetch: s32 results, f32 coordinates {$x, $y}.
def TEX_UNIFIED_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tex.level.2d fetch: s32 results, f32 coordinates {$x, $y}, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_2D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}], $lod;",
              []>;
// Unified tex.grad.2d fetch: s32 results, f32 coordinates {$x, $y}, and two
// 2-element gradient vectors {$gradx0, $gradx1} / {$grady0, $grady1}.
def TEX_UNIFIED_2D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Unified tex.2d fetch: u32 results, s32 coordinates {$x, $y}.
def TEX_UNIFIED_2D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tex.2d fetch: u32 results, f32 coordinates {$x, $y}.
def TEX_UNIFIED_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tex.level.2d fetch: u32 results, f32 coordinates {$x, $y}, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_2D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}], $lod;",
              []>;
// Unified tex.grad.2d fetch: u32 results, f32 coordinates {$x, $y}, and two
// 2-element gradient vectors {$gradx0, $gradx1} / {$grady0, $grady1}.
def TEX_UNIFIED_2D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Unified tex.a2d fetch: f32 results, s32 coordinates. The coordinate vector
// is {$l, $x, $y, $y}; the repeated $y is presumably padding (TODO confirm).
def TEX_UNIFIED_2D_ARRAY_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}];",
              []>;
// Unified tex.a2d fetch: f32 results, f32 coordinates. The coordinate vector
// is {$l, $x, $y, $y}; the repeated $y is presumably padding (TODO confirm).
def TEX_UNIFIED_2D_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}];",
              []>;
// Unified tex.level.a2d fetch: f32 results, f32 coordinates, f32 $lod.
// Coordinate vector is {$l, $x, $y, $y}.
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
// Unified tex.grad.a2d fetch: f32 results, f32 coordinates {$l, $x, $y, $y},
// and gradient vectors {$gradx0, $gradx1} / {$grady0, $grady1}.
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Unified tex.a2d fetch: s32 results, s32 coordinates. The coordinate vector
// is {$l, $x, $y, $y}; the repeated $y is presumably padding (TODO confirm).
def TEX_UNIFIED_2D_ARRAY_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}];",
              []>;
// Unified tex.a2d fetch: s32 results, f32 coordinates. The coordinate vector
// is {$l, $x, $y, $y}; the repeated $y is presumably padding (TODO confirm).
def TEX_UNIFIED_2D_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}];",
              []>;
// Unified tex.level.a2d fetch: s32 results, f32 coordinates, f32 $lod.
// Coordinate vector is {$l, $x, $y, $y}.
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
// Unified tex.grad.a2d fetch: s32 results, f32 coordinates {$l, $x, $y, $y},
// and gradient vectors {$gradx0, $gradx1} / {$grady0, $grady1}.
// $y is declared as an input because the asm string references it.
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Unified tex.a2d fetch: u32 results, s32 coordinates. The coordinate vector
// is {$l, $x, $y, $y}; the repeated $y is presumably padding (TODO confirm).
def TEX_UNIFIED_2D_ARRAY_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}];",
              []>;
// Unified tex.a2d fetch: u32 results, f32 coordinates. The coordinate vector
// is {$l, $x, $y, $y}; the repeated $y is presumably padding (TODO confirm).
def TEX_UNIFIED_2D_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}];",
              []>;
// Unified tex.level.a2d fetch: u32 results, f32 coordinates, f32 $lod.
// Coordinate vector is {$l, $x, $y, $y}.
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
// Unified tex.grad.a2d fetch: u32 results, f32 coordinates {$l, $x, $y, $y},
// and gradient vectors {$gradx0, $gradx1} / {$grady0, $grady1}.
// $y is declared as an input because the asm string references it.
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
// Unified tex.3d fetch: f32 results, s32 coordinates. The coordinate vector
// is {$x, $y, $z, $z}; the repeated $z is presumably padding (TODO confirm).
def TEX_UNIFIED_3D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.3d fetch: f32 results, f32 coordinates. The coordinate vector
// is {$x, $y, $z, $z}; the repeated $z is presumably padding (TODO confirm).
def TEX_UNIFIED_3D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.level.3d fetch: f32 results, f32 coordinates, f32 $lod.
// Coordinate vector is {$x, $y, $z, $z}.
def TEX_UNIFIED_3D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Unified tex.grad.3d fetch: f32 results, f32 coordinates {$x, $y, $z, $z},
// and two gradient vectors whose 4th slot repeats the 3rd component.
// $z is declared as an input because the asm string references it.
def TEX_UNIFIED_3D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
// Unified tex.3d fetch: s32 results, s32 coordinates. The coordinate vector
// is {$x, $y, $z, $z}; the repeated $z is presumably padding (TODO confirm).
def TEX_UNIFIED_3D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.3d fetch: s32 results, f32 coordinates. The coordinate vector
// is {$x, $y, $z, $z}; the repeated $z is presumably padding (TODO confirm).
def TEX_UNIFIED_3D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.level.3d fetch: s32 results, f32 coordinates, f32 $lod.
// Coordinate vector is {$x, $y, $z, $z}.
def TEX_UNIFIED_3D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Unified tex.grad.3d fetch: s32 results, f32 coordinates {$x, $y, $z, $z},
// and two gradient vectors whose 4th slot repeats the 3rd component.
// $z is declared as an input because the asm string references it.
def TEX_UNIFIED_3D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
// Unified tex.3d fetch: u32 results, s32 coordinates. The coordinate vector
// is {$x, $y, $z, $z}; the repeated $z is presumably padding (TODO confirm).
def TEX_UNIFIED_3D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.3d fetch: u32 results, f32 coordinates. The coordinate vector
// is {$x, $y, $z, $z}; the repeated $z is presumably padding (TODO confirm).
def TEX_UNIFIED_3D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.level.3d fetch: u32 results, f32 coordinates, f32 $lod.
// Coordinate vector is {$x, $y, $z, $z}.
def TEX_UNIFIED_3D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Unified tex.grad.3d fetch: u32 results, f32 coordinates {$x, $y, $z, $z},
// and two gradient vectors whose 4th slot repeats the 3rd component.
// $z is declared as an input because the asm string references it.
def TEX_UNIFIED_3D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
// Unified tex.cube fetch: f32 results, f32 coordinates {$x, $y, $z, $z}.
// $t is declared as an input because the asm string references it.
def TEX_UNIFIED_CUBE_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.level.cube fetch: f32 results, f32 coordinates
// {$x, $y, $z, $z}, f32 $lod. $t and $lod are declared as inputs because the
// asm string references them.
def TEX_UNIFIED_CUBE_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Unified tex.cube fetch: s32 results, f32 coordinates {$x, $y, $z, $z}.
// $t is declared as an input because the asm string references it.
def TEX_UNIFIED_CUBE_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.level.cube fetch: s32 results, f32 coordinates
// {$x, $y, $z, $z}, f32 $lod. $t and $lod are declared as inputs because the
// asm string references them.
def TEX_UNIFIED_CUBE_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Unified tex.cube fetch: u32 results, f32 coordinates {$x, $y, $z, $z}.
// $t is declared as an input because the asm string references it.
def TEX_UNIFIED_CUBE_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}];",
              []>;
// Unified tex.level.cube fetch: u32 results, f32 coordinates
// {$x, $y, $z, $z}, f32 $lod. $t and $lod are declared as inputs because the
// asm string references them.
def TEX_UNIFIED_CUBE_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// Unified tex.acube fetch: f32 results, coordinate vector {$l, $x, $y, $z}
// with s32 $l and f32 $x/$y/$z.
def TEX_UNIFIED_CUBE_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}];",
              []>;
// Unified tex.level.acube fetch: f32 results, {$l, $x, $y, $z}, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
// Unified tex.acube fetch: s32 results, coordinate vector {$l, $x, $y, $z}
// with s32 $l and f32 $x/$y/$z.
def TEX_UNIFIED_CUBE_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}];",
              []>;
// Unified tex.level.acube fetch: s32 results, {$l, $x, $y, $z}, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
// Unified tex.acube fetch: u32 results, coordinate vector {$l, $x, $y, $z}
// with s32 $l and f32 $x/$y/$z.
def TEX_UNIFIED_CUBE_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}];",
              []>;
// Unified tex.level.acube fetch: u32 results, {$l, $x, $y, $z}, f32 $lod.
// $lod is declared as an input because the asm string references it.
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
// Unified tld4.r.2d: four f32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_R_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.g.2d: four f32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_G_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.b.2d: four f32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_B_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.a.2d: four f32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_A_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.r.2d: four s32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_R_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.g.2d: four s32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_G_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.b.2d: four s32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_B_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.a.2d: four s32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_A_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.r.2d: four u32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_R_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.g.2d: four u32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_G_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.b.2d: four u32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_B_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
// Unified tld4.a.2d: four u32 results from [$t, {$x, $y}] (f32 coordinates).
def TLD4_UNIFIED_A_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
3649 //=== Surface load instructions
// suld.b.1d.b8.clamp: one b8 element into Int16Regs $r from [$s, {$x}].
def SULD_1D_I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.1d.b16.clamp: one b16 element into Int16Regs $r from [$s, {$x}].
def SULD_1D_I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.1d.b32.clamp: one b32 element into Int32Regs $r from [$s, {$x}].
def SULD_1D_I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.1d.b64.clamp: one b64 element into Int64Regs $r from [$s, {$x}].
def SULD_1D_I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.a1d.b8.clamp: one b8 element into Int16Regs $r from [$s, {$l, $x}].
def SULD_1D_ARRAY_I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.a1d.b16.clamp: one b16 element into Int16Regs $r from [$s, {$l, $x}].
def SULD_1D_ARRAY_I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.a1d.b32.clamp: one b32 element into Int32Regs $r from [$s, {$l, $x}].
def SULD_1D_ARRAY_I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.a1d.b64.clamp: one b64 element into Int64Regs $r from [$s, {$l, $x}].
def SULD_1D_ARRAY_I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.2d.b8.clamp: one b8 element into Int16Regs $r from [$s, {$x, $y}].
def SULD_2D_I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.2d.b16.clamp: one b16 element into Int16Regs $r from [$s, {$x, $y}].
def SULD_2D_I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.2d.b32.clamp: one b32 element into Int32Regs $r from [$s, {$x, $y}].
def SULD_2D_I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.2d.b64.clamp: one b64 element into Int64Regs $r from [$s, {$x, $y}].
def SULD_2D_I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.a2d.b8.clamp: one b8 element into Int16Regs $r. The coordinate
// vector is {$l, $x, $y, $y} ($y repeated in the 4th slot).
def SULD_2D_ARRAY_I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
              "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
              []>;
// suld.b.a2d.b16.clamp: one b16 element into Int16Regs $r. The coordinate
// vector is {$l, $x, $y, $y} ($y repeated in the 4th slot).
def SULD_2D_ARRAY_I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
              "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
              []>;
// suld.b.a2d.b32.clamp: one b32 element into Int32Regs $r. The coordinate
// vector is {$l, $x, $y, $y} ($y repeated in the 4th slot).
def SULD_2D_ARRAY_I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
              "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
              []>;
// suld.b.a2d.b64.clamp: one b64 element into Int64Regs $r. The coordinate
// vector is {$l, $x, $y, $y} ($y repeated in the 4th slot).
def SULD_2D_ARRAY_I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
              "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
              []>;
// suld.b.3d.b8.clamp: one b8 element into Int16Regs $r. The coordinate
// vector is {$x, $y, $z, $z} ($z repeated in the 4th slot).
def SULD_3D_I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
              "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
              []>;
// suld.b.3d.b16.clamp: one b16 element into Int16Regs $r. The coordinate
// vector is {$x, $y, $z, $z} ($z repeated in the 4th slot).
def SULD_3D_I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
              "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
              []>;
// suld.b.3d.b32.clamp: one b32 element into Int32Regs $r. The coordinate
// vector is {$x, $y, $z, $z} ($z repeated in the 4th slot).
def SULD_3D_I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
              "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
              []>;
// suld.b.3d.b64.clamp: one b64 element into Int64Regs $r. The coordinate
// vector is {$x, $y, $z, $z} ($z repeated in the 4th slot).
def SULD_3D_I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
              "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
              []>;
// suld.b.1d.v2.b8.clamp: two b8 elements into Int16Regs {$r, $g} from [$s, {$x}].
def SULD_1D_V2I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.1d.v2.b16.clamp: two b16 elements into Int16Regs {$r, $g} from [$s, {$x}].
def SULD_1D_V2I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.1d.v2.b32.clamp: two b32 elements into Int32Regs {$r, $g} from [$s, {$x}].
def SULD_1D_V2I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.1d.v2.b64.clamp: two b64 elements into Int64Regs {$r, $g} from [$s, {$x}].
def SULD_1D_V2I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x),
              "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
              []>;
// suld.b.a1d.v2.b8.clamp: two b8 elements into Int16Regs {$r, $g} from
// [$s, {$l, $x}].
def SULD_1D_ARRAY_V2I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.a1d.v2.b16.clamp: two b16 elements into Int16Regs {$r, $g} from
// [$s, {$l, $x}].
def SULD_1D_ARRAY_V2I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.a1d.v2.b32.clamp: two b32 elements into Int32Regs {$r, $g} from
// [$s, {$l, $x}].
def SULD_1D_ARRAY_V2I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.a1d.v2.b64.clamp: two b64 elements into Int64Regs {$r, $g} from
// [$s, {$l, $x}].
def SULD_1D_ARRAY_V2I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
              []>;
// suld.b.2d.v2.b8.clamp: two b8 elements into Int16Regs {$r, $g} from
// [$s, {$x, $y}].
def SULD_2D_V2I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.2d.v2.b16.clamp: two b16 elements into Int16Regs {$r, $g} from
// [$s, {$x, $y}].
def SULD_2D_V2I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.2d.v2.b32.clamp: two b32 elements into Int32Regs {$r, $g} from
// [$s, {$x, $y}].
def SULD_2D_V2I32_CLAMP
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.2d.v2.b64.clamp: two b64 elements into Int64Regs {$r, $g} from
// [$s, {$x, $y}].
def SULD_2D_V2I64_CLAMP
  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
              []>;
// suld.b.a2d.v2.b8.clamp: two b8 elements into Int16Regs {$r, $g}. The
// coordinate vector is {$l, $x, $y, $y} ($y repeated in the 4th slot).
def SULD_2D_ARRAY_V2I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
              "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
              "[$s, \\{$l, $x, $y, $y\\}];",
              []>;
// suld.b.a2d.v2.b16.clamp: two b16 elements into Int16Regs {$r, $g}. The
// coordinate vector is {$l, $x, $y, $y} ($y repeated in the 4th slot).
def SULD_2D_ARRAY_V2I16_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
              "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
              "[$s, \\{$l, $x, $y, $y\\}];",
              []>;
3834 def SULD_2D_ARRAY_V2I32_CLAMP
3835 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3836 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3837 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3838 "[$s, \\{$l, $x, $y, $y\\}];",
3840 def SULD_2D_ARRAY_V2I64_CLAMP
3841 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3842 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3843 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3844 "[$s, \\{$l, $x, $y, $y\\}];",
3847 def SULD_3D_V2I8_CLAMP
3848 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3849 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3850 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3852 def SULD_3D_V2I16_CLAMP
3853 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3854 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3855 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3857 def SULD_3D_V2I32_CLAMP
3858 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3859 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3860 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3862 def SULD_3D_V2I64_CLAMP
3863 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3864 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3865 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-result (.v4) surface loads that use the .clamp suffix.
// Every record below writes $r, $g, $b and $a and reads a 64-bit register $s
// plus 32-bit coordinate registers. Only b8, b16 and b32 element widths are
// defined in this group; b8 and b16 both use Int16Regs for their results.
//
// 1d geometry: a single coordinate, printed as {$x}.
3870 def SULD_1D_V4I8_CLAMP
3871 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3872 (ins Int64Regs:$s, Int32Regs:$x),
3873 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3875 def SULD_1D_V4I16_CLAMP
3876 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3877 (ins Int64Regs:$s, Int32Regs:$x),
3878 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3880 def SULD_1D_V4I32_CLAMP
3881 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3882 (ins Int64Regs:$s, Int32Regs:$x),
3883 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
// NOTE(review): $l is presumably the array layer index -- confirm.
3886 def SULD_1D_ARRAY_V4I8_CLAMP
3887 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3888 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3889 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3890 "[$s, \\{$l, $x\\}];",
3892 def SULD_1D_ARRAY_V4I16_CLAMP
3893 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3894 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3895 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3896 "[$s, \\{$l, $x\\}];",
3898 def SULD_1D_ARRAY_V4I32_CLAMP
3899 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3900 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3901 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3902 "[$s, \\{$l, $x\\}];",
// 2d geometry: the coordinate list is printed as {$x, $y}.
3905 def SULD_2D_V4I8_CLAMP
3906 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3907 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3908 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3910 def SULD_2D_V4I16_CLAMP
3911 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3912 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3913 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3915 def SULD_2D_V4I32_CLAMP
3916 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3917 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3918 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: three coordinate inputs, printed as four entries with $y
// repeated in the last slot.
// NOTE(review): presumably padding to a 4-element coordinate vector --
// confirm against the PTX ISA.
3921 def SULD_2D_ARRAY_V4I8_CLAMP
3922 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3923 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3924 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3925 "[$s, \\{$l, $x, $y, $y\\}];",
3927 def SULD_2D_ARRAY_V4I16_CLAMP
3928 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3929 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3930 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3931 "[$s, \\{$l, $x, $y, $y\\}];",
3933 def SULD_2D_ARRAY_V4I32_CLAMP
3934 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3935 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3936 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3937 "[$s, \\{$l, $x, $y, $y\\}];",
// 3d geometry: three coordinate inputs, printed as four entries with $z
// repeated in the last slot.
3941 def SULD_3D_V4I8_CLAMP
3942 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3943 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3944 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3945 "[$s, \\{$x, $y, $z, $z\\}];",
3947 def SULD_3D_V4I16_CLAMP
3948 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3949 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3950 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3951 "[$s, \\{$x, $y, $z, $z\\}];",
3953 def SULD_3D_V4I32_CLAMP
3954 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3955 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3956 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3957 "[$s, \\{$x, $y, $z, $z\\}];",
// Single-result surface loads that use the .trap suffix, covering the 1d,
// a1d, 2d, a2d and 3d geometries at b8/b16/b32/b64 element widths.
// Each record writes $r and reads a 64-bit register $s plus 32-bit coordinate
// registers. b8 and b16 both use Int16Regs for the result. For a2d and 3d the
// coordinate list is printed with four entries, repeating the last
// coordinate ($y or $z).
// NOTE(review): the repetition is presumably padding to a 4-element
// coordinate vector -- confirm against the PTX ISA.
3965 : NVPTXInst<(outs Int16Regs:$r),
3966 (ins Int64Regs:$s, Int32Regs:$x),
3967 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3969 def SULD_1D_I16_TRAP
3970 : NVPTXInst<(outs Int16Regs:$r),
3971 (ins Int64Regs:$s, Int32Regs:$x),
3972 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3974 def SULD_1D_I32_TRAP
3975 : NVPTXInst<(outs Int32Regs:$r),
3976 (ins Int64Regs:$s, Int32Regs:$x),
3977 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3979 def SULD_1D_I64_TRAP
3980 : NVPTXInst<(outs Int64Regs:$r),
3981 (ins Int64Regs:$s, Int32Regs:$x),
3982 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
3985 def SULD_1D_ARRAY_I8_TRAP
3986 : NVPTXInst<(outs Int16Regs:$r),
3987 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3988 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3990 def SULD_1D_ARRAY_I16_TRAP
3991 : NVPTXInst<(outs Int16Regs:$r),
3992 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3993 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3995 def SULD_1D_ARRAY_I32_TRAP
3996 : NVPTXInst<(outs Int32Regs:$r),
3997 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3998 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4000 def SULD_1D_ARRAY_I64_TRAP
4001 : NVPTXInst<(outs Int64Regs:$r),
4002 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4003 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
4007 : NVPTXInst<(outs Int16Regs:$r),
4008 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4009 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4011 def SULD_2D_I16_TRAP
4012 : NVPTXInst<(outs Int16Regs:$r),
4013 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4014 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4016 def SULD_2D_I32_TRAP
4017 : NVPTXInst<(outs Int32Regs:$r),
4018 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4019 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
4021 def SULD_2D_I64_TRAP
4022 : NVPTXInst<(outs Int64Regs:$r),
4023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4024 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: $y is printed twice in the coordinate list.
4027 def SULD_2D_ARRAY_I8_TRAP
4028 : NVPTXInst<(outs Int16Regs:$r),
4029 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4030 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4032 def SULD_2D_ARRAY_I16_TRAP
4033 : NVPTXInst<(outs Int16Regs:$r),
4034 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4035 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4037 def SULD_2D_ARRAY_I32_TRAP
4038 : NVPTXInst<(outs Int32Regs:$r),
4039 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4040 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4042 def SULD_2D_ARRAY_I64_TRAP
4043 : NVPTXInst<(outs Int64Regs:$r),
4044 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4045 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4049 : NVPTXInst<(outs Int16Regs:$r),
4050 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4051 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4053 def SULD_3D_I16_TRAP
4054 : NVPTXInst<(outs Int16Regs:$r),
4055 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4056 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4058 def SULD_3D_I32_TRAP
4059 : NVPTXInst<(outs Int32Regs:$r),
4060 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4061 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4063 def SULD_3D_I64_TRAP
4064 : NVPTXInst<(outs Int64Regs:$r),
4065 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4066 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-result (.v2) surface loads that use the .trap suffix.
// Every record below writes $r and $g and reads a 64-bit register $s plus
// 32-bit coordinate registers; b8 and b16 both use Int16Regs for results.
//
// 1d geometry: a single coordinate, printed as {$x}.
4071 def SULD_1D_V2I8_TRAP
4072 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4073 (ins Int64Regs:$s, Int32Regs:$x),
4074 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4076 def SULD_1D_V2I16_TRAP
4077 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4078 (ins Int64Regs:$s, Int32Regs:$x),
4079 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4081 def SULD_1D_V2I32_TRAP
4082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4083 (ins Int64Regs:$s, Int32Regs:$x),
4084 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4086 def SULD_1D_V2I64_TRAP
4087 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4088 (ins Int64Regs:$s, Int32Regs:$x),
4089 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
// NOTE(review): $l is presumably the array layer index -- confirm.
4092 def SULD_1D_ARRAY_V2I8_TRAP
4093 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4094 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4095 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4097 def SULD_1D_ARRAY_V2I16_TRAP
4098 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4099 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4100 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4102 def SULD_1D_ARRAY_V2I32_TRAP
4103 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4104 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4105 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4107 def SULD_1D_ARRAY_V2I64_TRAP
4108 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4109 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4110 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2d geometry: the coordinate list is printed as {$x, $y}.
4113 def SULD_2D_V2I8_TRAP
4114 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4115 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4116 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4118 def SULD_2D_V2I16_TRAP
4119 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4120 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4121 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4123 def SULD_2D_V2I32_TRAP
4124 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4125 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4126 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4128 def SULD_2D_V2I64_TRAP
4129 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4130 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4131 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: three coordinate inputs, printed as four entries with $y
// repeated in the last slot.
// NOTE(review): presumably padding to a 4-element coordinate vector --
// confirm against the PTX ISA.
4134 def SULD_2D_ARRAY_V2I8_TRAP
4135 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4136 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4137 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4138 "[$s, \\{$l, $x, $y, $y\\}];",
4140 def SULD_2D_ARRAY_V2I16_TRAP
4141 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4142 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4143 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4144 "[$s, \\{$l, $x, $y, $y\\}];",
4146 def SULD_2D_ARRAY_V2I32_TRAP
4147 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4148 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4149 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4150 "[$s, \\{$l, $x, $y, $y\\}];",
4152 def SULD_2D_ARRAY_V2I64_TRAP
4153 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4154 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4155 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4156 "[$s, \\{$l, $x, $y, $y\\}];",
// 3d geometry: three coordinate inputs, printed as four entries with $z
// repeated in the last slot.
4159 def SULD_3D_V2I8_TRAP
4160 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4161 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4162 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4164 def SULD_3D_V2I16_TRAP
4165 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4166 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4167 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4169 def SULD_3D_V2I32_TRAP
4170 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4171 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4172 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4174 def SULD_3D_V2I64_TRAP
4175 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4176 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4177 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-result (.v4) surface loads that use the .trap suffix.
// Every record below writes $r, $g, $b and $a and reads a 64-bit register $s
// plus 32-bit coordinate registers. Only b8, b16 and b32 element widths are
// defined in this group; b8 and b16 both use Int16Regs for their results.
//
// 1d geometry: a single coordinate, printed as {$x}.
4182 def SULD_1D_V4I8_TRAP
4183 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4184 (ins Int64Regs:$s, Int32Regs:$x),
4185 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4187 def SULD_1D_V4I16_TRAP
4188 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4189 (ins Int64Regs:$s, Int32Regs:$x),
4190 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4192 def SULD_1D_V4I32_TRAP
4193 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4194 (ins Int64Regs:$s, Int32Regs:$x),
4195 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
// NOTE(review): $l is presumably the array layer index -- confirm.
4198 def SULD_1D_ARRAY_V4I8_TRAP
4199 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4200 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4201 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4202 "[$s, \\{$l, $x\\}];",
4204 def SULD_1D_ARRAY_V4I16_TRAP
4205 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4206 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4207 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4208 "[$s, \\{$l, $x\\}];",
4210 def SULD_1D_ARRAY_V4I32_TRAP
4211 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4212 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4213 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4214 "[$s, \\{$l, $x\\}];",
// 2d geometry: the coordinate list is printed as {$x, $y}.
4217 def SULD_2D_V4I8_TRAP
4218 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4219 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4220 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4222 def SULD_2D_V4I16_TRAP
4223 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4224 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4225 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4227 def SULD_2D_V4I32_TRAP
4228 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4229 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4230 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: three coordinate inputs, printed as four entries with $y
// repeated in the last slot.
// NOTE(review): presumably padding to a 4-element coordinate vector --
// confirm against the PTX ISA.
4233 def SULD_2D_ARRAY_V4I8_TRAP
4234 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4235 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4236 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4237 "[$s, \\{$l, $x, $y, $y\\}];",
4239 def SULD_2D_ARRAY_V4I16_TRAP
4240 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4241 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4242 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4243 "[$s, \\{$l, $x, $y, $y\\}];",
4245 def SULD_2D_ARRAY_V4I32_TRAP
4246 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4247 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4248 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4249 "[$s, \\{$l, $x, $y, $y\\}];",
// 3d geometry: three coordinate inputs, printed as four entries with $z
// repeated in the last slot.
4253 def SULD_3D_V4I8_TRAP
4254 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4255 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4256 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4257 "[$s, \\{$x, $y, $z, $z\\}];",
4259 def SULD_3D_V4I16_TRAP
4260 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4261 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4262 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4263 "[$s, \\{$x, $y, $z, $z\\}];",
4265 def SULD_3D_V4I32_TRAP
4266 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4267 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4268 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4269 "[$s, \\{$x, $y, $z, $z\\}];",
// Single-result surface loads that use the .zero suffix, covering the 1d,
// a1d, 2d, a2d and 3d geometries at b8/b16/b32/b64 element widths.
// Each record writes $r and reads a 64-bit register $s plus 32-bit coordinate
// registers. b8 and b16 both use Int16Regs for the result. For a2d and 3d the
// coordinate list is printed with four entries, repeating the last
// coordinate ($y or $z).
// NOTE(review): the repetition is presumably padding to a 4-element
// coordinate vector -- confirm against the PTX ISA.
4276 : NVPTXInst<(outs Int16Regs:$r),
4277 (ins Int64Regs:$s, Int32Regs:$x),
4278 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4280 def SULD_1D_I16_ZERO
4281 : NVPTXInst<(outs Int16Regs:$r),
4282 (ins Int64Regs:$s, Int32Regs:$x),
4283 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4285 def SULD_1D_I32_ZERO
4286 : NVPTXInst<(outs Int32Regs:$r),
4287 (ins Int64Regs:$s, Int32Regs:$x),
4288 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4290 def SULD_1D_I64_ZERO
4291 : NVPTXInst<(outs Int64Regs:$r),
4292 (ins Int64Regs:$s, Int32Regs:$x),
4293 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
4296 def SULD_1D_ARRAY_I8_ZERO
4297 : NVPTXInst<(outs Int16Regs:$r),
4298 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4299 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4301 def SULD_1D_ARRAY_I16_ZERO
4302 : NVPTXInst<(outs Int16Regs:$r),
4303 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4304 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4306 def SULD_1D_ARRAY_I32_ZERO
4307 : NVPTXInst<(outs Int32Regs:$r),
4308 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4309 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4311 def SULD_1D_ARRAY_I64_ZERO
4312 : NVPTXInst<(outs Int64Regs:$r),
4313 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4314 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4318 : NVPTXInst<(outs Int16Regs:$r),
4319 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4320 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4322 def SULD_2D_I16_ZERO
4323 : NVPTXInst<(outs Int16Regs:$r),
4324 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4325 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4327 def SULD_2D_I32_ZERO
4328 : NVPTXInst<(outs Int32Regs:$r),
4329 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4330 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4332 def SULD_2D_I64_ZERO
4333 : NVPTXInst<(outs Int64Regs:$r),
4334 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4335 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: $y is printed twice in the coordinate list.
4338 def SULD_2D_ARRAY_I8_ZERO
4339 : NVPTXInst<(outs Int16Regs:$r),
4340 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4341 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4343 def SULD_2D_ARRAY_I16_ZERO
4344 : NVPTXInst<(outs Int16Regs:$r),
4345 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4346 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4348 def SULD_2D_ARRAY_I32_ZERO
4349 : NVPTXInst<(outs Int32Regs:$r),
4350 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4351 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4353 def SULD_2D_ARRAY_I64_ZERO
4354 : NVPTXInst<(outs Int64Regs:$r),
4355 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4356 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4360 : NVPTXInst<(outs Int16Regs:$r),
4361 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4362 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4364 def SULD_3D_I16_ZERO
4365 : NVPTXInst<(outs Int16Regs:$r),
4366 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4367 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4369 def SULD_3D_I32_ZERO
4370 : NVPTXInst<(outs Int32Regs:$r),
4371 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4372 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4374 def SULD_3D_I64_ZERO
4375 : NVPTXInst<(outs Int64Regs:$r),
4376 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4377 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Two-result (.v2) surface loads that use the .zero suffix.
// Every record below writes $r and $g and reads a 64-bit register $s plus
// 32-bit coordinate registers; b8 and b16 both use Int16Regs for results.
//
// 1d geometry: a single coordinate, printed as {$x}.
4382 def SULD_1D_V2I8_ZERO
4383 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4384 (ins Int64Regs:$s, Int32Regs:$x),
4385 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4387 def SULD_1D_V2I16_ZERO
4388 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4389 (ins Int64Regs:$s, Int32Regs:$x),
4390 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4392 def SULD_1D_V2I32_ZERO
4393 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4394 (ins Int64Regs:$s, Int32Regs:$x),
4395 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4397 def SULD_1D_V2I64_ZERO
4398 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4399 (ins Int64Regs:$s, Int32Regs:$x),
4400 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
// NOTE(review): $l is presumably the array layer index -- confirm.
4403 def SULD_1D_ARRAY_V2I8_ZERO
4404 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4405 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4406 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4408 def SULD_1D_ARRAY_V2I16_ZERO
4409 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4410 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4411 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4413 def SULD_1D_ARRAY_V2I32_ZERO
4414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4415 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4416 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4418 def SULD_1D_ARRAY_V2I64_ZERO
4419 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4420 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4421 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
// 2d geometry: the coordinate list is printed as {$x, $y}.
4424 def SULD_2D_V2I8_ZERO
4425 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4426 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4427 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4429 def SULD_2D_V2I16_ZERO
4430 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4431 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4432 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4434 def SULD_2D_V2I32_ZERO
4435 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4436 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4437 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4439 def SULD_2D_V2I64_ZERO
4440 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4441 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4442 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: three coordinate inputs, printed as four entries with $y
// repeated in the last slot.
// NOTE(review): presumably padding to a 4-element coordinate vector --
// confirm against the PTX ISA.
4445 def SULD_2D_ARRAY_V2I8_ZERO
4446 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4447 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4448 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4449 "[$s, \\{$l, $x, $y, $y\\}];",
4451 def SULD_2D_ARRAY_V2I16_ZERO
4452 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4453 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4454 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4455 "[$s, \\{$l, $x, $y, $y\\}];",
4457 def SULD_2D_ARRAY_V2I32_ZERO
4458 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4459 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4460 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4461 "[$s, \\{$l, $x, $y, $y\\}];",
4463 def SULD_2D_ARRAY_V2I64_ZERO
4464 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4465 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4466 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4467 "[$s, \\{$l, $x, $y, $y\\}];",
// 3d geometry: three coordinate inputs, printed as four entries with $z
// repeated in the last slot.
4470 def SULD_3D_V2I8_ZERO
4471 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4472 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4473 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4475 def SULD_3D_V2I16_ZERO
4476 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4477 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4478 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4480 def SULD_3D_V2I32_ZERO
4481 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4482 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4483 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4485 def SULD_3D_V2I64_ZERO
4486 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4487 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4488 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
// Four-result (.v4) surface loads that use the .zero suffix.
// Every record below writes $r, $g, $b and $a and reads a 64-bit register $s
// plus 32-bit coordinate registers. Only b8, b16 and b32 element widths are
// defined in this group; b8 and b16 both use Int16Regs for their results.
//
// 1d geometry: a single coordinate, printed as {$x}.
4493 def SULD_1D_V4I8_ZERO
4494 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4495 (ins Int64Regs:$s, Int32Regs:$x),
4496 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4498 def SULD_1D_V4I16_ZERO
4499 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4500 (ins Int64Regs:$s, Int32Regs:$x),
4501 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4503 def SULD_1D_V4I32_ZERO
4504 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4505 (ins Int64Regs:$s, Int32Regs:$x),
4506 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
// a1d geometry: the coordinate list is printed as {$l, $x}.
// NOTE(review): $l is presumably the array layer index -- confirm.
4509 def SULD_1D_ARRAY_V4I8_ZERO
4510 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4511 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4512 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4513 "[$s, \\{$l, $x\\}];",
4515 def SULD_1D_ARRAY_V4I16_ZERO
4516 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4517 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4518 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4519 "[$s, \\{$l, $x\\}];",
4521 def SULD_1D_ARRAY_V4I32_ZERO
4522 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4523 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4524 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4525 "[$s, \\{$l, $x\\}];",
// 2d geometry: the coordinate list is printed as {$x, $y}.
4528 def SULD_2D_V4I8_ZERO
4529 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4530 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4531 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4533 def SULD_2D_V4I16_ZERO
4534 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4535 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4536 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4538 def SULD_2D_V4I32_ZERO
4539 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4540 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4541 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
// a2d geometry: three coordinate inputs, printed as four entries with $y
// repeated in the last slot.
// NOTE(review): presumably padding to a 4-element coordinate vector --
// confirm against the PTX ISA.
4544 def SULD_2D_ARRAY_V4I8_ZERO
4545 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4546 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4547 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4548 "[$s, \\{$l, $x, $y, $y\\}];",
4550 def SULD_2D_ARRAY_V4I16_ZERO
4551 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4552 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4553 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4554 "[$s, \\{$l, $x, $y, $y\\}];",
4556 def SULD_2D_ARRAY_V4I32_ZERO
4557 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4558 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4559 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4560 "[$s, \\{$l, $x, $y, $y\\}];",
// 3d geometry: three coordinate inputs, printed as four entries with $z
// repeated in the last slot.
4564 def SULD_3D_V4I8_ZERO
4565 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4566 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4567 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4568 "[$s, \\{$x, $y, $z, $z\\}];",
4570 def SULD_3D_V4I16_ZERO
4571 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4572 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4573 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4574 "[$s, \\{$x, $y, $z, $z\\}];",
4576 def SULD_3D_V4I32_ZERO
4577 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4578 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4579 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4580 "[$s, \\{$x, $y, $z, $z\\}];",
4584 //-----------------------------------
4585 // Texture Query Intrinsics
4586 //-----------------------------------
// Texture query instruction records. The enclosing `let` sets the
// IsSurfTexQuery field to 1 on every record in its scope.
// Each record takes one 64-bit register $a, printed as the bracketed operand
// [$a], and produces one 32-bit result $d through a `txq.<attribute>.b32`
// instruction. The attributes queried here are channel_order,
// channel_data_type, width, height, depth, array_size, num_samples and
// num_mipmap_levels.
4588 let IsSurfTexQuery = 1 in {
4589 def TXQ_CHANNEL_ORDER
4590 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4591 "txq.channel_order.b32 \t$d, [$a];",
4593 def TXQ_CHANNEL_DATA_TYPE
4594 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4595 "txq.channel_data_type.b32 \t$d, [$a];",
4598 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4599 "txq.width.b32 \t$d, [$a];",
4602 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4603 "txq.height.b32 \t$d, [$a];",
4606 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4607 "txq.depth.b32 \t$d, [$a];",
4610 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4611 "txq.array_size.b32 \t$d, [$a];",
4614 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4615 "txq.num_samples.b32 \t$d, [$a];",
4617 def TXQ_NUM_MIPMAP_LEVELS
4618 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4619 "txq.num_mipmap_levels.b32 \t$d, [$a];",
// Selection patterns for the texture queries: each anonymous Pat maps one
// int_nvvm_txq_* intrinsic applied to an Int64Regs operand $a onto the TXQ_*
// instruction of the same attribute, passing $a through unchanged.
4623 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4624 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4625 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4626 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4627 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4628 (TXQ_WIDTH Int64Regs:$a)>;
4629 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4630 (TXQ_HEIGHT Int64Regs:$a)>;
4631 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4632 (TXQ_DEPTH Int64Regs:$a)>;
4633 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4634 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4635 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4636 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4637 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4638 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4641 //-----------------------------------
4642 // Surface Query Intrinsics
4643 //-----------------------------------
// Surface query instruction records. The enclosing `let` sets the
// IsSurfTexQuery field to 1 on every record in its scope.
// Each record takes one 64-bit register $a, printed as the bracketed operand
// [$a], and produces one 32-bit result $d through a `suq.<attribute>.b32`
// instruction. The attributes queried here are channel_order,
// channel_data_type, width, height, depth and array_size.
4645 let IsSurfTexQuery = 1 in {
4646 def SUQ_CHANNEL_ORDER
4647 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4648 "suq.channel_order.b32 \t$d, [$a];",
4650 def SUQ_CHANNEL_DATA_TYPE
4651 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4652 "suq.channel_data_type.b32 \t$d, [$a];",
4655 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4656 "suq.width.b32 \t$d, [$a];",
4659 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4660 "suq.height.b32 \t$d, [$a];",
4663 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4664 "suq.depth.b32 \t$d, [$a];",
4667 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4668 "suq.array_size.b32 \t$d, [$a];",
// Selection patterns for the surface queries: each anonymous Pat maps one
// int_nvvm_suq_* intrinsic applied to an Int64Regs operand $a onto the SUQ_*
// instruction of the same attribute, passing $a through unchanged.
4672 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4673 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4674 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4675 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4676 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4677 (SUQ_WIDTH Int64Regs:$a)>;
4678 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4679 (SUQ_HEIGHT Int64Regs:$a)>;
4680 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4681 (SUQ_DEPTH Int64Regs:$a)>;
4682 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4683 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4686 //===- Handle Query -------------------------------------------------------===//
4688 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type queries. Each record takes a 64-bit register $a and produces a
// 1-bit result $d (Int1Regs) through an `istypep.<kind>` instruction, where
// <kind> is samplerref, surfref or texref. Unlike the txq/suq records, the
// selection pattern is attached inline: it sets $d from the matching
// int_nvvm_istypep_{sampler,surface,texture} intrinsic applied to $a.
4690 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4691 "istypep.samplerref \t$d, $a;",
4692 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4694 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4695 "istypep.surfref \t$d, $a;",
4696 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4698 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4699 "istypep.texref \t$d, $a;",
4700 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4702 //===- Surface Stores -----------------------------------------------------===//
// Surface stores (sust.b) for the 1d geometry with the .clamp suffix.
// Input operands are, in order: a 64-bit register $s, one 32-bit coordinate
// $x, then the data registers to store: $r for the scalar forms, $r/$g for
// .v2, and $r/$g/$b/$a for .v4. The asm string prints the address
// [$s, {$x}] first and the data list second.
// Data register class by element width: b8 and b16 use Int16Regs, b32 uses
// Int32Regs, b64 uses Int64Regs. The .v4 forms stop at b32.
4707 def SUST_B_1D_B8_CLAMP
4709 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4710 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4712 def SUST_B_1D_B16_CLAMP
4714 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4715 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4717 def SUST_B_1D_B32_CLAMP
4719 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4720 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4722 def SUST_B_1D_B64_CLAMP
4724 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4725 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
// Two-element (.v2) stores: data operands $r and $g.
4727 def SUST_B_1D_V2B8_CLAMP
4729 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4730 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4732 def SUST_B_1D_V2B16_CLAMP
4734 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4735 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4737 def SUST_B_1D_V2B32_CLAMP
4739 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4740 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4742 def SUST_B_1D_V2B64_CLAMP
4744 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4745 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-element (.v4) stores: data operands $r, $g, $b and $a.
4747 def SUST_B_1D_V4B8_CLAMP
4749 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4750 Int16Regs:$b, Int16Regs:$a),
4751 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4753 def SUST_B_1D_V4B16_CLAMP
4755 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4756 Int16Regs:$b, Int16Regs:$a),
4757 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4759 def SUST_B_1D_V4B32_CLAMP
4761 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4762 Int32Regs:$b, Int32Regs:$a),
4763 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b, a1d (1D array) geometry, `.clamp` suffix.
// The coordinate vector is printed as {$idx, $x}: the Int32Regs $idx operand
// comes first, then $x.  $s (Int64Regs) precedes it inside the brackets.
// The first data register $r is Int16Regs for b8/b16, Int32Regs for b32 and
// Int64Regs for b64.  No v4.b64 form is defined in this group.

// Scalar data forms.
4767 def SUST_B_1D_ARRAY_B8_CLAMP
4769 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4770 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4772 def SUST_B_1D_ARRAY_B16_CLAMP
4774 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4775 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4777 def SUST_B_1D_ARRAY_B32_CLAMP
4779 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4780 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4782 def SUST_B_1D_ARRAY_B64_CLAMP
4784 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4785 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two-element (v2) data forms; the asm string prints {$r, $g}.
4787 def SUST_B_1D_ARRAY_V2B8_CLAMP
4789 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4791 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4793 def SUST_B_1D_ARRAY_V2B16_CLAMP
4795 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4797 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4799 def SUST_B_1D_ARRAY_V2B32_CLAMP
4801 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4803 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4805 def SUST_B_1D_ARRAY_V2B64_CLAMP
4807 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4809 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Four-element (v4) data forms; the asm string is split across two adjacent
// string literals and prints {$r, $g, $b, $a}.
4811 def SUST_B_1D_ARRAY_V4B8_CLAMP
4813 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4814 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4815 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4816 "\\{$r, $g, $b, $a\\};",
4818 def SUST_B_1D_ARRAY_V4B16_CLAMP
4820 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4821 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4822 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4823 "\\{$r, $g, $b, $a\\};",
4825 def SUST_B_1D_ARRAY_V4B32_CLAMP
4827 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4828 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4829 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4830 "\\{$r, $g, $b, $a\\};",
// sust.b, 2d geometry, `.clamp` suffix.
// The coordinate vector is printed as {$x, $y} (both Int32Regs), after $s
// (Int64Regs) inside the brackets.
// The first data register $r is Int16Regs for b8/b16, Int32Regs for b32 and
// Int64Regs for b64.  No v4.b64 form is defined in this group.

// Scalar data forms.
4834 def SUST_B_2D_B8_CLAMP
4836 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4837 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4839 def SUST_B_2D_B16_CLAMP
4841 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4842 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4844 def SUST_B_2D_B32_CLAMP
4846 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4847 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4849 def SUST_B_2D_B64_CLAMP
4851 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4852 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-element (v2) data forms; the asm string prints {$r, $g}.
4854 def SUST_B_2D_V2B8_CLAMP
4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4858 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4860 def SUST_B_2D_V2B16_CLAMP
4862 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4864 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4866 def SUST_B_2D_V2B32_CLAMP
4868 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4870 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4872 def SUST_B_2D_V2B64_CLAMP
4874 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4876 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-element (v4) data forms; the asm string is split across two adjacent
// string literals and prints {$r, $g, $b, $a}.
4878 def SUST_B_2D_V4B8_CLAMP
4880 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4881 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4882 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4883 "\\{$r, $g, $b, $a\\};",
4885 def SUST_B_2D_V4B16_CLAMP
4887 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4888 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4889 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4890 "\\{$r, $g, $b, $a\\};",
4892 def SUST_B_2D_V4B32_CLAMP
4894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4895 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4896 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4897 "\\{$r, $g, $b, $a\\};",
// sust.b, a2d (2D array) geometry, `.clamp` suffix.
// Three Int32Regs coordinates are taken ($idx, $x, $y) but the asm string
// prints a four-element vector {$idx, $x, $y, $y}, repeating $y.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// this geometry and ignores the last element -- confirm against the PTX ISA.
// v2 data registers are Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs
// for b64; v4 forms follow the same mapping and have no b64 variant.

// Scalar data forms.
4901 def SUST_B_2D_ARRAY_B8_CLAMP
4903 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4905 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4907 def SUST_B_2D_ARRAY_B16_CLAMP
4909 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4911 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4913 def SUST_B_2D_ARRAY_B32_CLAMP
4915 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4917 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4919 def SUST_B_2D_ARRAY_B64_CLAMP
4921 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4923 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-element (v2) data forms, data operands $r and $g.
4925 def SUST_B_2D_ARRAY_V2B8_CLAMP
4927 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4928 Int16Regs:$r, Int16Regs:$g),
4929 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4932 def SUST_B_2D_ARRAY_V2B16_CLAMP
4934 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4935 Int16Regs:$r, Int16Regs:$g),
4936 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4939 def SUST_B_2D_ARRAY_V2B32_CLAMP
4941 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4942 Int32Regs:$r, Int32Regs:$g),
4943 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4946 def SUST_B_2D_ARRAY_V2B64_CLAMP
4948 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4949 Int64Regs:$r, Int64Regs:$g),
4950 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-element (v4) data forms; the asm string prints {$r, $g, $b, $a}.
4953 def SUST_B_2D_ARRAY_V4B8_CLAMP
4955 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4956 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4957 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4958 "\\{$r, $g, $b, $a\\};",
4960 def SUST_B_2D_ARRAY_V4B16_CLAMP
4962 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4963 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4964 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4965 "\\{$r, $g, $b, $a\\};",
4967 def SUST_B_2D_ARRAY_V4B32_CLAMP
4969 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4970 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4971 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
4972 "\\{$r, $g, $b, $a\\};",
// sust.b, 3d geometry, `.clamp` suffix.
// Three Int32Regs coordinates are taken ($x, $y, $z) but the asm string
// prints a four-element vector {$x, $y, $z, $z}, repeating $z.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// this geometry and ignores the last element -- confirm against the PTX ISA.
// v2 data registers are Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs
// for b64; v4 forms follow the same mapping and have no b64 variant.

// Scalar data forms.
4976 def SUST_B_3D_B8_CLAMP
4978 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4980 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4982 def SUST_B_3D_B16_CLAMP
4984 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4986 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4988 def SUST_B_3D_B32_CLAMP
4990 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4992 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4994 def SUST_B_3D_B64_CLAMP
4996 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4998 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-element (v2) data forms, data operands $r and $g.
5000 def SUST_B_3D_V2B8_CLAMP
5002 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5003 Int16Regs:$r, Int16Regs:$g),
5004 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5007 def SUST_B_3D_V2B16_CLAMP
5009 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5010 Int16Regs:$r, Int16Regs:$g),
5011 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5014 def SUST_B_3D_V2B32_CLAMP
5016 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5017 Int32Regs:$r, Int32Regs:$g),
5018 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5021 def SUST_B_3D_V2B64_CLAMP
5023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5024 Int64Regs:$r, Int64Regs:$g),
5025 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-element (v4) data forms; the asm string prints {$r, $g, $b, $a}.
5028 def SUST_B_3D_V4B8_CLAMP
5030 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5031 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5032 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5033 "\\{$r, $g, $b, $a\\};",
5035 def SUST_B_3D_V4B16_CLAMP
5037 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5038 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5039 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5040 "\\{$r, $g, $b, $a\\};",
5042 def SUST_B_3D_V4B32_CLAMP
5044 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5045 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5046 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
5047 "\\{$r, $g, $b, $a\\};",
// sust.b, 1d geometry, `.trap` suffix.
// Same operand layout as the other 1d sust.b groups: $s (Int64Regs) followed
// by the one-element coordinate vector {$x} (Int32Regs); data registers are
// Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs for b64.
// No v4.b64 form is defined in this group.
// NOTE(review): the meaning of `.trap` is defined by the PTX ISA, not by this
// file -- confirm there.

// Scalar data forms.
5052 def SUST_B_1D_B8_TRAP
5054 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5055 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5057 def SUST_B_1D_B16_TRAP
5059 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5060 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5062 def SUST_B_1D_B32_TRAP
5064 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5065 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5067 def SUST_B_1D_B64_TRAP
5069 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5070 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
// Two-element (v2) data forms.
5072 def SUST_B_1D_V2B8_TRAP
5074 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5075 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5077 def SUST_B_1D_V2B16_TRAP
5079 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5080 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5082 def SUST_B_1D_V2B32_TRAP
5084 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5085 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5087 def SUST_B_1D_V2B64_TRAP
5089 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5090 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-element (v4) data forms.
5092 def SUST_B_1D_V4B8_TRAP
5094 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5095 Int16Regs:$b, Int16Regs:$a),
5096 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5098 def SUST_B_1D_V4B16_TRAP
5100 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5101 Int16Regs:$b, Int16Regs:$a),
5102 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5104 def SUST_B_1D_V4B32_TRAP
5106 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5107 Int32Regs:$b, Int32Regs:$a),
5108 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b, a1d (1D array) geometry, `.trap` suffix.
// The coordinate vector is printed as {$idx, $x} (both Int32Regs), after $s
// (Int64Regs) inside the brackets.
// The first data register $r is Int16Regs for b8/b16, Int32Regs for b32 and
// Int64Regs for b64.  No v4.b64 form is defined in this group.

// Scalar data forms.
5112 def SUST_B_1D_ARRAY_B8_TRAP
5114 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5115 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5117 def SUST_B_1D_ARRAY_B16_TRAP
5119 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5120 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5122 def SUST_B_1D_ARRAY_B32_TRAP
5124 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5125 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5127 def SUST_B_1D_ARRAY_B64_TRAP
5129 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5130 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two-element (v2) data forms; the asm string prints {$r, $g}.
5132 def SUST_B_1D_ARRAY_V2B8_TRAP
5134 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5136 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5138 def SUST_B_1D_ARRAY_V2B16_TRAP
5140 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5142 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5144 def SUST_B_1D_ARRAY_V2B32_TRAP
5146 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5148 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5150 def SUST_B_1D_ARRAY_V2B64_TRAP
5152 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5154 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Four-element (v4) data forms; the asm string is split across two adjacent
// string literals and prints {$r, $g, $b, $a}.
5156 def SUST_B_1D_ARRAY_V4B8_TRAP
5158 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5159 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5160 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5161 "\\{$r, $g, $b, $a\\};",
5163 def SUST_B_1D_ARRAY_V4B16_TRAP
5165 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5166 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5167 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5168 "\\{$r, $g, $b, $a\\};",
5170 def SUST_B_1D_ARRAY_V4B32_TRAP
5172 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5173 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5174 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5175 "\\{$r, $g, $b, $a\\};",
// sust.b, 2d geometry, `.trap` suffix.
// The coordinate vector is printed as {$x, $y} (both Int32Regs), after $s
// (Int64Regs) inside the brackets.
// The first data register $r is Int16Regs for b8/b16, Int32Regs for b32 and
// Int64Regs for b64.  No v4.b64 form is defined in this group.

// Scalar data forms.
5179 def SUST_B_2D_B8_TRAP
5181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5182 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5184 def SUST_B_2D_B16_TRAP
5186 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5187 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5189 def SUST_B_2D_B32_TRAP
5191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5192 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5194 def SUST_B_2D_B64_TRAP
5196 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5197 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-element (v2) data forms; the asm string prints {$r, $g}.
5199 def SUST_B_2D_V2B8_TRAP
5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5203 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5205 def SUST_B_2D_V2B16_TRAP
5207 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5209 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5211 def SUST_B_2D_V2B32_TRAP
5213 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5215 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5217 def SUST_B_2D_V2B64_TRAP
5219 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5221 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-element (v4) data forms; the asm string is split across two adjacent
// string literals and prints {$r, $g, $b, $a}.
5223 def SUST_B_2D_V4B8_TRAP
5225 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5226 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5227 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5228 "\\{$r, $g, $b, $a\\};",
5230 def SUST_B_2D_V4B16_TRAP
5232 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5233 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5234 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5235 "\\{$r, $g, $b, $a\\};",
5237 def SUST_B_2D_V4B32_TRAP
5239 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5240 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5241 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5242 "\\{$r, $g, $b, $a\\};",
// sust.b, a2d (2D array) geometry, `.trap` suffix.
// Three Int32Regs coordinates are taken ($idx, $x, $y) but the asm string
// prints a four-element vector {$idx, $x, $y, $y}, repeating $y.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// this geometry and ignores the last element -- confirm against the PTX ISA.
// v2 data registers are Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs
// for b64; v4 forms follow the same mapping and have no b64 variant.

// Scalar data forms.
5246 def SUST_B_2D_ARRAY_B8_TRAP
5248 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5250 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5252 def SUST_B_2D_ARRAY_B16_TRAP
5254 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5256 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5258 def SUST_B_2D_ARRAY_B32_TRAP
5260 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5262 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5264 def SUST_B_2D_ARRAY_B64_TRAP
5266 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5268 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-element (v2) data forms, data operands $r and $g.
5270 def SUST_B_2D_ARRAY_V2B8_TRAP
5272 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5273 Int16Regs:$r, Int16Regs:$g),
5274 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5277 def SUST_B_2D_ARRAY_V2B16_TRAP
5279 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5280 Int16Regs:$r, Int16Regs:$g),
5281 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5284 def SUST_B_2D_ARRAY_V2B32_TRAP
5286 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5287 Int32Regs:$r, Int32Regs:$g),
5288 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5291 def SUST_B_2D_ARRAY_V2B64_TRAP
5293 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5294 Int64Regs:$r, Int64Regs:$g),
5295 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-element (v4) data forms; the asm string prints {$r, $g, $b, $a}.
5298 def SUST_B_2D_ARRAY_V4B8_TRAP
5300 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5301 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5302 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5303 "\\{$r, $g, $b, $a\\};",
5305 def SUST_B_2D_ARRAY_V4B16_TRAP
5307 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5308 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5309 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5310 "\\{$r, $g, $b, $a\\};",
5312 def SUST_B_2D_ARRAY_V4B32_TRAP
5314 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5315 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5316 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5317 "\\{$r, $g, $b, $a\\};",
// sust.b, 3d geometry, `.trap` suffix.
// Three Int32Regs coordinates are taken ($x, $y, $z) but the asm string
// prints a four-element vector {$x, $y, $z, $z}, repeating $z.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// this geometry and ignores the last element -- confirm against the PTX ISA.
// v2 data registers are Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs
// for b64; v4 forms follow the same mapping and have no b64 variant.

// Scalar data forms.
5321 def SUST_B_3D_B8_TRAP
5323 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5325 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5327 def SUST_B_3D_B16_TRAP
5329 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5331 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5333 def SUST_B_3D_B32_TRAP
5335 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5337 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5339 def SUST_B_3D_B64_TRAP
5341 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5343 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-element (v2) data forms, data operands $r and $g.
5345 def SUST_B_3D_V2B8_TRAP
5347 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5348 Int16Regs:$r, Int16Regs:$g),
5349 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5352 def SUST_B_3D_V2B16_TRAP
5354 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5355 Int16Regs:$r, Int16Regs:$g),
5356 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5359 def SUST_B_3D_V2B32_TRAP
5361 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5362 Int32Regs:$r, Int32Regs:$g),
5363 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5366 def SUST_B_3D_V2B64_TRAP
5368 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5369 Int64Regs:$r, Int64Regs:$g),
5370 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-element (v4) data forms; the asm string prints {$r, $g, $b, $a}.
5373 def SUST_B_3D_V4B8_TRAP
5375 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5376 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5377 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5378 "\\{$r, $g, $b, $a\\};",
5380 def SUST_B_3D_V4B16_TRAP
5382 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5383 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5384 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5385 "\\{$r, $g, $b, $a\\};",
5387 def SUST_B_3D_V4B32_TRAP
5389 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5390 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5391 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5392 "\\{$r, $g, $b, $a\\};",
// sust.b, 1d geometry, `.zero` suffix.
// Same operand layout as the other 1d sust.b groups: $s (Int64Regs) followed
// by the one-element coordinate vector {$x} (Int32Regs); data registers are
// Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs for b64.
// No v4.b64 form is defined in this group.
// NOTE(review): the meaning of `.zero` is defined by the PTX ISA, not by this
// file -- confirm there.

// Scalar data forms.
5397 def SUST_B_1D_B8_ZERO
5399 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5400 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5402 def SUST_B_1D_B16_ZERO
5404 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5405 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5407 def SUST_B_1D_B32_ZERO
5409 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5410 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5412 def SUST_B_1D_B64_ZERO
5414 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5415 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
// Two-element (v2) data forms.
5417 def SUST_B_1D_V2B8_ZERO
5419 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5420 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5422 def SUST_B_1D_V2B16_ZERO
5424 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5425 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5427 def SUST_B_1D_V2B32_ZERO
5429 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5430 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5432 def SUST_B_1D_V2B64_ZERO
5434 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5435 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-element (v4) data forms.
5437 def SUST_B_1D_V4B8_ZERO
5439 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5440 Int16Regs:$b, Int16Regs:$a),
5441 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5443 def SUST_B_1D_V4B16_ZERO
5445 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5446 Int16Regs:$b, Int16Regs:$a),
5447 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5449 def SUST_B_1D_V4B32_ZERO
5451 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5452 Int32Regs:$b, Int32Regs:$a),
5453 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
// sust.b, a1d (1D array) geometry, `.zero` suffix.
// The coordinate vector is printed as {$idx, $x} (both Int32Regs), after $s
// (Int64Regs) inside the brackets.
// The first data register $r is Int16Regs for b8/b16, Int32Regs for b32 and
// Int64Regs for b64.  No v4.b64 form is defined in this group.

// Scalar data forms.
5457 def SUST_B_1D_ARRAY_B8_ZERO
5459 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5460 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5462 def SUST_B_1D_ARRAY_B16_ZERO
5464 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5465 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5467 def SUST_B_1D_ARRAY_B32_ZERO
5469 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5470 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5472 def SUST_B_1D_ARRAY_B64_ZERO
5474 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5475 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// Two-element (v2) data forms; the asm string prints {$r, $g}.
5477 def SUST_B_1D_ARRAY_V2B8_ZERO
5479 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5481 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5483 def SUST_B_1D_ARRAY_V2B16_ZERO
5485 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5487 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5489 def SUST_B_1D_ARRAY_V2B32_ZERO
5491 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5493 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5495 def SUST_B_1D_ARRAY_V2B64_ZERO
5497 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5499 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// Four-element (v4) data forms; the asm string is split across two adjacent
// string literals and prints {$r, $g, $b, $a}.
5501 def SUST_B_1D_ARRAY_V4B8_ZERO
5503 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5504 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5505 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5506 "\\{$r, $g, $b, $a\\};",
5508 def SUST_B_1D_ARRAY_V4B16_ZERO
5510 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5511 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5512 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5513 "\\{$r, $g, $b, $a\\};",
5515 def SUST_B_1D_ARRAY_V4B32_ZERO
5517 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5518 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5519 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5520 "\\{$r, $g, $b, $a\\};",
// sust.b, 2d geometry, `.zero` suffix.
// The coordinate vector is printed as {$x, $y} (both Int32Regs), after $s
// (Int64Regs) inside the brackets.
// The first data register $r is Int16Regs for b8/b16, Int32Regs for b32 and
// Int64Regs for b64.  No v4.b64 form is defined in this group.

// Scalar data forms.
5524 def SUST_B_2D_B8_ZERO
5526 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5527 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5529 def SUST_B_2D_B16_ZERO
5531 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5532 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5534 def SUST_B_2D_B32_ZERO
5536 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5537 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5539 def SUST_B_2D_B64_ZERO
5541 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5542 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
// Two-element (v2) data forms; the asm string prints {$r, $g}.
5544 def SUST_B_2D_V2B8_ZERO
5546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5548 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5550 def SUST_B_2D_V2B16_ZERO
5552 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5554 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5556 def SUST_B_2D_V2B32_ZERO
5558 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5560 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5562 def SUST_B_2D_V2B64_ZERO
5564 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5566 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// Four-element (v4) data forms; the asm string is split across two adjacent
// string literals and prints {$r, $g, $b, $a}.
5568 def SUST_B_2D_V4B8_ZERO
5570 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5571 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5572 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5573 "\\{$r, $g, $b, $a\\};",
5575 def SUST_B_2D_V4B16_ZERO
5577 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5578 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5579 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5580 "\\{$r, $g, $b, $a\\};",
5582 def SUST_B_2D_V4B32_ZERO
5584 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5585 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5586 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5587 "\\{$r, $g, $b, $a\\};",
// sust.b, a2d (2D array) geometry, `.zero` suffix.
// Three Int32Regs coordinates are taken ($idx, $x, $y) but the asm string
// prints a four-element vector {$idx, $x, $y, $y}, repeating $y.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// this geometry and ignores the last element -- confirm against the PTX ISA.
// v2 data registers are Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs
// for b64; v4 forms follow the same mapping and have no b64 variant.

// Scalar data forms.
5591 def SUST_B_2D_ARRAY_B8_ZERO
5593 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5595 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5597 def SUST_B_2D_ARRAY_B16_ZERO
5599 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5601 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5603 def SUST_B_2D_ARRAY_B32_ZERO
5605 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5607 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5609 def SUST_B_2D_ARRAY_B64_ZERO
5611 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5613 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// Two-element (v2) data forms, data operands $r and $g.
5615 def SUST_B_2D_ARRAY_V2B8_ZERO
5617 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5618 Int16Regs:$r, Int16Regs:$g),
5619 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5622 def SUST_B_2D_ARRAY_V2B16_ZERO
5624 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5625 Int16Regs:$r, Int16Regs:$g),
5626 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5629 def SUST_B_2D_ARRAY_V2B32_ZERO
5631 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5632 Int32Regs:$r, Int32Regs:$g),
5633 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5636 def SUST_B_2D_ARRAY_V2B64_ZERO
5638 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5639 Int64Regs:$r, Int64Regs:$g),
5640 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
// Four-element (v4) data forms; the asm string prints {$r, $g, $b, $a}.
5643 def SUST_B_2D_ARRAY_V4B8_ZERO
5645 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5646 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5647 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5648 "\\{$r, $g, $b, $a\\};",
5650 def SUST_B_2D_ARRAY_V4B16_ZERO
5652 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5653 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5654 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5655 "\\{$r, $g, $b, $a\\};",
5657 def SUST_B_2D_ARRAY_V4B32_ZERO
5659 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5660 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5661 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5662 "\\{$r, $g, $b, $a\\};",
// sust.b, 3d geometry, `.zero` suffix.
// Three Int32Regs coordinates are taken ($x, $y, $z) but the asm string
// prints a four-element vector {$x, $y, $z, $z}, repeating $z.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// this geometry and ignores the last element -- confirm against the PTX ISA.
// v2 data registers are Int16Regs for b8/b16, Int32Regs for b32 and Int64Regs
// for b64; v4 forms follow the same mapping and have no b64 variant.

// Scalar data forms.
5666 def SUST_B_3D_B8_ZERO
5668 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5670 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5672 def SUST_B_3D_B16_ZERO
5674 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5676 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5678 def SUST_B_3D_B32_ZERO
5680 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5682 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5684 def SUST_B_3D_B64_ZERO
5686 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5688 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// Two-element (v2) data forms, data operands $r and $g.
5690 def SUST_B_3D_V2B8_ZERO
5692 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5693 Int16Regs:$r, Int16Regs:$g),
5694 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5697 def SUST_B_3D_V2B16_ZERO
5699 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5700 Int16Regs:$r, Int16Regs:$g),
5701 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5704 def SUST_B_3D_V2B32_ZERO
5706 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5707 Int32Regs:$r, Int32Regs:$g),
5708 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5711 def SUST_B_3D_V2B64_ZERO
5713 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5714 Int64Regs:$r, Int64Regs:$g),
5715 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
// Four-element (v4) data forms; the asm string prints {$r, $g, $b, $a}.
5718 def SUST_B_3D_V4B8_ZERO
5720 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5721 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5722 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5723 "\\{$r, $g, $b, $a\\};",
5725 def SUST_B_3D_V4B16_ZERO
5727 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5728 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5729 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5730 "\\{$r, $g, $b, $a\\};",
5732 def SUST_B_3D_V4B32_ZERO
5734 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5735 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5736 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5737 "\\{$r, $g, $b, $a\\};",
// sust.p, 1d geometry, `.trap` suffix.
// Operand layout mirrors the sust.b 1d records: $s (Int64Regs) followed by
// the one-element coordinate vector {$x} (Int32Regs); data registers are
// Int16Regs for b8/b16 and Int32Regs for b32.
// Unlike the sust.b groups, no b64 forms (scalar, v2 or v4) are defined here.
// NOTE(review): how `sust.p` differs from `sust.b` is defined by the PTX ISA,
// not by this file -- confirm there.

// Scalar data forms.
5744 def SUST_P_1D_B8_TRAP
5746 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5747 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5749 def SUST_P_1D_B16_TRAP
5751 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5752 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5754 def SUST_P_1D_B32_TRAP
5756 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5757 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
// Two-element (v2) data forms.
5759 def SUST_P_1D_V2B8_TRAP
5761 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5762 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5764 def SUST_P_1D_V2B16_TRAP
5766 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5767 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5769 def SUST_P_1D_V2B32_TRAP
5771 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5772 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
// Four-element (v4) data forms.
5774 def SUST_P_1D_V4B8_TRAP
5776 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5777 Int16Regs:$b, Int16Regs:$a),
5778 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5780 def SUST_P_1D_V4B16_TRAP
5782 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5783 Int16Regs:$b, Int16Regs:$a),
5784 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5786 def SUST_P_1D_V4B32_TRAP
5788 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5789 Int32Regs:$b, Int32Regs:$a),
5790 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5794 def SUST_P_1D_ARRAY_B8_TRAP
5796 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5797 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// SUST_P_1D_ARRAY_B16_TRAP: prints `sust.p.a1d.b16.trap` with address operand
// [$s, {$idx, $x}] and one Int16Regs data operand {$r}.
5799 def SUST_P_1D_ARRAY_B16_TRAP
5801 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5802 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// SUST_P_1D_ARRAY_B32_TRAP: prints `sust.p.a1d.b32.trap` with address operand
// [$s, {$idx, $x}] and one Int32Regs data operand {$r}.
5804 def SUST_P_1D_ARRAY_B32_TRAP
5806 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5807 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
// SUST_P_1D_ARRAY_V2B8_TRAP: prints `sust.p.a1d.v2.b8.trap` with address
// operand [$s, {$idx, $x}] and two data operands {$r, $g}.
5809 def SUST_P_1D_ARRAY_V2B8_TRAP
5811 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5813 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// SUST_P_1D_ARRAY_V2B16_TRAP: prints `sust.p.a1d.v2.b16.trap` with address
// operand [$s, {$idx, $x}] and two data operands {$r, $g}.
5815 def SUST_P_1D_ARRAY_V2B16_TRAP
5817 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5819 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// SUST_P_1D_ARRAY_V2B32_TRAP: prints `sust.p.a1d.v2.b32.trap` with address
// operand [$s, {$idx, $x}] and two data operands {$r, $g}.
5821 def SUST_P_1D_ARRAY_V2B32_TRAP
5823 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5825 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
// SUST_P_1D_ARRAY_V4B8_TRAP: prints `sust.p.a1d.v4.b8.trap` with address
// operand [$s, {$idx, $x}] and four data operands {$r, $g, $b, $a}, all
// declared as Int16Regs. The asm text is split across two adjacent literals.
5827 def SUST_P_1D_ARRAY_V4B8_TRAP
5829 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5830 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5831 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5832 "\\{$r, $g, $b, $a\\};",
// SUST_P_1D_ARRAY_V4B16_TRAP: prints `sust.p.a1d.v4.b16.trap` with address
// operand [$s, {$idx, $x}] and four Int16Regs data operands {$r, $g, $b, $a}.
5834 def SUST_P_1D_ARRAY_V4B16_TRAP
5836 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5837 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5838 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5839 "\\{$r, $g, $b, $a\\};",
// SUST_P_1D_ARRAY_V4B32_TRAP: prints `sust.p.a1d.v4.b32.trap` with address
// operand [$s, {$idx, $x}] and four Int32Regs data operands {$r, $g, $b, $a}.
5841 def SUST_P_1D_ARRAY_V4B32_TRAP
5843 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5844 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5845 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5846 "\\{$r, $g, $b, $a\\};",
// SUST_P_2D_B8_TRAP: prints `sust.p.2d.b8.trap` with address operand
// [$s, {$x, $y}] and one data operand {$r}; the b8 value uses an Int16Regs
// operand.
5850 def SUST_P_2D_B8_TRAP
5852 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5853 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// SUST_P_2D_B16_TRAP: prints `sust.p.2d.b16.trap` with address operand
// [$s, {$x, $y}] and one Int16Regs data operand {$r}.
5855 def SUST_P_2D_B16_TRAP
5857 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5858 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// SUST_P_2D_B32_TRAP: prints `sust.p.2d.b32.trap` with address operand
// [$s, {$x, $y}] and one Int32Regs data operand {$r}.
5860 def SUST_P_2D_B32_TRAP
5862 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5863 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
// SUST_P_2D_V2B8_TRAP: prints `sust.p.2d.v2.b8.trap` with address operand
// [$s, {$x, $y}] and two data operands {$r, $g}.
5865 def SUST_P_2D_V2B8_TRAP
5867 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5869 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// SUST_P_2D_V2B16_TRAP: prints `sust.p.2d.v2.b16.trap` with address operand
// [$s, {$x, $y}] and two data operands {$r, $g}.
5871 def SUST_P_2D_V2B16_TRAP
5873 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5875 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// SUST_P_2D_V2B32_TRAP: prints `sust.p.2d.v2.b32.trap` with address operand
// [$s, {$x, $y}] and two data operands {$r, $g}.
5877 def SUST_P_2D_V2B32_TRAP
5879 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5881 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
// SUST_P_2D_V4B8_TRAP: prints `sust.p.2d.v4.b8.trap` with address operand
// [$s, {$x, $y}] and four data operands {$r, $g, $b, $a}, all declared as
// Int16Regs.
5883 def SUST_P_2D_V4B8_TRAP
5885 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5886 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5887 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5888 "\\{$r, $g, $b, $a\\};",
// SUST_P_2D_V4B16_TRAP: prints `sust.p.2d.v4.b16.trap` with address operand
// [$s, {$x, $y}] and four Int16Regs data operands {$r, $g, $b, $a}.
5890 def SUST_P_2D_V4B16_TRAP
5892 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5893 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5894 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5895 "\\{$r, $g, $b, $a\\};",
// SUST_P_2D_V4B32_TRAP: prints `sust.p.2d.v4.b32.trap` with address operand
// [$s, {$x, $y}] and four Int32Regs data operands {$r, $g, $b, $a}.
5897 def SUST_P_2D_V4B32_TRAP
5899 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5900 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5901 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5902 "\\{$r, $g, $b, $a\\};",
// SUST_P_2D_ARRAY_B8_TRAP: prints `sust.p.a2d.b8.trap` with address operand
// [$s, {$idx, $x, $y, $y}] and one data operand {$r}. $y is written twice in
// the coordinate vector (presumably padding to four elements; confirm against
// the PTX ISA).
5906 def SUST_P_2D_ARRAY_B8_TRAP
5908 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5910 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// SUST_P_2D_ARRAY_B16_TRAP: prints `sust.p.a2d.b16.trap` with address operand
// [$s, {$idx, $x, $y, $y}] ($y written twice) and one data operand {$r}.
5912 def SUST_P_2D_ARRAY_B16_TRAP
5914 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5916 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// SUST_P_2D_ARRAY_B32_TRAP: prints `sust.p.a2d.b32.trap` with address operand
// [$s, {$idx, $x, $y, $y}] ($y written twice) and one data operand {$r}.
5918 def SUST_P_2D_ARRAY_B32_TRAP
5920 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5922 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
// SUST_P_2D_ARRAY_V2B8_TRAP: prints `sust.p.a2d.v2.b8.trap` with address
// operand [$s, {$idx, $x, $y, $y}] ($y written twice). Inputs include two
// Int16Regs data operands, $r and $g.
5924 def SUST_P_2D_ARRAY_V2B8_TRAP
5926 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5927 Int16Regs:$r, Int16Regs:$g),
5928 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// SUST_P_2D_ARRAY_V2B16_TRAP: prints `sust.p.a2d.v2.b16.trap` with address
// operand [$s, {$idx, $x, $y, $y}] ($y written twice). Inputs include two
// Int16Regs data operands, $r and $g.
5931 def SUST_P_2D_ARRAY_V2B16_TRAP
5933 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5934 Int16Regs:$r, Int16Regs:$g),
5935 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// SUST_P_2D_ARRAY_V2B32_TRAP: prints `sust.p.a2d.v2.b32.trap` with address
// operand [$s, {$idx, $x, $y, $y}] ($y written twice). Inputs include two
// Int32Regs data operands, $r and $g.
5938 def SUST_P_2D_ARRAY_V2B32_TRAP
5940 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5941 Int32Regs:$r, Int32Regs:$g),
5942 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
// SUST_P_2D_ARRAY_V4B8_TRAP: prints `sust.p.a2d.v4.b8.trap` with address
// operand [$s, {$idx, $x, $y, $y}] ($y written twice) and four data operands
// {$r, $g, $b, $a}, all declared as Int16Regs.
5945 def SUST_P_2D_ARRAY_V4B8_TRAP
5947 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5948 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5949 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5950 "\\{$r, $g, $b, $a\\};",
// SUST_P_2D_ARRAY_V4B16_TRAP: prints `sust.p.a2d.v4.b16.trap` with address
// operand [$s, {$idx, $x, $y, $y}] ($y written twice) and four Int16Regs data
// operands {$r, $g, $b, $a}.
5952 def SUST_P_2D_ARRAY_V4B16_TRAP
5954 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5955 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5956 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5957 "\\{$r, $g, $b, $a\\};",
// SUST_P_2D_ARRAY_V4B32_TRAP: prints `sust.p.a2d.v4.b32.trap` with address
// operand [$s, {$idx, $x, $y, $y}] ($y written twice) and four Int32Regs data
// operands {$r, $g, $b, $a}.
5959 def SUST_P_2D_ARRAY_V4B32_TRAP
5961 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5962 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5963 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5964 "\\{$r, $g, $b, $a\\};",
// SUST_P_3D_B8_TRAP: prints `sust.p.3d.b8.trap` with address operand
// [$s, {$x, $y, $z, $z}] and one data operand {$r}. $z is written twice in the
// coordinate vector (presumably padding to four elements; confirm against the
// PTX ISA).
5968 def SUST_P_3D_B8_TRAP
5970 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5972 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// SUST_P_3D_B16_TRAP: prints `sust.p.3d.b16.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice) and one data operand {$r}.
5974 def SUST_P_3D_B16_TRAP
5976 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5978 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// SUST_P_3D_B32_TRAP: prints `sust.p.3d.b32.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice) and one data operand {$r}.
5980 def SUST_P_3D_B32_TRAP
5982 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5984 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
// SUST_P_3D_V2B8_TRAP: prints `sust.p.3d.v2.b8.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice). Inputs include two Int16Regs data
// operands, $r and $g.
5986 def SUST_P_3D_V2B8_TRAP
5988 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5989 Int16Regs:$r, Int16Regs:$g),
5990 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// SUST_P_3D_V2B16_TRAP: prints `sust.p.3d.v2.b16.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice). Inputs include two Int16Regs data
// operands, $r and $g.
5993 def SUST_P_3D_V2B16_TRAP
5995 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5996 Int16Regs:$r, Int16Regs:$g),
5997 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// SUST_P_3D_V2B32_TRAP: prints `sust.p.3d.v2.b32.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice). Inputs include two Int32Regs data
// operands, $r and $g.
6000 def SUST_P_3D_V2B32_TRAP
6002 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6003 Int32Regs:$r, Int32Regs:$g),
6004 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
// SUST_P_3D_V4B8_TRAP: prints `sust.p.3d.v4.b8.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice) and four data operands
// {$r, $g, $b, $a}, all declared as Int16Regs.
6007 def SUST_P_3D_V4B8_TRAP
6009 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6010 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6011 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6012 "\\{$r, $g, $b, $a\\};",
// SUST_P_3D_V4B16_TRAP: prints `sust.p.3d.v4.b16.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice) and four Int16Regs data operands
// {$r, $g, $b, $a}.
6014 def SUST_P_3D_V4B16_TRAP
6016 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6017 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6018 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6019 "\\{$r, $g, $b, $a\\};",
// SUST_P_3D_V4B32_TRAP: prints `sust.p.3d.v4.b32.trap` with address operand
// [$s, {$x, $y, $z, $z}] ($z written twice) and four Int32Regs data operands
// {$r, $g, $b, $a}.
6021 def SUST_P_3D_V4B32_TRAP
6023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6024 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6025 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
6026 "\\{$r, $g, $b, $a\\};",
6030 // Surface store instruction patterns
6031 // These are standalone Pat<> records rather than inline patterns on the
6032 // instruction definitions: the inline form makes TableGen report type errors (cause unknown).
// 1D stores, clamp variants: each int_nvvm_sust_b_1d_*_clamp intrinsic is
// selected to the SUST_B_1D_*_CLAMP instruction of the same shape, with the
// operands ($s, $x, then the data registers) passed through in the same order.
// i8 and i16 data both use Int16Regs operands; i32 uses Int32Regs and i64
// uses Int64Regs.
6035 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
6036 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6037 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6039 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
6040 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6041 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6043 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
6044 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6045 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6047 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
6048 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6049 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6051 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
6052 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6053 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6054 Int16Regs:$r, Int16Regs:$g)>;
6056 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
6057 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6058 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6059 Int16Regs:$r, Int16Regs:$g)>;
6061 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
6062 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6063 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6064 Int32Regs:$r, Int32Regs:$g)>;
6066 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
6067 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6068 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
6069 Int64Regs:$r, Int64Regs:$g)>;
6071 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
6072 Int64Regs:$s, Int32Regs:$x,
6073 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6074 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
6075 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6077 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
6078 Int64Regs:$s, Int32Regs:$x,
6079 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6080 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
6081 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6083 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
6084 Int64Regs:$s, Int32Regs:$x,
6085 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6086 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
6087 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D-array stores, clamp variants: each int_nvvm_sust_b_1d_array_*_clamp
// intrinsic is selected to the matching SUST_B_1D_ARRAY_*_CLAMP instruction.
// The intrinsic operands ($s, $l, $x, then the data registers) are forwarded
// in the same order; $l is the operand that sits between $s and $x.
6091 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
6092 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6093 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6096 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
6097 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6098 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6101 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
6102 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6103 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6106 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
6107 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6108 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6111 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
6112 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6113 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6114 Int16Regs:$r, Int16Regs:$g)>;
6116 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
6117 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6118 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6119 Int16Regs:$r, Int16Regs:$g)>;
6121 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
6122 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6123 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6124 Int32Regs:$r, Int32Regs:$g)>;
6126 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
6127 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6128 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6129 Int64Regs:$r, Int64Regs:$g)>;
6131 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
6132 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6133 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6134 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6135 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6137 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
6138 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6139 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6140 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6141 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6143 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
6144 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6145 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6146 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6147 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D stores, clamp variants: each int_nvvm_sust_b_2d_*_clamp intrinsic is
// selected to the matching SUST_B_2D_*_CLAMP instruction, forwarding
// $s, $x, $y and then the data registers in the same order.
6151 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
6152 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6153 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6156 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
6157 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6158 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6161 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
6162 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6163 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6166 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
6167 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6168 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6171 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
6172 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6173 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6174 Int16Regs:$r, Int16Regs:$g)>;
6176 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
6177 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6178 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6179 Int16Regs:$r, Int16Regs:$g)>;
6181 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
6182 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6183 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6184 Int32Regs:$r, Int32Regs:$g)>;
6186 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
6187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6188 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6189 Int64Regs:$r, Int64Regs:$g)>;
6191 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
6192 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6193 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6194 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6195 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6197 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
6198 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6199 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6200 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6201 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6203 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
6204 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6205 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6206 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6207 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D-array stores, clamp variants: each int_nvvm_sust_b_2d_array_*_clamp
// intrinsic is selected to the matching SUST_B_2D_ARRAY_*_CLAMP instruction,
// forwarding $s, $l, $x, $y and then the data registers in the same order.
6211 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
6212 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6213 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
6214 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6217 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
6218 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6219 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
6220 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6223 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
6224 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6225 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
6226 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6229 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
6230 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6231 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
6232 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6235 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
6236 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6237 Int16Regs:$r, Int16Regs:$g),
6238 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
6239 Int32Regs:$x, Int32Regs:$y,
6240 Int16Regs:$r, Int16Regs:$g)>;
6242 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
6243 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6244 Int16Regs:$r, Int16Regs:$g),
6245 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
6246 Int32Regs:$x, Int32Regs:$y,
6247 Int16Regs:$r, Int16Regs:$g)>;
6249 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
6250 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6252 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6253 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6255 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
6256 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6258 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
6259 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6261 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
6262 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6263 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6264 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
6265 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6266 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6268 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
6269 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6270 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6271 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
6272 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6273 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6275 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
6276 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6277 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6278 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
6279 Int32Regs:$x, Int32Regs:$y,
6280 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 3D stores, clamp variants: each int_nvvm_sust_b_3d_*_clamp intrinsic is
// selected to the matching SUST_B_3D_*_CLAMP instruction, forwarding
// $s, $x, $y, $z and then the data registers in the same order.
6284 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
6285 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6287 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
6288 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6291 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
6292 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6294 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
6295 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6298 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
6299 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6301 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
6302 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6305 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
6306 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6308 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
6309 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6312 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
6313 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6314 Int16Regs:$r, Int16Regs:$g),
6315 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
6316 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6317 Int16Regs:$r, Int16Regs:$g)>;
6319 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
6320 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6321 Int16Regs:$r, Int16Regs:$g),
6322 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
6323 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6324 Int16Regs:$r, Int16Regs:$g)>;
6326 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
6327 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6328 Int32Regs:$r, Int32Regs:$g),
6329 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
6330 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6331 Int32Regs:$r, Int32Regs:$g)>;
6333 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
6334 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6335 Int64Regs:$r, Int64Regs:$g),
6336 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
6337 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6338 Int64Regs:$r, Int64Regs:$g)>;
6340 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6341 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6342 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6343 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6344 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6345 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6347 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6348 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6349 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6350 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6351 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6352 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6354 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6355 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6356 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6357 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6358 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6359 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D stores, trap variants: each int_nvvm_sust_b_1d_*_trap intrinsic is
// selected to the SUST_B_1D_*_TRAP instruction of the same shape, with the
// operands ($s, $x, then the data registers) passed through in the same order.
// i8 and i16 data both use Int16Regs operands; i32 uses Int32Regs and i64
// uses Int64Regs.
6363 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6364 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6365 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6367 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6368 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6369 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6371 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6372 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6373 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6375 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6376 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6377 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6379 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6380 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6381 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6382 Int16Regs:$r, Int16Regs:$g)>;
6384 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6385 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6386 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6387 Int16Regs:$r, Int16Regs:$g)>;
6389 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6390 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6391 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6392 Int32Regs:$r, Int32Regs:$g)>;
6394 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6395 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6396 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6397 Int64Regs:$r, Int64Regs:$g)>;
6399 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6400 Int64Regs:$s, Int32Regs:$x,
6401 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6402 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6403 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6405 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6406 Int64Regs:$s, Int32Regs:$x,
6407 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6408 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6409 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6411 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6412 Int64Regs:$s, Int32Regs:$x,
6413 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6414 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6415 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 1D-array stores, trap variants: each int_nvvm_sust_b_1d_array_*_trap
// intrinsic is selected to the matching SUST_B_1D_ARRAY_*_TRAP instruction.
// The intrinsic operands ($s, $l, $x, then the data registers) are forwarded
// in the same order; $l is the operand that sits between $s and $x.
6419 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6420 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6421 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6424 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6425 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6426 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6429 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6430 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6431 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6434 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6435 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6436 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6439 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6440 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6441 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6442 Int16Regs:$r, Int16Regs:$g)>;
6444 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6445 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6446 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6447 Int16Regs:$r, Int16Regs:$g)>;
6449 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6450 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6451 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6452 Int32Regs:$r, Int32Regs:$g)>;
6454 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6455 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6456 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6457 Int64Regs:$r, Int64Regs:$g)>;
6459 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6460 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6461 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6462 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6463 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6465 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6466 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6467 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6468 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6469 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6471 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6472 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6473 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6474 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6475 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D stores, trap variants: each int_nvvm_sust_b_2d_*_trap intrinsic is
// selected to the matching SUST_B_2D_*_TRAP instruction, forwarding
// $s, $x, $y and then the data registers in the same order.
6479 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6480 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6481 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6484 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6485 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6486 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6489 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6490 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6491 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6494 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6495 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6496 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6499 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6500 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6501 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6502 Int16Regs:$r, Int16Regs:$g)>;
6504 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6505 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6506 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6507 Int16Regs:$r, Int16Regs:$g)>;
6509 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6510 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6511 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6512 Int32Regs:$r, Int32Regs:$g)>;
6514 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6515 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6516 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517 Int64Regs:$r, Int64Regs:$g)>;
6519 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6520 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6521 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6522 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6523 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6525 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6526 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6527 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6528 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6529 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6531 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6532 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6533 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6534 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6535 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 2D-array stores, trap variants: each int_nvvm_sust_b_2d_array_*_trap
// intrinsic is selected to the matching SUST_B_2D_ARRAY_*_TRAP instruction,
// forwarding $s, $l, $x, $y and then the data registers in the same order.
6539 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6540 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6541 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6542 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6545 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6546 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6547 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6548 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6551 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6552 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6553 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6554 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6557 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6558 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6559 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6560 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6563 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6564 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6565 Int16Regs:$r, Int16Regs:$g),
6566 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6567 Int32Regs:$x, Int32Regs:$y,
6568 Int16Regs:$r, Int16Regs:$g)>;
6570 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6571 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6572 Int16Regs:$r, Int16Regs:$g),
6573 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6574 Int32Regs:$x, Int32Regs:$y,
6575 Int16Regs:$r, Int16Regs:$g)>;
6577 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6578 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6580 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6581 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6583 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6584 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6586 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6587 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6589 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6590 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6591 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6592 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6593 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6594 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6596 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6597 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6598 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6599 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6600 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6601 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6603 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6604 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6605 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6606 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6607 Int32Regs:$x, Int32Regs:$y,
6608 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// 3D stores, trap variants: each int_nvvm_sust_b_3d_*_trap intrinsic is
// selected to the matching SUST_B_3D_*_TRAP instruction, forwarding
// $s, $x, $y, $z and then the data registers in the same order.
6612 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6613 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6615 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6616 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6619 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6620 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6622 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6623 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6626 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6627 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6629 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6630 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6633 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6634 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6636 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6637 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6640 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6641 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642 Int16Regs:$r, Int16Regs:$g),
6643 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6644 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6645 Int16Regs:$r, Int16Regs:$g)>;
6647 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6648 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649 Int16Regs:$r, Int16Regs:$g),
6650 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6651 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6652 Int16Regs:$r, Int16Regs:$g)>;
6654 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6655 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6656 Int32Regs:$r, Int32Regs:$g),
6657 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6658 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6659 Int32Regs:$r, Int32Regs:$g)>;
6661 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6662 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6663 Int64Regs:$r, Int64Regs:$g),
6664 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6665 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6666 Int64Regs:$r, Int64Regs:$g)>;
6668 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6669 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6670 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6671 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6672 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6673 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6675 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6676 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6677 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6678 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6679 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6680 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6682 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6683 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6684 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6685 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6686 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6687 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Each intrinsic is mapped to the SUST_B_1D_*_ZERO instruction of the same
// element shape, with operands $s (Int64Regs), $x (Int32Regs) and the data
// operands forwarded unchanged. i8 and i16 data both use Int16Regs; i32 uses
// Int32Regs and i64 uses Int64Regs.
6691 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6692 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6693 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6695 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6696 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6697 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6699 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6700 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6701 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6703 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6704 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6705 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
// Two-element forms: data operands are $r and $g.
6707 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6708 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6709 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6710 Int16Regs:$r, Int16Regs:$g)>;
6712 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6713 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6714 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6715 Int16Regs:$r, Int16Regs:$g)>;
6717 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6718 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6719 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6720 Int32Regs:$r, Int32Regs:$g)>;
6722 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6723 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6724 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6725 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
6727 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6728 Int64Regs:$s, Int32Regs:$x,
6729 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6730 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6731 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6733 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6734 Int64Regs:$s, Int32Regs:$x,
6735 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6736 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6737 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6739 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6740 Int64Regs:$s, Int32Regs:$x,
6741 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6742 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6743 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics.
// Each intrinsic is mapped to the SUST_B_1D_ARRAY_*_ZERO instruction of the
// same element shape. Compared with the non-array 1D forms there is one extra
// Int32Regs operand, $l, placed between $s and $x.
6747 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6748 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6749 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6752 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6753 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6754 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6757 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6758 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6759 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6762 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6763 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6764 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element forms: data operands are $r and $g.
6767 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6768 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6769 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6770 Int16Regs:$r, Int16Regs:$g)>;
6772 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6773 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6774 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6775 Int16Regs:$r, Int16Regs:$g)>;
6777 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6778 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6779 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6780 Int32Regs:$r, Int32Regs:$g)>;
6782 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6783 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6784 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6785 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
6787 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6788 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6789 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6790 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6791 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6793 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6794 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6795 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6796 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6797 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6799 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6800 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6801 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6802 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6803 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics.
// Each intrinsic is mapped to the SUST_B_2D_*_ZERO instruction of the same
// element shape, with operands $s (Int64Regs), $x and $y (Int32Regs) and the
// data operands forwarded unchanged.
6807 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6808 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6809 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6812 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6813 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6814 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6817 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6818 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6819 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6822 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6823 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6824 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: data operands are $r and $g.
6827 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6828 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6829 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6830 Int16Regs:$r, Int16Regs:$g)>;
6832 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6833 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6834 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6835 Int16Regs:$r, Int16Regs:$g)>;
6837 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6838 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6839 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6840 Int32Regs:$r, Int32Regs:$g)>;
6842 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6843 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6844 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6845 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
6847 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6848 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6849 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6850 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6851 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6853 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6854 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6855 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6856 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6857 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6859 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6860 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6861 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6862 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6863 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics.
// Each intrinsic is mapped to the SUST_B_2D_ARRAY_*_ZERO instruction of the
// same element shape. Compared with the non-array 2D forms there is one extra
// Int32Regs operand, $l, placed between $s and $x.
6867 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6868 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6869 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6870 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6873 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6874 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6875 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6876 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6879 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6880 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6881 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6882 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6885 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6886 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6887 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6888 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: data operands are $r and $g.
6891 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6892 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6893 Int16Regs:$r, Int16Regs:$g),
6894 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6895 Int32Regs:$x, Int32Regs:$y,
6896 Int16Regs:$r, Int16Regs:$g)>;
6898 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6899 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6900 Int16Regs:$r, Int16Regs:$g),
6901 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6902 Int32Regs:$x, Int32Regs:$y,
6903 Int16Regs:$r, Int16Regs:$g)>;
6905 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6906 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6908 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6909 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6911 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6912 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6914 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6915 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
6917 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6918 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6919 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6920 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6921 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6922 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6924 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6925 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6926 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6927 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6928 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6929 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6931 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6932 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6933 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6934 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6935 Int32Regs:$x, Int32Regs:$y,
6936 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics.
// Each intrinsic is mapped to the SUST_B_3D_*_ZERO instruction of the same
// element shape, with operands $s (Int64Regs), $x/$y/$z (Int32Regs) and the
// data operands forwarded under the same names.
6940 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6941 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6943 (SUST_B_3D_B8_ZERO Int64Regs:$s,
6944 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6947 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6948 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6950 (SUST_B_3D_B16_ZERO Int64Regs:$s,
6951 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6954 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6955 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6957 (SUST_B_3D_B32_ZERO Int64Regs:$s,
6958 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6961 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6962 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6964 (SUST_B_3D_B64_ZERO Int64Regs:$s,
6965 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element forms: data operands are $r and $g.
6968 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6969 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6970 Int16Regs:$r, Int16Regs:$g),
6971 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6972 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6973 Int16Regs:$r, Int16Regs:$g)>;
6975 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6976 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6977 Int16Regs:$r, Int16Regs:$g),
6978 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6979 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6980 Int16Regs:$r, Int16Regs:$g)>;
6982 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6983 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6984 Int32Regs:$r, Int32Regs:$g),
6985 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6986 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6987 Int32Regs:$r, Int32Regs:$g)>;
6989 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6990 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6991 Int64Regs:$r, Int64Regs:$g),
6992 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6993 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6994 Int64Regs:$r, Int64Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
6996 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6997 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6998 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6999 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
7000 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7001 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7003 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
7004 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7005 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7006 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
7007 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7008 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7010 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
7011 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7012 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7013 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
7014 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7015 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics.
// Each intrinsic is mapped to the SUST_P_1D_*_TRAP instruction of the same
// element shape, with operands $s (Int64Regs), $x (Int32Regs) and the data
// operands forwarded unchanged. Only 8-, 16- and 32-bit element shapes are
// defined in this group; i8 and i16 data both use Int16Regs.
7020 def : Pat<(int_nvvm_sust_p_1d_i8_trap
7021 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7022 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7024 def : Pat<(int_nvvm_sust_p_1d_i16_trap
7025 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
7026 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
7028 def : Pat<(int_nvvm_sust_p_1d_i32_trap
7029 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
7030 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
// Two-element forms: data operands are $r and $g.
7032 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
7033 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7034 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
7035 Int16Regs:$r, Int16Regs:$g)>;
7037 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7038 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7039 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7040 Int16Regs:$r, Int16Regs:$g)>;
7042 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7043 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7044 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7045 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
7047 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7048 Int64Regs:$s, Int32Regs:$x,
7049 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7050 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7051 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7053 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7054 Int64Regs:$s, Int32Regs:$x,
7055 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7056 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7057 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7059 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7060 Int64Regs:$s, Int32Regs:$x,
7061 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7062 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7063 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each intrinsic is mapped to the SUST_P_1D_ARRAY_*_TRAP instruction of the
// same element shape. Compared with the non-array 1D forms there is one extra
// Int32Regs operand, $l, placed between $s and $x.
7067 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7068 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7069 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7072 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7073 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7074 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7077 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7078 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7079 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
// Two-element forms: data operands are $r and $g.
7082 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7083 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7084 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7085 Int16Regs:$r, Int16Regs:$g)>;
7087 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7088 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7089 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7090 Int16Regs:$r, Int16Regs:$g)>;
7092 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7093 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7094 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7095 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
7097 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7098 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7099 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7100 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7101 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7103 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7104 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7105 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7106 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7107 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7109 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7110 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7111 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7112 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7113 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each intrinsic is mapped to the SUST_P_2D_*_TRAP instruction of the same
// element shape, with operands $s (Int64Regs), $x and $y (Int32Regs) and the
// data operands forwarded unchanged.
7117 def : Pat<(int_nvvm_sust_p_2d_i8_trap
7118 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7119 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7122 def : Pat<(int_nvvm_sust_p_2d_i16_trap
7123 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7124 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7127 def : Pat<(int_nvvm_sust_p_2d_i32_trap
7128 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7129 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: data operands are $r and $g.
7132 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7133 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7134 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7135 Int16Regs:$r, Int16Regs:$g)>;
7137 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7138 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7139 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7140 Int16Regs:$r, Int16Regs:$g)>;
7142 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7143 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7144 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7145 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
7147 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7148 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7149 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7150 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7151 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7153 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7154 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7155 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7156 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7157 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7159 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7160 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7161 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7162 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7163 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each intrinsic is mapped to the SUST_P_2D_ARRAY_*_TRAP instruction of the
// same element shape. Compared with the non-array 2D forms there is one extra
// Int32Regs operand, $l, placed between $s and $x.
7167 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7168 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7169 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7170 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7173 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7174 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7175 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7176 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7179 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7180 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7181 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7182 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
// Two-element forms: data operands are $r and $g.
7185 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7186 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7187 Int16Regs:$r, Int16Regs:$g),
7188 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7189 Int32Regs:$x, Int32Regs:$y,
7190 Int16Regs:$r, Int16Regs:$g)>;
7192 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7193 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7194 Int16Regs:$r, Int16Regs:$g),
7195 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7196 Int32Regs:$x, Int32Regs:$y,
7197 Int16Regs:$r, Int16Regs:$g)>;
7199 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7200 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7202 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7203 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
7205 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7206 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7207 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7208 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7209 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7210 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7212 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7213 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7215 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7216 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7217 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7219 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7220 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7221 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7222 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7223 Int32Regs:$x, Int32Regs:$y,
7224 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each intrinsic is mapped to the SUST_P_3D_*_TRAP instruction of the same
// element shape, with operands $s (Int64Regs), $x/$y/$z (Int32Regs) and the
// data operands forwarded under the same names.
7228 def : Pat<(int_nvvm_sust_p_3d_i8_trap
7229 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7231 (SUST_P_3D_B8_TRAP Int64Regs:$s,
7232 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7235 def : Pat<(int_nvvm_sust_p_3d_i16_trap
7236 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7238 (SUST_P_3D_B16_TRAP Int64Regs:$s,
7239 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7242 def : Pat<(int_nvvm_sust_p_3d_i32_trap
7243 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7245 (SUST_P_3D_B32_TRAP Int64Regs:$s,
7246 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
// Two-element forms: data operands are $r and $g.
7249 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7250 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7251 Int16Regs:$r, Int16Regs:$g),
7252 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7253 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7254 Int16Regs:$r, Int16Regs:$g)>;
7256 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7257 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7258 Int16Regs:$r, Int16Regs:$g),
7259 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7260 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7261 Int16Regs:$r, Int16Regs:$g)>;
7263 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7264 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7265 Int32Regs:$r, Int32Regs:$g),
7266 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7267 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7268 Int32Regs:$r, Int32Regs:$g)>;
// Four-element forms: data operands are $r, $g, $b and $a.
7270 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7271 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7272 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7273 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7274 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7275 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7277 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7278 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7279 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7280 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7281 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7282 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7284 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7285 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7286 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7287 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7288 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7289 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7291 //-----------------------------------
7292 // Read Special Registers
7293 //-----------------------------------
// Instruction class for reading a special register into a 64-bit register.
//   regname - register name without the leading '%'; the '%' is added when
//             the asm string is built.
//   intop   - zero-operand intrinsic this instruction is selected for.
// The instruction has no inputs and a single Int64Regs output $d, and prints
// as "mov.u64 \t$d, %<regname>;".
7295 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7296 : NVPTXInst<(outs Int64Regs:$d), (ins),
7297 !strconcat("mov.u64 \t$d, %", regname, ";"),
7298 [(set Int64Regs:$d, (intop))]>;
// Instruction class for reading a special register into a 32-bit register.
//   regname - register name without the leading '%'; the '%' is added when
//             the asm string is built.
//   intop   - zero-operand intrinsic this instruction is selected for.
// The instruction has no inputs and a single Int32Regs output $d, and prints
// as "mov.u32 \t$d, %<regname>;".
7300 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7301 : NVPTXInst<(outs Int32Regs:$d), (ins),
7302 !strconcat("mov.u32 \t$d, %", regname, ";"),
7303 [(set Int32Regs:$d, (intop))]>;
7305 // TODO: Add vector versions of the special-register reads.
// One instruction per special register. Each def pairs the register name that
// appears in the emitted "mov" with the intrinsic it is selected for. All are
// 32-bit reads (PTX_READ_SREG_R32) except %clock64, which uses
// PTX_READ_SREG_R64.

// %tid.{x,y,z,w}
7307 def INT_PTX_SREG_TID_X :
7308 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7309 def INT_PTX_SREG_TID_Y :
7310 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7311 def INT_PTX_SREG_TID_Z :
7312 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7313 def INT_PTX_SREG_TID_W :
7314 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
// %ntid.{x,y,z,w}
7316 def INT_PTX_SREG_NTID_X :
7317 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7318 def INT_PTX_SREG_NTID_Y :
7319 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7320 def INT_PTX_SREG_NTID_Z :
7321 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7322 def INT_PTX_SREG_NTID_W :
7323 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
// %laneid, %warpid, %nwarpid
7325 def INT_PTX_SREG_LANEID :
7326 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7327 def INT_PTX_SREG_WARPID :
7328 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7329 def INT_PTX_SREG_NWARPID :
7330 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
// %ctaid.{x,y,z,w}
7332 def INT_PTX_SREG_CTAID_X :
7333 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7334 def INT_PTX_SREG_CTAID_Y :
7335 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7336 def INT_PTX_SREG_CTAID_Z :
7337 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7338 def INT_PTX_SREG_CTAID_W :
7339 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
// %nctaid.{x,y,z,w}
7341 def INT_PTX_SREG_NCTAID_X :
7342 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7343 def INT_PTX_SREG_NCTAID_Y :
7344 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7345 def INT_PTX_SREG_NCTAID_Z :
7346 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7347 def INT_PTX_SREG_NCTAID_W :
7348 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
// %smid, %nsmid, %gridid
7350 def INT_PTX_SREG_SMID :
7351 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7352 def INT_PTX_SREG_NSMID :
7353 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7354 def INT_PTX_SREG_GRIDID :
7355 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
// %lanemask_{eq,le,lt,ge,gt}
7357 def INT_PTX_SREG_LANEMASK_EQ :
7358 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7359 def INT_PTX_SREG_LANEMASK_LE :
7360 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7361 def INT_PTX_SREG_LANEMASK_LT :
7362 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7363 def INT_PTX_SREG_LANEMASK_GE :
7364 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7365 def INT_PTX_SREG_LANEMASK_GT :
7366 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
// %clock is read with mov.u32; %clock64 is the only 64-bit read in this list.
7368 def INT_PTX_SREG_CLOCK :
7369 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7370 def INT_PTX_SREG_CLOCK64 :
7371 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// %pm0 .. %pm3
7373 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7374 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7375 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7376 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7378 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class
7379 // always prefixes the register name with '%', and WARP_SZ is emitted as a
// bare constant with no '%', so the instruction is spelled out by hand.
// Selected for int_nvvm_read_ptx_sreg_warpsize; no inputs, one Int32Regs
// output $dst.
7380 def INT_PTX_SREG_WARPSIZE :
7381 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7382 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7385 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
// An NVPTXInst with empty outs/ins, a placeholder asm string "?" and no
// pattern. Presumably meant as a base class whose operand lists, asm string
// and pattern are filled in by derived classes via `let` -- confirm against
// the users of this class.
7388 class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
// Instruction class for one wmma.load variant.
//   Geometry   - geometry string; compared against "m16n16k16" in Requires.
//   Abc        - fragment selector string (the name lookup special-cases "c").
//   Layout     - layout string.
//   Space      - address-space suffix; '.' is replaced by '_' when building
//                the matcher name.
//   Type       - element type string (e.g. "f16").
//   regclass   - register class of each result register.
//   SrcOp      - operand kind used for the $src address.
//   WithStride - when set, adds an Int32Regs:$ldm input and ", $ldm" to the
//                asm string.
// The class returns four result registers when Abc#Type is "cf16" and eight
// otherwise, and derives its selection pattern from its own outs/ins dags.
7390 class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
7391 string Space, string Type, NVPTXRegClass regclass,
7392 DAGOperand SrcOp, bit WithStride>
7394 Requires<[!if(!eq(Geometry, "m16n16k16"),
7398 // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
7399 // for this function.
7400 PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
7401 # Geometry # "_load_"
7402 # !subst("c", "c_" # Type, Abc)
7404 # !subst(".", "_", Space)
7405 # !if(WithStride,"_stride", "")
7407 dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
7408 dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
7409 dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
7411 dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
7412 dag Ins = !con((ins SrcOp:$src), StrideArg);
7414 // Build a dag pattern that matches the intrinsic call.
7415 // We want a dag that looks like this:
7416 // (set <output args>, (intrinsic <input arguments>)) where input and
7417 // output arguments are named patterns that would match corresponding
7418 // input/output arguments of the instruction.
7420 // First we construct (set <output arguments>) from instruction's outs dag by
7421 // replacing dag operator 'outs' with 'set'.
7422 dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
7423 // Similarly, construct (intrinsic <input arguments>) sub-dag from
7424 // instruction's input arguments, only now we also need to replace operands
7425 // with patterns that would match them and the operator 'ins' with the
// intrinsic matcher (IntrMatcher).
7427 dag PatArgs = !foreach(tmp, Ins,
7428 !subst(imem, ADDRvar,
7429 !subst(MEMri64, ADDRri64,
7430 !subst(MEMri, ADDRri,
7431 !subst(ins, IntrMatcher, tmp)))));
7432 // Finally, concatenate both parts together. !con() requires both dags to have
7433 // the same operator, so we wrap PatArgs in a (set ...) dag.
7434 let Pattern = [!con(PatOuts, (set PatArgs))];
7435 let OutOperandList = Outs;
7436 let InOperandList = Ins;
7437 let AsmString = "wmma.load."
7443 # "." # Type # " \t"
7444 # !if(!eq(Abc#Type, "cf16"),
7445 "{{$r0, $r1, $r2, $r3}}",
7446 "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
7448 # !if(WithStride, ", $ldm", "")
// PatFrag that matches one wmma load intrinsic restricted to one address
// space.
// The intrinsic is looked up by name as
//   int_nvvm_wmma_<Geometry>_load_<Abc>_<Type>_<Layout>[_stride]
// and the fragment's operands are $src, plus $ldm when WithStride is set.
// Space selects the predicate: ".shared" and ".global" check the shared and
// global address spaces respectively; any other value checks the generic
// address space.
7452 class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
7453 string Space, string Type, bit WithStride>
7454 : PatFrag <(ops),(ops)> {
7455 // Intrinsic that matches this instruction.
7456 Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
7457 # "_" # Geometry # "_load_"
7458 # Abc # "_" # Type # "_" # Layout
7459 # !if(WithStride,"_stride", ""));
// Predicate bodies, one per address space; exactly one is chosen below.
7460 code match_generic = [{
7461 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
7463 code match_shared = [{
7464 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
7466 code match_global = [{
7467 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
// The fragment is the operand list with its 'ops' operator replaced by the
// intrinsic, i.e. (Intr $src[, $ldm]).
7470 let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7471 let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
7472 let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
7473 !if(!eq(Space, ".global"), match_global, match_generic));
// Defines one WMMA_LOAD_GALSTOS instruction per addressing form of $src,
// with suffixes _avar, _areg, _areg64, _ari and _ari64. All other template
// arguments are passed through unchanged.
// NOTE(review): the _areg, _areg64 and _ari64 forms use Int32Regs, Int64Regs
// and MEMri64 respectively; the _avar and _ari forms presumably use imem and
// MEMri (the operand kinds rewritten in WMMA_LOAD_GALSTOS's PatArgs) --
// confirm.
7476 multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
7477 string Space, string Type, NVPTXRegClass regclass,
7479 def _avar: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7481 def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7482 Int32Regs, WithStride>;
7483 def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7484 Int64Regs, WithStride>;
7485 def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7487 def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
7488 MEMri64, WithStride>;
// Pairs the intrinsic-matching PatFrag (_Intr) with the load instruction
// variants produced by WMMA_LOAD_GALSTS for the same parameter set.
7491 multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
7492 string Space, string Type, NVPTXRegClass regclass,
7494 // Define a PatFrag that matches the appropriate intrinsic that loads from
7495 // the given address space.
7496 def _Intr: WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
7498 defm NAME: WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
// Expands into the strided variant (suffix _stride, WithStride = 1) and the
// non-strided variant (no suffix, WithStride = 0).
7502 multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
7503 string Space, string Type, NVPTXRegClass regclass> {
7504 defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
7505 defm NAME: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
// Expands over the Space parameter: _global passes ".global", _shared passes
// ".shared", and the unsuffixed variant passes the empty string.
7508 multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
7509 string Type, NVPTXRegClass regclass> {
7510 defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
7512 defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
7514 defm NAME: WMMA_LOAD_GALST<Geometry, Abc, Layout, "",
// Expands over the Layout parameter: _row passes "row" and _col passes "col".
7518 multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
7519 string Type, NVPTXRegClass regclass> {
7520 defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
7521 defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
// Top-level load multiclass for one geometry. Expands over the loaded fragment
// and element type: "a" and "b" as f16 in Float16x2Regs, and "c" as either f16
// in Float16x2Regs or f32 in Float32Regs.
7524 multiclass WMMA_LOAD_G<string Geometry> {
7525 defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
7526 defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
7527 defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
7528 defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
// Instantiate the WMMA load definitions for the m32n8k16, m16n16k16 and
// m8n32k16 geometries.
7531 defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
7532 defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
7533 defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
7536 // wmma.store.d.sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
// Instruction class for one wmma.store.d.sync variant.
//   DstOp      - operand class of the address operand ($src).
//   regclass   - register class of the stored fragment registers.
//   Type       - "f16" stores four registers ($r0-$r3); any other value
//                stores eight ($r0-$r7).
//   WithStride - when set, an Int32Regs $ldm operand is appended.
// The required predicates depend on Geometry. The instruction has no outputs;
// its selection pattern is IntrMatcher applied to the input operands.
7538 class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
7539 string Type, NVPTXRegClass regclass,
7540 bit WithStride, DAGOperand DstOp>
7542 Requires<[!if(!eq(Geometry, "m16n16k16"),
// PatFrag that matches the store intrinsic for this variant. Its name is
// assembled from the class parameters: Space has "." replaced by "_", and
// "_stride" is added when WithStride is set.
7546 PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
7547 # "_" # Geometry # "_store_d"
7550 # !subst(".", "_", Space)
7551 # !if(WithStride,"_stride", "")
// Input operands: the address, then 4 or 8 fragment registers, then the
// optional stride.
7553 dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
7554 regclass:$r2, regclass:$r3);
7555 dag InsR47 = (ins regclass:$r4, regclass:$r5,
7556 regclass:$r6, regclass:$r7);
7557 dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
7558 dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
7559 dag Ins = !con(InsR, StrideArg);
7561 // Construct the pattern to match corresponding intrinsic call. See the
7562 // details in the comments in WMMA_LOAD_GALSTOS.
// The 'ins' operator becomes IntrMatcher, and the memory operand classes
// imem, MEMri64 and MEMri become ADDRvar, ADDRri64 and ADDRri respectively.
7563 dag PatArgs = !foreach(tmp, Ins,
7564 !subst(imem, ADDRvar,
7565 !subst(MEMri64, ADDRri64,
7566 !subst(MEMri, ADDRri,
7567 !subst(ins, IntrMatcher, tmp)))));
7568 let Pattern = [PatArgs];
7569 let OutOperandList = (outs);
7570 let InOperandList = Ins;
7571 let AsmString = "wmma.store.d.sync."
7577 # !if(!eq(Type,"f16"),
7578 "{{$r0, $r1, $r2, $r3}}",
7579 "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
7580 # !if(WithStride, ", $ldm", "")
// PatFrag wrapping the WMMA store intrinsic selected by the class parameters.
// The intrinsic name starts with "int_nvvm_wmma_" and ends with "_stride" when
// WithStride is set. The fragment operands are $dst followed by four
// registers ($r0-$r3) when Type is "f16" or eight ($r0-$r7) otherwise, plus
// $ldm when WithStride is set. PredicateCode is picked by Space: ".shared"
// selects match_shared, ".global" selects match_global and any other value
// selects match_generic. Each match_* snippet calls
// ChkMemSDNodeAddressSpace(N, ...) with the corresponding
// llvm::ADDRESS_SPACE_* constant.
7585 class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
7586 string Type, bit WithStride>
7587 : PatFrag <(ops),(ops)> {
7588 // Intrinsic wrapped by this fragment, looked up by its assembled name.
7589 Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
7594 # !if(WithStride, "_stride", ""));
7595 code match_generic = [{
7596 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
7598 code match_shared = [{
7599 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
7601 code match_global = [{
7602 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
7605 dag Args = !if(!eq(Type,"f16"),
7606 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
7607 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
7608 node:$r4, node:$r5, node:$r6, node:$r7));
7609 dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
7610 let Operands = !con(Args, StrideArg);
7611 let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
7612 let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
7613 !if(!eq(Space, ".global"), match_global, match_generic));
// Instantiates WMMA_STORE_D_GLSTSO once per address-operand form, forwarding
// all other parameters unchanged. The defs are suffixed _avar, _areg, _areg64,
// _ari and _ari64; for example _areg uses Int32Regs, _areg64 uses Int64Regs
// and _ari64 uses MEMri64 as the address operand.
7616 multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
7617 string Type, NVPTXRegClass regclass,
7619 def _avar: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7621 def _areg: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7622 WithStride, Int32Regs>;
7623 def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7624 WithStride, Int64Regs>;
7625 def _ari: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7627 def _ari64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
7628 WithStride, MEMri64>;
// Pairs the intrinsic-matching PatFrag (_Intr) with the store instruction
// variants produced by WMMA_STORE_D_GLSTS for the same parameter set.
7631 multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
7632 string Type, NVPTXRegClass regclass,
7634 // Define a PatFrag that matches the appropriate intrinsic that stores to
7635 // the given address space.
7636 def _Intr: WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
7638 defm NAME: WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
// Expands into the strided variant (suffix _stride, WithStride = 1) and the
// non-strided variant (no suffix, WithStride = 0).
7642 multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
7643 string Type, NVPTXRegClass regclass > {
7644 defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
7645 defm NAME: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
// Expands over the Space parameter: _global passes ".global", _shared passes
// ".shared", and the unsuffixed variant passes the empty string.
7648 multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
7649 string Type, NVPTXRegClass regclass> {
7650 defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
7651 defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
7652 defm NAME: WMMA_STORE_D_GLST<Geometry, Layout, "", Type, regclass>;
// Expands over the Layout parameter: _row passes "row" and _col passes "col".
7655 multiclass WMMA_STORE_D_GT<string Geometry, string Type,
7656 NVPTXRegClass regclass> {
7657 defm _row: WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
7658 defm _col: WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
// Top-level store multiclass for one geometry. Expands over the element type
// of the stored fragment: f16 in Float16x2Regs and f32 in Float32Regs.
7661 multiclass WMMA_STORE_D_G<string Geometry> {
7662 defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
7663 defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
// Instantiate the WMMA store definitions for the m32n8k16, m16n16k16 and
// m8n32k16 geometries.
7666 defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
7667 defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
7668 defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
// Instruction class for one wmma.mma.sync variant.
//   d_reg / DType - register class and element type of the result; "f16"
//                   yields four outputs ($d0-$d3), any other value eight.
//   c_reg / CType - register class and element type of the C operand; "f16"
//                   uses $c0-$c3, any other value also uses $c4-$c7.
//   ab_reg        - register class of the A and B operands, eight registers
//                   each ($a0-$a7, $b0-$b7).
//   Satfinite     - optional modifier string, emitted verbatim in the asm
//                   string and, with "." replaced by "_", in the intrinsic
//                   name. Defaults to "".
// The required predicates depend on Geometry.
7671 class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
7672 string DType, NVPTXRegClass d_reg,
7673 string CType, NVPTXRegClass c_reg,
7674 NVPTXRegClass ab_reg,
7675 string Satfinite = "">
7677 Requires<[!if(!eq(Geometry, "m16n16k16"),
// Intrinsic matched by this instruction, looked up by its assembled name.
7681 Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
7688 # !subst(".", "_", Satfinite));
// Result registers: four for an f16 result, eight otherwise.
7689 dag Outs = !if(!eq(DType,"f16"),
7690 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
7691 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
7692 d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
// Additional C registers, selected by CType.
7693 dag InsExtraCArgs = !if(!eq(CType,"f16"),
7695 (ins c_reg:$c4, c_reg:$c5, c_reg:$c6, c_reg:$c7));
7696 dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
7697 ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
7698 ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
7699 ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
7700 c_reg:$c0, c_reg:$c1, c_reg:$c2, c_reg:$c3),
7703 // Construct the pattern to match corresponding intrinsic call. See the
7704 // details in the comments in WMMA_LOAD_GALSTOS.
7705 dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
7706 dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
7707 let Pattern = [!con(PatOuts, (set PatArgs))];
7708 let OutOperandList = Outs;
7709 let InOperandList = Ins;
7710 let AsmString = "wmma.mma.sync."
7716 # Satfinite # "\n\t\t"
7717 # !if(!eq(DType,"f16"),
7718 "{{$d0, $d1, $d2, $d3}}, \n\t\t",
7719 "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
7720 # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
7721 # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
7722 # !if(!eq(CType,"f16"),
7723 "{{$c0, $c1, $c2, $c3}};",
7724 "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
// Defines two WMMA_MMA_GABDCS instructions for a parameter set: a _satfinite
// def, which passes Float16x2Regs as ab_reg and ".satfinite" as the modifier,
// and an unsuffixed def.
7727 multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
7728 string DType, NVPTXRegClass d_reg,
7729 string CType, NVPTXRegClass c_reg> {
7730 def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
7731 DType, d_reg, CType, c_reg,
7732 Float16x2Regs, ".satfinite">;
7733 def NAME: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
7734 DType, d_reg, CType, c_reg,
// Expands over the C operand type: _f16 passes "f16" with Float16x2Regs and
// _f32 passes "f32" with Float32Regs.
7738 multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
7739 string DType, NVPTXRegClass d_reg> {
7740 defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
7741 "f16", Float16x2Regs>;
7742 defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
7743 "f32", Float32Regs>;
// Expands over the result (D) type: _f16 passes "f16" with Float16x2Regs and
// _f32 passes "f32" with Float32Regs.
7746 multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
7747 defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
7748 defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
// Expands over the B layout: _col passes "col" and _row passes "row".
7751 multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
7752 defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
7753 defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
// Top-level mma multiclass for one geometry. Expands over the A layout:
// _col passes "col" and _row passes "row".
7756 multiclass WMMA_MMA_G<string Geometry> {
7757 defm _col: WMMA_MMA_GA<Geometry, "col">;
7758 defm _row: WMMA_MMA_GA<Geometry, "row">;
// Instantiate the WMMA mma definitions for the m32n8k16, m16n16k16 and
// m8n32k16 geometries.
7761 defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
7762 defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
7763 defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;