1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the PTX instructions in TableGen format.
12 //===----------------------------------------------------------------------===//
14 include "NVPTXInstrFormats.td"
17 def NOP : NVPTXInst<(outs), (ins), "", []>;
19 // List of vector specific properties
// Vector-role tags. Every def below instantiates VecInstTypeEnum with its own
// integer id: 1 through 7 for the named roles and 15 for isVecOther.
// NOTE(review): VecInstTypeEnum is not declared in this part of the file; it
// presumably comes from the included NVPTXInstrFormats.td - confirm.
20 def isVecLD : VecInstTypeEnum<1>;
21 def isVecST : VecInstTypeEnum<2>;
22 def isVecBuild : VecInstTypeEnum<3>;
23 def isVecShuffle : VecInstTypeEnum<4>;
24 def isVecExtract : VecInstTypeEnum<5>;
25 def isVecInsert : VecInstTypeEnum<6>;
26 def isVecDest : VecInstTypeEnum<7>;
27 def isVecOther : VecInstTypeEnum<15>;
29 //===----------------------------------------------------------------------===//
30 // NVPTX Operand Definitions.
31 //===----------------------------------------------------------------------===//
33 def brtarget : Operand<OtherVT>;
35 //===----------------------------------------------------------------------===//
36 // NVPTX Instruction Predicate Definitions
37 //===----------------------------------------------------------------------===//
// Predicates used in Requires<[...]> lists throughout this file. Each one
// carries a condition string.
// NOTE(review): the strings are presumably C++ expressions evaluated by the
// generated instruction selector - confirm against the Predicate class.
//
// Group 1: conditions that call a query method on Subtarget.
40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
// Holds when hasAtomRedGen32() is false but hasAtomRedG32() is true.
43 def useAtomRedG32forGen32 :
44 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
// 64-bit counterpart of useAtomRedG32forGen32.
49 def useAtomRedG64forGen64 :
50 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
52 def hasVote : Predicate<"Subtarget.hasVote()">;
53 def hasDouble : Predicate<"Subtarget.hasDouble()">;
54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
55 def hasLDG : Predicate<"Subtarget.hasLDG()">;
56 def hasLDU : Predicate<"Subtarget.hasLDU()">;
57 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
// Group 2: conditions that test plain identifiers (UseF32FTZ, doFMAF32, ...).
// The *_ftz variants are the base condition AND-ed with UseF32FTZ.
59 def doF32FTZ : Predicate<"UseF32FTZ">;
61 def doFMAF32 : Predicate<"doFMAF32">;
62 def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
63 def doFMAF32AGG : Predicate<"doFMAF32AGG">;
64 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
65 def doFMAF64 : Predicate<"doFMAF64">;
66 def doFMAF64AGG : Predicate<"doFMAF64AGG">;
67 def doFMADF32 : Predicate<"doFMADF32">;
68 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
70 def doMulWide : Predicate<"doMulWide">;
72 def allowFMA : Predicate<"allowFMA">;
73 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
// f32 division mode: do_DIVF32_PREC == 0 selects the "approx" instructions,
// do_DIVF32_PREC == 1 selects the "full" instructions.
75 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
76 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
78 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
// Predicate whose condition is the constant "1".
80 def true : Predicate<"1">;
82 //===----------------------------------------------------------------------===//
83 // Special Handling for 8-bit Operands and Operations
85 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit
86 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
89 // PTX ld, st and cvt instructions permit source and destination data operands
90 // to be wider than the instruction-type size, so that narrow values may be
91 // loaded, stored, and converted using regular-width registers.
93 // So in PTX generation, we
94 // - always use 16-bit registers in place of 8-bit registers.
95 // (8-bit variables should stay as 8-bit as they represent memory layout.)
96 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
104 // We are patching the operations by inserting the cvt instructions in the
105 // asm strings of the affected instructions.
107 // Since vector operations, except for ld/st, are eventually elementized, we
108 // do not need to special-handle the vector 8-bit operations.
111 //===----------------------------------------------------------------------===//
113 // Generate string block like
117 // cvt.s16.s8 %temp1, %a;
118 // cvt.s16.s8 %temp2, %b;
119 // opc.s16 %dst, %temp1, %temp2;
121 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
122 class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
123 string s = !strconcat("{{\n\t",
124 !strconcat(".reg .", !strconcat(TypeStr,
125 !strconcat(" \t%temp1;\n\t",
126 !strconcat(".reg .", !strconcat(TypeStr,
127 !strconcat(" \t%temp2;\n\t",
128 !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
129 !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
130 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
133 // Generate string block like
137 // cvt.s16.s8 %temp1, %a;
138 // mov.b16 %temp2, %b;
139 // cvt.s16.s8 %temp2, %temp2;
140 // opc.s16 %dst, %temp1, %temp2;
142 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
143 class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
144 string s = !strconcat("{{\n\t",
145 !strconcat(".reg .", !strconcat(TypeStr,
146 !strconcat(" \t%temp1;\n\t",
148 !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
149 !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
150 !strconcat("mov.b16 \t%temp2, $b;\n\t",
151 !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
152 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
155 // Generate string block like
159 // mov.b16 %temp1, %a;
160 // cvt.s16.s8 %temp1, %temp1;
161 // cvt.s16.s8 %temp2, %b;
162 // opc.s16 %dst, %temp1, %temp2;
164 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
165 class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
166 string s = !strconcat("{{\n\t",
167 !strconcat(".reg .", !strconcat(TypeStr,
168 !strconcat(" \t%temp1;\n\t",
169 !strconcat(".reg .", !strconcat(TypeStr,
170 !strconcat(" \t%temp2;\n\t",
171 !strconcat("mov.b16 \t%temp1, $a;\n\t",
172 !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
173 !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
174 !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
178 //===----------------------------------------------------------------------===//
179 // Some Common Instruction Class Templates
180 //===----------------------------------------------------------------------===//
182 multiclass I3<string OpcStr, SDNode OpNode> {
183 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
184 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
185 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
187 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
188 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
189 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
190 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
191 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
192 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
194 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
195 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
196 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
197 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
198 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
199 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
201 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
202 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
203 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
204 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
205 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
206 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
207 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
208 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
209 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
212 multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
213 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
214 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
215 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
217 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
218 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
219 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
220 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
221 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
222 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
224 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
225 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
226 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
227 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
228 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
229 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
231 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
232 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
233 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
234 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
235 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
236 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
237 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
238 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
239 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
242 multiclass I3_noi8<string OpcStr, SDNode OpNode> {
243 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
244 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
245 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
247 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
248 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
249 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
250 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
251 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
252 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
254 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
255 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
256 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
257 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
258 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
259 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
261 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
262 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
263 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
266 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
267 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
269 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
270 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
272 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
273 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
274 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
// F3: family of two-operand floating-point instructions.
//   OpcStr - mnemonic prefix; a type suffix is appended to form the asm string
//   OpNode - DAG node matched by every variant
// Emits six records per instantiation:
//   f64rr / f64ri         - ".f64", second operand register / fpimm
//   f32rr_ftz / f32ri_ftz - ".ftz.f32", gated on allowFMA_ftz
//   f32rr / f32ri         - ".f32"
// All non-ftz variants are gated on allowFMA.
277 multiclass F3<string OpcStr, SDNode OpNode> {
278 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
279 (ins Float64Regs:$a, Float64Regs:$b),
280 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
281 [(set Float64Regs:$dst,
282 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
283 Requires<[allowFMA]>;
284 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
285 (ins Float64Regs:$a, f64imm:$b),
286 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
287 [(set Float64Regs:$dst,
288 (OpNode Float64Regs:$a, fpimm:$b))]>,
289 Requires<[allowFMA]>;
// Flush-to-zero f32 forms: same pattern, ".ftz" inserted before the type.
290 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
291 (ins Float32Regs:$a, Float32Regs:$b),
292 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
293 [(set Float32Regs:$dst,
294 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
295 Requires<[allowFMA_ftz]>;
296 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
297 (ins Float32Regs:$a, f32imm:$b),
298 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
299 [(set Float32Regs:$dst,
300 (OpNode Float32Regs:$a, fpimm:$b))]>,
301 Requires<[allowFMA_ftz]>;
302 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
303 (ins Float32Regs:$a, Float32Regs:$b),
304 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
305 [(set Float32Regs:$dst,
306 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
307 Requires<[allowFMA]>;
308 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
309 (ins Float32Regs:$a, f32imm:$b),
310 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
311 [(set Float32Regs:$dst,
312 (OpNode Float32Regs:$a, fpimm:$b))]>,
313 Requires<[allowFMA]>;
// F3_rn: same six-record layout as F3, but every asm string carries the
// ".rn" modifier right after OpcStr.
//   OpcStr - mnemonic prefix
//   OpNode - DAG node matched by every variant
// Only the *_ftz variants have a Requires list (doF32FTZ); the f64 and plain
// f32 variants are unconditional.
316 multiclass F3_rn<string OpcStr, SDNode OpNode> {
317 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
318 (ins Float64Regs:$a, Float64Regs:$b),
319 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
320 [(set Float64Regs:$dst,
321 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
322 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
323 (ins Float64Regs:$a, f64imm:$b),
324 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
325 [(set Float64Regs:$dst,
326 (OpNode Float64Regs:$a, fpimm:$b))]>;
// Flush-to-zero f32 forms.
327 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
328 (ins Float32Regs:$a, Float32Regs:$b),
329 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
330 [(set Float32Regs:$dst,
331 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
332 Requires<[doF32FTZ]>;
333 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
334 (ins Float32Regs:$a, f32imm:$b),
335 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
336 [(set Float32Regs:$dst,
337 (OpNode Float32Regs:$a, fpimm:$b))]>,
338 Requires<[doF32FTZ]>;
339 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
340 (ins Float32Regs:$a, Float32Regs:$b),
341 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
342 [(set Float32Regs:$dst,
343 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
344 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
345 (ins Float32Regs:$a, f32imm:$b),
346 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
347 [(set Float32Regs:$dst,
348 (OpNode Float32Regs:$a, fpimm:$b))]>;
// F2: family of one-operand floating-point instructions.
//   OpcStr - mnemonic prefix; ".f64", ".ftz.f32" or ".f32" is appended
//   OpNode - unary DAG node matched by every variant
// Emits f64, f32_ftz (gated on doF32FTZ) and f32.
351 multiclass F2<string OpcStr, SDNode OpNode> {
352 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
353 !strconcat(OpcStr, ".f64 \t$dst, $a;"),
354 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
355 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
356 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
357 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
358 Requires<[doF32FTZ]>;
359 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
360 !strconcat(OpcStr, ".f32 \t$dst, $a;"),
361 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
364 //===----------------------------------------------------------------------===//
365 // NVPTX Instructions.
366 //===----------------------------------------------------------------------===//
368 //-----------------------------------
369 // Integer Arithmetic
370 //-----------------------------------
// ADD_SUB_i1: add/sub on 1-bit (predicate) registers.
//   OpNode - DAG node to match
// Both records print "xor.pred" whatever OpNode is; on a single bit,
// addition and subtraction modulo 2 give the same result as xor.
//   _rr - register second operand
//   _ri - immediate second operand
372 multiclass ADD_SUB_i1<SDNode OpNode> {
373 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
374 "xor.pred \t$dst, $a, $b;",
375 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
376 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
377 "xor.pred \t$dst, $a, $b;",
378 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
// 1-bit add and sub, both instantiated from ADD_SUB_i1.
381 defm ADD_i1 : ADD_SUB_i1<add>;
382 defm SUB_i1 : ADD_SUB_i1<sub>;
// Integer add/sub for the widths provided by I3, using the "add.s"/"sub.s"
// mnemonic prefixes.
385 defm ADD : I3<"add.s", add>;
386 defm SUB : I3<"sub.s", sub>;
// 32-bit add/sub matching the addc/subc nodes, printed as add.cc / sub.cc.
388 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
389 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
// 32-bit add/sub matching the adde/sube nodes, printed as addc.cc / subc.cc.
391 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
392 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
394 //mul.wide PTX instruction
395 def SInt32Const : PatLeaf<(imm), [{
396 const APInt &v = N->getAPIntValue();
397 if (v.isSignedIntN(32))
402 def UInt32Const : PatLeaf<(imm), [{
403 const APInt &v = N->getAPIntValue();
409 def SInt16Const : PatLeaf<(imm), [{
410 const APInt &v = N->getAPIntValue();
411 if (v.isSignedIntN(16))
416 def UInt16Const : PatLeaf<(imm), [{
417 const APInt &v = N->getAPIntValue();
423 def Int5Const : PatLeaf<(imm), [{
424 const APInt &v = N->getAPIntValue();
425 // Check if 0 <= v < 32
426 // Only then the result from (x << v) will be i32
427 if (v.sge(0) && v.slt(32))
432 def Int4Const : PatLeaf<(imm), [{
433 const APInt &v = N->getAPIntValue();
434 // Check if 0 <= v < 16
435 // Only then the result from (x << v) will be i16
436 if (v.sge(0) && v.slt(16))
441 def SHL2MUL32 : SDNodeXForm<imm, [{
442 const APInt &v = N->getAPIntValue();
444 return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
447 def SHL2MUL16 : SDNodeXForm<imm, [{
448 const APInt &v = N->getAPIntValue();
450 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
// mul.wide instruction records. All of them have an empty pattern list ([]),
// so they are only reachable through explicit Pat definitions.
// Naming: S/U = signed/unsigned mnemonic, 64/32 = result register width,
// Imm = second operand is an immediate typed at the result width.
//
// 32-bit sources, 64-bit result, signed.
453 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
454 (ins Int32Regs:$a, Int32Regs:$b),
455 "mul.wide.s32 \t$dst, $a, $b;", []>;
456 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
457 (ins Int32Regs:$a, i64imm:$b),
458 "mul.wide.s32 \t$dst, $a, $b;", []>;
// 32-bit sources, 64-bit result, unsigned.
460 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
461 (ins Int32Regs:$a, Int32Regs:$b),
462 "mul.wide.u32 \t$dst, $a, $b;", []>;
463 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
464 (ins Int32Regs:$a, i64imm:$b),
465 "mul.wide.u32 \t$dst, $a, $b;", []>;
// 16-bit sources, 32-bit result, signed.
467 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
468 (ins Int16Regs:$a, Int16Regs:$b),
469 "mul.wide.s16 \t$dst, $a, $b;", []>;
470 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
471 (ins Int16Regs:$a, i32imm:$b),
472 "mul.wide.s16 \t$dst, $a, $b;", []>;
// 16-bit sources, 32-bit result, unsigned.
474 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
475 (ins Int16Regs:$a, Int16Regs:$b),
476 "mul.wide.u16 \t$dst, $a, $b;", []>;
477 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
478 (ins Int16Regs:$a, i32imm:$b),
479 "mul.wide.u16 \t$dst, $a, $b;", []>;
// Selection patterns that map onto the MULWIDE* records. Every pattern is
// gated on doMulWide.
//
// Shift-left of an extended value by a constant accepted by Int5Const /
// Int4Const: selected as a mul.wide whose immediate operand is the shift
// amount passed through the SHL2MUL32 / SHL2MUL16 transform.
481 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
482 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
483 Requires<[doMulWide]>;
484 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
485 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
486 Requires<[doMulWide]>;
488 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
489 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
490 Requires<[doMulWide]>;
491 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
492 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
493 Requires<[doMulWide]>;
// Multiply of two sign-extended 32-bit values, or of one sign-extended value
// and a constant accepted by SInt32Const.
495 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
496 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
497 Requires<[doMulWide]>;
498 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
499 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
500 Requires<[doMulWide]>;
// Zero-extended 32-bit counterparts (constant accepted by UInt32Const).
502 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
503 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
504 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
505 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
506 Requires<[doMulWide]>;
// Sign-extended 16-bit sources (constant accepted by SInt16Const).
508 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
509 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
510 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
511 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
512 Requires<[doMulWide]>;
// Zero-extended 16-bit sources (constant accepted by UInt16Const).
514 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
515 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
516 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
517 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
518 Requires<[doMulWide]>;
// Low-half multiply for the widths provided by I3.
520 defm MULT : I3<"mul.lo.s", mul>;
// High-half multiply. I3_noi8 is used here, and the 8-bit forms are written
// out by hand below.
522 defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
523 defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
// 8-bit signed mul-high, register/register. The emitted block declares two
// .s16 temporaries, widens both operands with cvt.s16.s8, multiplies with
// mul.lo.s16 and then shifts the 16-bit product right by 8 so the upper byte
// of the product ends up in the low byte of $dst.
524 def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
525 !strconcat("{{ \n\t",
526 !strconcat(".reg \t.s16 temp1; \n\t",
527 !strconcat(".reg \t.s16 temp2; \n\t",
528 !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
529 !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
530 !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
531 !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
532 !strconcat("}}", "")))))))),
533 [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
// 8-bit signed mul-high, register/immediate. Same sequence, except the
// immediate is first moved into temp2 with mov.b16 and then converted in
// place.
534 def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
535 !strconcat("{{ \n\t",
536 !strconcat(".reg \t.s16 temp1; \n\t",
537 !strconcat(".reg \t.s16 temp2; \n\t",
538 !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
539 !strconcat("mov.b16 \ttemp2, $b; \n\t",
540 !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
541 !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
542 !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
543 !strconcat("}}", ""))))))))),
544 [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
// 8-bit unsigned mul-high, register/register: the unsigned version of
// MULTHSi8rr (.u16 temporaries, cvt.u16.u8, mul.lo.u16, shr.u16).
545 def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
546 !strconcat("{{ \n\t",
547 !strconcat(".reg \t.u16 temp1; \n\t",
548 !strconcat(".reg \t.u16 temp2; \n\t",
549 !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
550 !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
551 !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
552 !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
553 !strconcat("}}", "")))))))),
554 [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
// 8-bit unsigned mul-high, register/immediate.
555 def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
556 !strconcat("{{ \n\t",
557 !strconcat(".reg \t.u16 temp1; \n\t",
558 !strconcat(".reg \t.u16 temp2; \n\t",
559 !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
560 !strconcat("mov.b16 \ttemp2, $b; \n\t",
561 !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
562 !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
563 !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
564 !strconcat("}}", ""))))))))),
565 [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
// Integer division and remainder. I3_i8 is instantiated with the 16-bit type
// name and the 8-to-16-bit cvt mnemonic that its 8-bit variants splice into
// their asm strings: signed ops pass "s16"/"cvt.s16.s8", unsigned ops pass
// "u16"/"cvt.u16.u8".
568 defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
569 defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
571 defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
572 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
573 defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
574 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
576 def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
577 (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
578 "mad.lo.s16 \t$dst, $a, $b, $c;",
579 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
581 def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
582 (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
583 "mad.lo.s16 \t$dst, $a, $b, $c;",
584 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
586 def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
587 (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
588 "mad.lo.s16 \t$dst, $a, $b, $c;",
589 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
591 def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
592 (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
593 "mad.lo.s16 \t$dst, $a, $b, $c;",
594 [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
// 16-bit multiply-add: matches (add (mul a, b), c) and prints mad.lo.s16.
// The three-letter suffix gives the kind of operands a, b, c in order
// (r = register, i = immediate).
597 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
598 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
599 "mad.lo.s16 \t$dst, $a, $b, $c;",
600 [(set Int16Regs:$dst, (add
601 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
602 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
603 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
604 "mad.lo.s16 \t$dst, $a, $b, $c;",
605 [(set Int16Regs:$dst, (add
606 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
607 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
608 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
609 "mad.lo.s16 \t$dst, $a, $b, $c;",
610 [(set Int16Regs:$dst, (add
611 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
612 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
613 (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
614 "mad.lo.s16 \t$dst, $a, $b, $c;",
615 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
// 32-bit multiply-add: matches (add (mul a, b), c) and prints mad.lo.s32.
// The three-letter suffix gives the kind of operands a, b, c in order
// (r = register, i = immediate); a is always a register.
618 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
619 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
620 "mad.lo.s32 \t$dst, $a, $b, $c;",
621 [(set Int32Regs:$dst, (add
622 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
623 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
624 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
625 "mad.lo.s32 \t$dst, $a, $b, $c;",
626 [(set Int32Regs:$dst, (add
627 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
628 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
629 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
630 "mad.lo.s32 \t$dst, $a, $b, $c;",
631 [(set Int32Regs:$dst, (add
632 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
633 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
634 (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
635 "mad.lo.s32 \t$dst, $a, $b, $c;",
636 [(set Int32Regs:$dst, (add
637 (mul Int32Regs:$a, imm:$b), imm:$c))]>;
// 64-bit multiply-add: same four operand combinations, printed as mad.lo.s64.
639 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
640 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
641 "mad.lo.s64 \t$dst, $a, $b, $c;",
642 [(set Int64Regs:$dst, (add
643 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
644 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
645 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
646 "mad.lo.s64 \t$dst, $a, $b, $c;",
647 [(set Int64Regs:$dst, (add
648 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
649 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
650 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
651 "mad.lo.s64 \t$dst, $a, $b, $c;",
652 [(set Int64Regs:$dst, (add
653 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
654 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
655 (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
656 "mad.lo.s64 \t$dst, $a, $b, $c;",
657 [(set Int64Regs:$dst, (add
658 (mul Int64Regs:$a, imm:$b), imm:$c))]>;
// Integer negation (ineg) for each register width.
// The 8-bit form prints two instructions: cvt.s16.s8 widens $src into $dst,
// then neg.s16 negates $dst in place.
661 def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
662 !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
663 "neg.s16 \t$dst, $dst;"),
664 [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
// The 16/32/64-bit forms are a single neg of the matching type.
665 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
666 "neg.s16 \t$dst, $src;",
667 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
668 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
669 "neg.s32 \t$dst, $src;",
670 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
671 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
672 "neg.s64 \t$dst, $src;",
673 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
675 //-----------------------------------
676 // Floating Point Arithmetic
677 //-----------------------------------
680 def FloatConst1 : PatLeaf<(fpimm), [{
681 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
683 float f = (float)N->getValueAPF().convertToFloat();
686 // Constant (double)1.0
687 def DoubleConst1 : PatLeaf<(fpimm), [{
688 if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
690 double d = (double)N->getValueAPF().convertToDouble();
// fadd/fsub/fmul through F3: asm without a rounding modifier, every variant
// gated on allowFMA or allowFMA_ftz.
694 defm FADD : F3<"add", fadd>;
695 defm FSUB : F3<"sub", fsub>;
696 defm FMUL : F3<"mul", fmul>;
// The same three nodes through F3_rn: asm carries ".rn"; only the ftz
// variants are gated (on doF32FTZ).
698 defm FADD_rn : F3_rn<"add", fadd>;
699 defm FSUB_rn : F3_rn<"sub", fsub>;
700 defm FMUL_rn : F3_rn<"mul", fmul>;
// Unary operations through F2. FSQRT passes "sqrt.rn" as the mnemonic prefix,
// so its asm strings come out as sqrt.rn.f64, sqrt.rn.ftz.f32 and sqrt.rn.f32.
702 defm FABS : F2<"abs", fabs>;
703 defm FNEG : F2<"neg", fneg>;
704 defm FSQRT : F2<"sqrt.rn", fsqrt>;
// f64 division.
// FDIV641r matches a dividend accepted by DoubleConst1 and prints a
// reciprocal instead; $a is matched but does not appear in the asm string.
709 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
710 (ins f64imm:$a, Float64Regs:$b),
711 "rcp.rn.f64 \t$dst, $b;",
712 [(set Float64Regs:$dst,
713 (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General f64 division with a register or immediate divisor.
714 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
715 (ins Float64Regs:$a, Float64Regs:$b),
716 "div.rn.f64 \t$dst, $a, $b;",
717 [(set Float64Regs:$dst,
718 (fdiv Float64Regs:$a, Float64Regs:$b))]>;
719 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
720 (ins Float64Regs:$a, f64imm:$b),
721 "div.rn.f64 \t$dst, $a, $b;",
722 [(set Float64Regs:$dst,
723 (fdiv Float64Regs:$a, fpimm:$b))]>;
726 // F32 Approximate reciprocal
// f32 division records gated on do_DIVF32_APPROX. The *_ftz forms add
// doF32FTZ to the Requires list and ".ftz" to the mnemonic.
//
// Reciprocal: the dividend must be accepted by FloatConst1; it is matched as
// $a but not printed.
728 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
729 (ins f32imm:$a, Float32Regs:$b),
730 "rcp.approx.ftz.f32 \t$dst, $b;",
731 [(set Float32Regs:$dst,
732 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
733 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
734 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
735 (ins f32imm:$a, Float32Regs:$b),
736 "rcp.approx.f32 \t$dst, $b;",
737 [(set Float32Regs:$dst,
738 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
739 Requires<[do_DIVF32_APPROX]>;
741 // F32 Approximate division
// Only register/register forms are defined in this group.
743 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
744 (ins Float32Regs:$a, Float32Regs:$b),
745 "div.approx.ftz.f32 \t$dst, $a, $b;",
746 [(set Float32Regs:$dst,
747 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
748 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
749 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
750 (ins Float32Regs:$a, Float32Regs:$b),
751 "div.approx.f32 \t$dst, $a, $b;",
752 [(set Float32Regs:$dst,
753 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
754 Requires<[do_DIVF32_APPROX]>;
756 // F32 Semi-accurate reciprocal
758 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
// f32 division records gated on do_DIVF32_FULL. The *_ftz forms add doF32FTZ
// to the Requires list and ".ftz" to the mnemonic.
//
// Reciprocal: still printed as rcp.approx in this mode. The dividend must be
// accepted by FloatConst1; it is matched as $a but not printed.
760 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
761 (ins f32imm:$a, Float32Regs:$b),
762 "rcp.approx.ftz.f32 \t$dst, $b;",
763 [(set Float32Regs:$dst,
764 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
765 Requires<[do_DIVF32_FULL, doF32FTZ]>;
766 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
767 (ins f32imm:$a, Float32Regs:$b),
768 "rcp.approx.f32 \t$dst, $b;",
769 [(set Float32Regs:$dst,
770 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
771 Requires<[do_DIVF32_FULL]>;
773 // F32 Semi-accurate division
// div.full with a register (rr) or immediate (ri) divisor.
775 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
776 (ins Float32Regs:$a, Float32Regs:$b),
777 "div.full.ftz.f32 \t$dst, $a, $b;",
778 [(set Float32Regs:$dst,
779 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
780 Requires<[do_DIVF32_FULL, doF32FTZ]>;
781 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
782 (ins Float32Regs:$a, f32imm:$b),
783 "div.full.ftz.f32 \t$dst, $a, $b;",
784 [(set Float32Regs:$dst,
785 (fdiv Float32Regs:$a, fpimm:$b))]>,
786 Requires<[do_DIVF32_FULL, doF32FTZ]>;
787 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
788 (ins Float32Regs:$a, Float32Regs:$b),
789 "div.full.f32 \t$dst, $a, $b;",
790 [(set Float32Regs:$dst,
791 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
792 Requires<[do_DIVF32_FULL]>;
793 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
794 (ins Float32Regs:$a, f32imm:$b),
795 "div.full.f32 \t$dst, $a, $b;",
796 [(set Float32Regs:$dst,
797 (fdiv Float32Regs:$a, fpimm:$b))]>,
798 Requires<[do_DIVF32_FULL]>;
800 // F32 Accurate reciprocal
// f32 division records that print the ".rn" forms. Their Requires lists name
// reqPTX20 (plus doF32FTZ for the *_ftz forms) and neither of the
// do_DIVF32_* predicates.
//
// Reciprocal: the dividend must be accepted by FloatConst1; it is matched as
// $a but not printed.
802 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
803 (ins f32imm:$a, Float32Regs:$b),
804 "rcp.rn.ftz.f32 \t$dst, $b;",
805 [(set Float32Regs:$dst,
806 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
807 Requires<[reqPTX20, doF32FTZ]>;
808 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
809 (ins f32imm:$a, Float32Regs:$b),
810 "rcp.rn.f32 \t$dst, $b;",
811 [(set Float32Regs:$dst,
812 (fdiv FloatConst1:$a, Float32Regs:$b))]>,
813 Requires<[reqPTX20]>;
815 // F32 Accurate division
// div.rn with a register (rr) or immediate (ri) divisor.
817 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
818 (ins Float32Regs:$a, Float32Regs:$b),
819 "div.rn.ftz.f32 \t$dst, $a, $b;",
820 [(set Float32Regs:$dst,
821 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
822 Requires<[doF32FTZ, reqPTX20]>;
823 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
824 (ins Float32Regs:$a, f32imm:$b),
825 "div.rn.ftz.f32 \t$dst, $a, $b;",
826 [(set Float32Regs:$dst,
827 (fdiv Float32Regs:$a, fpimm:$b))]>,
828 Requires<[doF32FTZ, reqPTX20]>;
829 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
830 (ins Float32Regs:$a, Float32Regs:$b),
831 "div.rn.f32 \t$dst, $a, $b;",
832 [(set Float32Regs:$dst,
833 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
834 Requires<[reqPTX20]>;
835 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
836 (ins Float32Regs:$a, f32imm:$b),
837 "div.rn.f32 \t$dst, $a, $b;",
838 [(set Float32Regs:$dst,
839 (fdiv Float32Regs:$a, fpimm:$b))]>,
840 Requires<[reqPTX20]>;
// FPCONTRACT32: f32 multiply-add contraction. Each def prints
// "<OpcStr> $dst, $a, $b, $c" and matches a*b + c. The suffix gives the
// operand kinds of a, b, c (r = Float32Regs, i = f32 immediate); $a is always
// a register.
//   OpcStr - PTX mnemonic to print (e.g. a mad/fma opcode string).
//   Pred   - predicate passed to Requires<> on the defs.
// NOTE(review): the embedded line numbers skip after rrr2, rri, rir and rii
// (858->860, 864->866, 870->872, 876->880), so the text that terminates those
// defs and closes the multiclass is not visible in this listing.
843 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
844 def rrr : NVPTXInst<(outs Float32Regs:$dst),
845 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
846 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
847 [(set Float32Regs:$dst, (fadd
848 (fmul Float32Regs:$a, Float32Regs:$b),
849 Float32Regs:$c))]>, Requires<[Pred]>;
850 // This works around a TableGen bug: it does not automatically
851 // generate the following permuted rule rrr2 from the above rrr.
852 // So we explicitly add it here. This happens to FMA32 only.
853 // See the comments at FMAD32 and FMA32 for more information.
// rrr2 is rrr with the fadd operands swapped: c + a*b.
854 def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
855 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
856 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
857 [(set Float32Regs:$dst, (fadd Float32Regs:$c,
858 (fmul Float32Regs:$a, Float32Regs:$b)))]>,
// rri: a*b + immediate c.
860 def rri : NVPTXInst<(outs Float32Regs:$dst),
861 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
862 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
863 [(set Float32Regs:$dst, (fadd
864 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
// rir: a*immediate b + c.
866 def rir : NVPTXInst<(outs Float32Regs:$dst),
867 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
868 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
869 [(set Float32Regs:$dst, (fadd
870 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
// rii: a*immediate b + immediate c.
872 def rii : NVPTXInst<(outs Float32Regs:$dst),
873 (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
874 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
875 [(set Float32Regs:$dst, (fadd
876 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
// FPCONTRACT64: f64 counterpart of FPCONTRACT32. Each def prints
// "<OpcStr> $dst, $a, $b, $c" and matches a*b + c on Float64Regs / f64
// immediates. There is no explicit swapped-operand (rrr2) variant here.
//   OpcStr - PTX mnemonic to print.
//   Pred   - predicate passed to Requires<> on the defs.
// NOTE(review): the embedded line numbers skip after rir and rii (896->898,
// 902->906), so the text terminating those defs and the closing brace are not
// visible in this listing.
880 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
881 def rrr : NVPTXInst<(outs Float64Regs:$dst),
882 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
883 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
884 [(set Float64Regs:$dst, (fadd
885 (fmul Float64Regs:$a, Float64Regs:$b),
886 Float64Regs:$c))]>, Requires<[Pred]>;
// rri: a*b + immediate c.
887 def rri : NVPTXInst<(outs Float64Regs:$dst),
888 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
889 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
890 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
891 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
// rir: a*immediate b + c.
892 def rir : NVPTXInst<(outs Float64Regs:$dst),
893 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
894 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
895 [(set Float64Regs:$dst, (fadd
896 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
// rii: a*immediate b + immediate c.
898 def rii : NVPTXInst<(outs Float64Regs:$dst),
899 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
900 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
901 [(set Float64Regs:$dst, (fadd
902 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
906 // Due to an unknown reason (most likely a bug in tablegen), tablegen does not
907 // automatically generate the rrr2 rule from
908 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
909 // If we reverse the order of the following two lines, then rrr2 rule will be
910 // generated for FMA32, but not for rrr.
911 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
// Instantiate the contraction multiclasses. Each line pairs a PTX mnemonic
// with the predicate that enables it:
//   mad.ftz.f32 / mad.f32       -> doFMADF32_ftz / doFMADF32
//   fma.rn.ftz.f32 / fma.rn.f32 -> doFMAF32_ftz / doFMAF32
//   fma.rn.f64                  -> doFMAF64
// The generated record names are the defm name followed by the def suffix
// (e.g. FMA32rrr, FMAD32_ftzrrr), which is how later patterns refer to them.
912 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
913 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
914 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
915 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
916 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
918 // b*c-a => fmad(b, c, -a)
// FPCONTRACT32_SUB_PAT_MAD: one anonymous pattern that rewrites b*c - a into
// Inst(b, c, FNEGf32(a)). Only this direction is provided here; a - b*c is
// not handled by this multiclass.
//   Inst - the three-register multiply-add instruction to emit.
//   Pred - predicate parameter; the line that applies it is not visible in
//          this listing (embedded numbering skips 921->925).
919 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
920 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
921 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
925 // a-b*c => fmad(-b,c, a)
926 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
927 // b*c-a => fmad(b, c, -a)
928 // - legal because b*c-a <=> b*c+(-a)
// FPCONTRACT32_SUB_PAT: two anonymous f32 patterns, one per subtraction
// direction. The negation is applied with FNEGf32 to b (first pattern) or to
// a (second pattern) before feeding Inst.
//   Inst - the three-register multiply-add instruction to emit.
//   Pred - predicate parameter; the lines that apply it are not visible in
//          this listing (embedded numbering skips 931->933 and 934->938).
929 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
930 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
931 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
933 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
934 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
938 // a-b*c => fmad(-b,c, a)
939 // b*c-a => fmad(b, c, -a)
// FPCONTRACT64_SUB_PAT: f64 version of the two subtraction patterns, using
// Float64Regs operands and FNEGf64 for the negation.
//   Inst - the three-register multiply-add instruction to emit.
//   Pred - predicate parameter; the lines that apply it are not visible in
//          this listing (embedded numbering skips 942->945 and 946->950).
940 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
941 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
942 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
945 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
946 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
// Instantiate the subtraction patterns on the all-register (rrr) forms.
// The fma variants use the two-direction patterns with the *AGG predicates;
// the mad variants use the single-direction _MAD patterns with the same
// predicates as the mad instructions themselves.
950 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
951 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
952 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
953 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
954 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
// f32 sine / cosine: fsin and fcos on Float32Regs are selected to the PTX
// approximate forms "sin.approx.f32" and "cos.approx.f32". No predicate is
// attached, so these are the unconditional selections for those nodes here.
956 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
957 "sin.approx.f32 \t$dst, $src;",
958 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
959 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
960 "cos.approx.f32 \t$dst, $src;",
961 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
963 //-----------------------------------
964 // Logical Arithmetic
965 //-----------------------------------
// LOG_FORMAT: two-operand bitwise instruction family for one opcode.
// For each width there is an rr (register, register) and an ri (register,
// immediate) form printing "<OpcStr>.<type> $dst, $a, $b;".
//   OpcStr - PTX mnemonic without the type suffix.
//   OpNode - SelectionDAG node matched by the patterns.
// Type suffixes used: .pred for Int1Regs, .b16 for both Int8Regs and
// Int16Regs, .b32 for Int32Regs, .b64 for Int64Regs.
// NOTE(review): the second pattern line of b16rr, b32rr and b64rr and the
// closing brace are not visible in this listing (embedded numbering skips
// 982->984, 989->991, 996->998, 1000->1003).
967 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
968 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
969 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
970 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
971 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
972 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
973 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
// 8-bit values are operated on with the 16-bit (.b16) instruction form.
974 def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
975 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
976 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
977 def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
978 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
979 [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
980 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
981 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
982 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
984 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
985 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
986 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
987 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
988 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
989 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
991 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
992 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
993 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
994 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
995 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
996 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
998 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
999 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1000 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
// Bitwise OR / AND / XOR instruction families, one LOG_FORMAT instantiation
// per PTX mnemonic and matching SelectionDAG node.
1003 defm OR : LOG_FORMAT<"or", or>;
1004 defm AND : LOG_FORMAT<"and", and>;
1005 defm XOR : LOG_FORMAT<"xor", xor>;
// Bitwise NOT for each integer register class. Type suffixes follow the same
// scheme as the two-operand logical ops: .pred for Int1Regs, .b16 for both
// Int8Regs and Int16Regs, .b32 and .b64 for the wider classes.
1007 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1008 "not.pred \t$dst, $src;",
1009 [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
1010 def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
1011 "not.b16 \t$dst, $src;",
1012 [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
1013 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1014 "not.b16 \t$dst, $src;",
1015 [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1016 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1017 "not.b32 \t$dst, $src;",
1018 [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1019 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1020 "not.b64 \t$dst, $src;",
1021 [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1023 // For shifts, the second src operand must be 32-bit value
// LSHIFT_FORMAT: shift instruction family printing
// "<OpcStr><width> $dst, $a, $b;" where OpcStr ends in the type letter
// (e.g. "shl.b") and the width digits are appended here.
//   OpcStr - PTX mnemonic prefix.
//   OpNode - SelectionDAG shift node matched by the patterns.
// Forms: rr = register shifted by a register amount, ri = register shifted by
// an i32 immediate, i32ii = both operands immediate. The i8 forms print the
// 16-bit width, the same as the i16 forms.
// NOTE(review): in every def the line carrying the shift-amount part of the
// pattern (and, for the rr forms, the rest of the operand list) is not
// visible in this listing, nor is the closing brace; see the gaps in the
// embedded line numbers.
1024 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1025 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1027 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1028 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1030 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1031 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1032 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1034 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1036 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1037 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1039 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1040 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1041 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1043 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1044 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1045 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1047 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1049 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1050 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1052 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1053 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1054 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1056 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1057 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1058 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1060 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1061 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1062 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
// Left shift: LSHIFT_FORMAT appends the width, giving shl.b16/.b32/.b64.
1066 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1068 // For shifts, the second src operand must be 32-bit value
1069 // Need to add cvt for the 8-bits.
// RSHIFT_FORMAT: right-shift instruction family. The 16/32/64-bit forms
// print "<OpcStr><width> $dst, $a, $b;" exactly like LSHIFT_FORMAT. The i8
// forms print two instructions: first "<CVTStr> $dst, $a;" to widen the
// 8-bit source into $dst, then "<OpcStr>16 $dst, $dst, $b;" to shift $dst in
// place.
//   OpcStr - PTX mnemonic prefix ending in the type letter (e.g. "shr.s").
//   OpNode - SelectionDAG shift node matched by the patterns.
//   CVTStr - PTX conversion mnemonic used by the i8 forms.
// NOTE(review): in every def the line carrying the shift-amount part of the
// pattern (and, for the rr forms, the rest of the operand list) is not
// visible in this listing, nor is the closing brace; see the gaps in the
// embedded line numbers.
1070 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
1071 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1073 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1074 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1076 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1077 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1078 [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1080 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1082 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1083 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1085 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1086 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1087 [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1089 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1090 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1091 [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1093 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1095 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1096 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1098 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1099 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1100 [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
// i8 forms: convert into $dst first, then shift $dst as a 16-bit value.
1102 def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1103 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1104 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1105 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1107 def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1108 !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1109 !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1110 [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
// Arithmetic right shift uses the signed shift and a sign-extending
// conversion for i8; logical right shift uses the unsigned shift and a
// zero-extending conversion.
1114 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
1115 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
// ROT32imm_sw: 32-bit rotate by immediate, done in software. Declares two
// scratch .b32 registers, computes %lhs = $src << $amt1 and
// %rhs = $src >> $amt2 (shr.b32), and adds them into $dst.
// The caller supplies both shift amounts as immediates.
// NOTE(review): the pattern list and terminator of this def are not visible
// in this listing (embedded numbering skips 1126->1129).
1118 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1119 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1120 !strconcat("{{\n\t",
1121 !strconcat(".reg .b32 %lhs;\n\t",
1122 !strconcat(".reg .b32 %rhs;\n\t",
1123 !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1124 !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1125 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1126 !strconcat("}}", ""))))))),
// SUB_FRM_32: immediate transform producing (32 - imm) as an i32 target
// constant, using the zero-extended value of the matched immediate.
// NOTE(review): the closing of the code block is not visible in this listing
// (embedded numbering skips 1130->1133).
1129 def SUB_FRM_32 : SDNodeXForm<imm, [{
1130 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
// rotl by imm: left-shift by amt, right-shift by 32-amt.
1133 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1134 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
// rotr by imm: left-shift by 32-amt, right-shift by amt.
1135 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1136 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
// ROTL32reg_sw / ROTR32reg_sw: 32-bit rotate by a register amount, done in
// software. Each declares scratch registers %lhs, %rhs and %amt2, computes
// %amt2 = 32 - $amt, shifts $src in both directions and adds the two halves
// into $dst. ROTL shifts left by $amt and right by %amt2; ROTR does the
// opposite.
// NOTE(review): the second line of each operand list is not visible in this
// listing (embedded numbering skips 1138->1140 and 1151->1153); the patterns
// show the amount operand as Int32Regs:$amt.
1138 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1140 !strconcat("{{\n\t",
1141 !strconcat(".reg .b32 %lhs;\n\t",
1142 !strconcat(".reg .b32 %rhs;\n\t",
1143 !strconcat(".reg .b32 %amt2;\n\t",
1144 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1145 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1146 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1147 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1148 !strconcat("}}", ""))))))))),
1149 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
1151 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1153 !strconcat("{{\n\t",
1154 !strconcat(".reg .b32 %lhs;\n\t",
1155 !strconcat(".reg .b32 %rhs;\n\t",
1156 !strconcat(".reg .b32 %amt2;\n\t",
1157 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1158 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1159 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1160 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1161 !strconcat("}}", ""))))))))),
1162 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
// ROT64imm_sw: 64-bit rotate by immediate, done in software. Declares two
// scratch .b64 registers, computes %lhs = $src << $amt1 and
// %rhs = $src >> $amt2 (shr.b64), and adds them into $dst. Both shift
// amounts are i32 immediates supplied by the caller.
// NOTE(review): the pattern list and terminator of this def are not visible
// in this listing (embedded numbering skips 1173->1176).
1165 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1166 i32imm:$amt1, i32imm:$amt2),
1167 !strconcat("{{\n\t",
1168 !strconcat(".reg .b64 %lhs;\n\t",
1169 !strconcat(".reg .b64 %rhs;\n\t",
1170 !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1171 !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1172 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1173 !strconcat("}}", ""))))))),
// SUB_FRM_64: immediate transform producing (64 - imm) as an i32 target
// constant, using the zero-extended value of the matched immediate.
// NOTE(review): the closing of the code block is not visible in this listing
// (embedded numbering skips 1177->1180).
1176 def SUB_FRM_64 : SDNodeXForm<imm, [{
1177 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
// rotl by imm: left-shift by amt, right-shift by 64-amt.
1180 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1181 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr by imm: left-shift by 64-amt, right-shift by amt.
1182 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1183 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
// ROTL64reg_sw / ROTR64reg_sw: 64-bit rotate by a 32-bit register amount,
// done in software. Each declares .b64 scratch registers %lhs and %rhs and a
// .u32 scratch %amt2, computes %amt2 = 64 - $amt, shifts $src in both
// directions and adds the two halves into $dst. ROTL shifts left by $amt and
// right by %amt2; ROTR does the opposite.
// NOTE(review): the second line of each operand list is not visible in this
// listing (embedded numbering skips 1185->1187 and 1198->1200); the patterns
// show the amount operand as Int32Regs:$amt.
1185 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1187 !strconcat("{{\n\t",
1188 !strconcat(".reg .b64 %lhs;\n\t",
1189 !strconcat(".reg .b64 %rhs;\n\t",
1190 !strconcat(".reg .u32 %amt2;\n\t",
1191 !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1192 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1193 !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1194 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1195 !strconcat("}}", ""))))))))),
1196 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1198 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1200 !strconcat("{{\n\t",
1201 !strconcat(".reg .b64 %lhs;\n\t",
1202 !strconcat(".reg .b64 %rhs;\n\t",
1203 !strconcat(".reg .u32 %amt2;\n\t",
1204 !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1205 !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1206 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1207 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1208 !strconcat("}}", ""))))))))),
1209 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1212 //-----------------------------------
1213 // Data Movement (Load / Store, Move)
1214 //-----------------------------------
// Two-operand address complex patterns rooted at frameindex nodes, for i32
// and i64 addresses. Matching is delegated to the C++ selector functions
// named in the strings (SelectADDRri / SelectADDRri64).
// NOTE(review): the trailing argument line of each def is not visible in this
// listing (embedded numbering skips 1216->1218 and 1218->1221).
1216 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1218 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
// Memory operands made of a base register plus an immediate: MEMri pairs
// Int32Regs with i32imm, MEMri64 pairs Int64Regs with i64imm. Both are
// printed by the "printMemOperand" method.
// NOTE(review): the closing braces are not visible in this listing
// (embedded numbering skips 1223->1225 and 1227->1230).
1221 def MEMri : Operand<i32> {
1222 let PrintMethod = "printMemOperand";
1223 let MIOperandInfo = (ops Int32Regs, i32imm);
1225 def MEMri64 : Operand<i64> {
1226 let PrintMethod = "printMemOperand";
1227 let MIOperandInfo = (ops Int64Regs, i64imm);
// Simple operand kinds: imem (iPTR) and imemAny (iPTRAny) are printed with
// "printOperand"; LdStCode is an i32 operand printed with "printLdStCode".
// NOTE(review): the closing braces are not visible in this listing
// (embedded numbering skips after each PrintMethod line).
1230 def imem : Operand<iPTR> {
1231 let PrintMethod = "printOperand";
1234 def imemAny : Operand<iPTRAny> {
1235 let PrintMethod = "printOperand";
1238 def LdStCode : Operand<i32> {
1239 let PrintMethod = "printLdStCode";
// NVPTXISD::Wrapper node: one result and one operand, constrained to have the
// same type, with the result being a pointer type.
1242 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
1243 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Materialize a wrapped global address into a register with a mov:
// MOV_ADDR into Int32Regs (mov.u32), MOV_ADDR64 into Int64Regs (mov.u64).
1245 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1246 "mov.u32 \t$dst, $a;",
1247 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1249 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1250 "mov.u64 \t$dst, $a;",
1251 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1253 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves, all marked IsSimpleMove=1 and all without
// selection patterns (empty pattern list). One def per register class; the
// 8-bit move prints mov.u16, the same as the 16-bit move.
// NOTE(review): the closing brace of the let block is not visible in this
// listing; presumably it follows FMOV64rr (embedded numbering skips
// 1269->1271) -- confirm.
1254 let IsSimpleMove=1 in {
1255 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1256 "mov.pred \t$dst, $sss;", []>;
1257 def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
1258 "mov.u16 \t$dst, $sss;", []>;
1259 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1260 "mov.u16 \t$dst, $sss;", []>;
1261 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1262 "mov.u32 \t$dst, $sss;", []>;
1263 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1264 "mov.u64 \t$dst, $sss;", []>;
1266 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1267 "mov.f32 \t$dst, $src;", []>;
1268 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1269 "mov.f64 \t$dst, $src;", []>;
// Immediate-to-register moves. Each def is selected for materializing a
// constant (imm / fpimm) into the corresponding register class. The 8-bit
// move prints mov.u16, the same as the 16-bit move.
// Note the 64-bit integer def is named IMOV64i (not IMOV64ri).
1271 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1272 "mov.pred \t$dst, $src;",
1273 [(set Int1Regs:$dst, imm:$src)]>;
1274 def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
1275 "mov.u16 \t$dst, $src;",
1276 [(set Int8Regs:$dst, imm:$src)]>;
1277 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1278 "mov.u16 \t$dst, $src;",
1279 [(set Int16Regs:$dst, imm:$src)]>;
1280 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1281 "mov.u32 \t$dst, $src;",
1282 [(set Int32Regs:$dst, imm:$src)]>;
1283 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1284 "mov.u64 \t$dst, $src;",
1285 [(set Int64Regs:$dst, imm:$src)]>;
// Floating-point immediates.
1287 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1288 "mov.f32 \t$dst, $src;",
1289 [(set Float32Regs:$dst, fpimm:$src)]>;
1290 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1291 "mov.f64 \t$dst, $src;",
1292 [(set Float64Regs:$dst, fpimm:$src)]>;
// A wrapped external symbol of type i32 is materialized with IMOV32ri.
// Only the i32 case is covered by this pattern.
1294 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1296 //---- Copy Frame Index ----
// Turn an address matched by ADDRri / ADDRri64 into a register value with an
// add (add.u32 / add.u64). The operand is printed with the "add" modifier,
// which is handled by the memory-operand print method.
1297 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1298 "add.u32 \t$dst, ${addr:add};",
1299 [(set Int32Regs:$dst, ADDRri:$addr)]>;
1300 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1301 "add.u64 \t$dst, ${addr:add};",
1302 [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1304 //-----------------------------------
1305 // Comparison and Selection
1306 //-----------------------------------
1308 // Generate string block like
1311 // setp.gt.s16 p, %a, %b;
1312 // selp.s16 %dst, -1, 0, p;
1314 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
// Set_Str: string-building helper. Field `s` is the final assembly text:
// declare a scratch predicate p, print "<OpcStr><sz1> p, <a>, <b>;" to set
// it, then "selp.s<sz2> <d>, -1, 0, p;" to turn it into -1 / 0 in <d>.
//   OpcStr - compare mnemonic prefix; sz1 is appended directly to it.
//   sz1    - type/width suffix for the compare.
//   sz2    - width suffix for the selp.s result.
//   d, a   - destination and first source operand strings.
//   b      - second source operand string (used in t7).
// t1..t11 are intermediate concatenations; only `s` is read by users.
// NOTE(review): the rest of the parameter list and the closing brace are not
// visible in this listing (embedded numbering skips 1315->1317 and
// 1328->1331).
1315 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
1317 string t1 = "{{\n\t.reg .pred p;\n\t";
1318 string t2 = !strconcat(t1 , OpcStr);
1319 string t3 = !strconcat(t2 , sz1);
1320 string t4 = !strconcat(t3 , " \tp, ");
1321 string t5 = !strconcat(t4 , a);
1322 string t6 = !strconcat(t5 , ", ");
1323 string t7 = !strconcat(t6 , b);
1324 string t8 = !strconcat(t7 , ";\n\tselp.s");
1325 string t9 = !strconcat(t8 , sz2);
1326 string t10 = !strconcat(t9, " \t");
1327 string t11 = !strconcat(t10, d);
1328 string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
1331 // Generate string block like
1334 // .reg .s16 %temp1;
1335 // .reg .s16 %temp2;
1336 // cvt.s16.s8 %temp1, %a;
1337 // cvt.s16.s8 %temp2, %b;
1338 // setp.gt.s16 p, %temp1, %temp2;
1339 // selp.s16 %dst, -1, 0, p;
1341 // when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
// Set_Stri8: string-building helper for 8-bit compares. Field `s` is the
// final assembly text, built in this order:
//   1. declare scratch predicate p and two scratch registers %temp1/%temp2
//      of type <type>;
//   2. "<cvt> %temp1, <a>;" and "<cvt> %temp2, <b>;" to widen both sources;
//   3. "<OpcStr>16 p, %temp1, %temp2;" to compare the widened values;
//   4. "selp.s16 <d>, -1, 0, p;" to produce -1 / 0 in <d>.
//   OpcStr - compare mnemonic prefix; "16" is appended to it.
//   d, a, b - destination and source operand strings.
//   type   - PTX type of the scratch registers (e.g. s16).
//   cvt    - conversion mnemonic (used in t8 and t12).
// t1..t20 are intermediate concatenations; only `s` is read by users.
// NOTE(review): the rest of the parameter list and the closing brace are not
// visible in this listing (embedded numbering skips 1342->1344 and
// 1364->1367).
1342 class Set_Stri8<string OpcStr, string d, string a, string b, string type,
1344 string t1 = "{{\n\t.reg .pred p;\n\t";
1345 string t2 = !strconcat(t1, ".reg .");
1346 string t3 = !strconcat(t2, type);
1347 string t4 = !strconcat(t3, " %temp1;\n\t");
1348 string t5 = !strconcat(t4, ".reg .");
1349 string t6 = !strconcat(t5, type);
1350 string t7 = !strconcat(t6, " %temp2;\n\t");
1351 string t8 = !strconcat(t7, cvt);
1352 string t9 = !strconcat(t8, " \t%temp1, ");
1353 string t10 = !strconcat(t9, a);
1354 string t11 = !strconcat(t10, ";\n\t");
1355 string t12 = !strconcat(t11, cvt);
1356 string t13 = !strconcat(t12, " \t%temp2, ");
1357 string t14 = !strconcat(t13, b);
1358 string t15 = !strconcat(t14, ";\n\t");
1359 string t16 = !strconcat(t15, OpcStr);
1360 string t17 = !strconcat(t16, "16");
1361 string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
1362 string t19 = !strconcat(t18, "selp.s16 \t");
1363 string t20 = !strconcat(t19, d);
1364 string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
// ISET_FORMAT: integer comparison instruction family for one condition.
//   OpcStr     - predicate-producing compare prefix (e.g. "setp.gt.s");
//                the operand width is appended here.
//   OpcStr_u32 - u32-producing compare prefix (e.g. "set.gt.u32.s").
//   OpNode     - comparison pattern fragment matched by the patterns.
//   TypeStr    - PTX type passed to the 8-bit helper strings.
//   CVTStr     - conversion mnemonic passed to the 8-bit helper strings.
// Three groups of defs:
//   *_toi8/_toi16/_toi32/_toi64 - result in a register of the operand width,
//                 text built by Set_Stri8 / Set_Str;
//   *_p         - result in Int1Regs, printed with OpcStr;
//   *_u32       - result in Int32Regs, printed with OpcStr_u32.
// Within the _p and _u32 groups, rr/ri/ir give the operand kinds of $a and
// $b (r = register, i = immediate).
// NOTE(review): Handle_i8rr / Handle_i8ri / Handle_i8ir are defined outside
// this chunk; presumably they build the widened 8-bit compare text -- confirm.
// NOTE(review): several short lines (operand-list continuations, the pattern
// lists of the _to* defs, the closing brace) are not visible in this listing;
// see the gaps in the embedded line numbers.
1367 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
1368 string TypeStr, string CVTStr> {
1369 def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1370 Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
1372 def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1374 Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
1376 def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1378 Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
1380 def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1382 Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
// Predicate-producing compares.
1385 def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1386 Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
1387 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1388 def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1389 Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
1390 [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1391 def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1392 Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
1393 [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1394 def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1395 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1396 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1397 def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1398 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1399 [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1400 def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1401 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1402 [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1403 def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1404 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1405 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1406 def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1407 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1408 [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1409 def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1410 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1411 [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1412 def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1413 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1414 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1415 def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1416 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1417 [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1418 def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1419 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1420 [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// u32-producing compares.
1422 def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1423 Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
1424 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1425 def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1426 Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
1427 [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1428 def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1429 Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
1430 [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1431 def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
1433 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1434 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1435 def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1436 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1437 [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1438 def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1439 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1440 [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1441 def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1443 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1444 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1445 def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1446 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1447 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1448 def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1449 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1450 [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1451 def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
1453 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1454 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1455 def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1456 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1457 [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1458 def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1459 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1460 [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
// FSET_FORMAT: floating-point comparison instruction family for one
// condition.
//   OpcStr     - predicate-producing compare prefix ending in "."
//                (e.g. "setp.gt."); the type is appended here.
//   OpcStr_u32 - u32-producing compare prefix (e.g. "set.gt.u32.").
//   OpNode     - comparison pattern fragment matched by the patterns.
// Three groups of defs:
//   *_toi32/_toi64 - integer result, text built by Set_Str;
//   *_p            - result in Int1Regs, printed with OpcStr;
//   *_u32          - result in Int32Regs, printed with OpcStr_u32.
// The _ftz forms print the "ftz.f32" type suffix instead of "f32".
// NOTE(review): the *_p_ftz defs (and f32rr_toi32_ftz) carry
// Requires<[doF32FTZ]>, but f32rr_u32_ftz, f32ri_u32_ftz and f32ir_u32_ftz
// have no Requires clause and share their patterns with the non-ftz _u32
// defs -- confirm whether the missing predicate is intentional.
// NOTE(review): several short lines (operand-list continuations, some pattern
// lists, the closing brace) are not visible in this listing; see the gaps in
// the embedded line numbers.
1463 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
1464 def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1466 Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
1467 []>, Requires<[doF32FTZ]>;
1468 def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1470 Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
1472 def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
1474 Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
1476 def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
1478 Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
// Predicate-producing compares.
1481 def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
1483 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1484 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
1485 , Requires<[doF32FTZ]>;
1486 def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
1487 (ins Float32Regs:$a, Float32Regs:$b),
1488 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1489 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1490 def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1491 (ins Float32Regs:$a, f32imm:$b),
1492 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1493 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
1494 Requires<[doF32FTZ]>;
1495 def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
1496 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1497 [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1498 def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1499 (ins f32imm:$a, Float32Regs:$b),
1500 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1501 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
1502 Requires<[doF32FTZ]>;
1503 def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
1504 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1505 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1506 def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
1507 (ins Float64Regs:$a, Float64Regs:$b),
1508 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1509 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1510 def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
1511 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1512 [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1513 def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
1514 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1515 [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// u32-producing compares (no Requires clause on the _ftz forms; see the
// review note at the top of this multiclass).
1517 def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1518 (ins Float32Regs:$a, Float32Regs:$b),
1519 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1520 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1521 def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1522 (ins Float32Regs:$a, Float32Regs:$b),
1523 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1524 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1525 def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1526 (ins Float32Regs:$a, f32imm:$b),
1527 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1528 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1529 def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1530 (ins Float32Regs:$a, f32imm:$b),
1531 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1532 [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1533 def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1534 (ins f32imm:$a, Float32Regs:$b),
1535 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1536 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1537 def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1538 (ins f32imm:$a, Float32Regs:$b),
1539 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1540 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1541 def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1542 (ins Float64Regs:$a, Float64Regs:$b),
1543 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1544 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1545 def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1546 (ins Float64Regs:$a, f64imm:$b),
1547 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1548 [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1549 def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1550 (ins f64imm:$a, Float64Regs:$b),
1551 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1552 [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
// Twelve ISET_FORMAT instantiations: signed and unsigned variants of
// gt, lt, ge, le, eq and ne. Signed variants pass "s16"/"cvt.s16.s8" for the
// 8-bit helpers; unsigned variants pass "u16"/"cvt.u16.u8".
// NOTE(review): the "defm <name>" line that precedes each entry is not
// visible in this listing (only the even embedded line numbers 1556..1578
// are present), so the instantiation names cannot be read from here.
1556 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
1558 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
1560 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
1562 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
1564 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
1566 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
1568 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
1570 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
1572 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
1574 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
1576 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
1578 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
// Equality / inequality of two predicates, producing a predicate.
// Inequality (setne / setune) is a single xor.pred. Equality (seteq / setueq)
// is xor.pred into a scratch predicate followed by not.pred.
1580 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1581 (ins Int1Regs:$a, Int1Regs:$b),
1582 "xor.pred \t$dst, $a, $b;",
1583 [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1584 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1585 (ins Int1Regs:$a, Int1Regs:$b),
1586 "xor.pred \t$dst, $a, $b;",
1587 [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
1588 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1589 (ins Int1Regs:$a, Int1Regs:$b),
1590 !strconcat("{{\n\t",
1591 !strconcat(".reg .pred temp;\n\t",
1592 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1593 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1594 [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1595 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1596 (ins Int1Regs:$a, Int1Regs:$b),
1597 !strconcat("{{\n\t",
1598 !strconcat(".reg .pred temp;\n\t",
1599 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1600 !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1601 [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
1603 // Compare 2 i1's and produce a u32
// Both definitions xor the operands into a predicate named temp and then use
// selp.u32 to pick between -1 and 0.
// setne: temp set selects -1, temp clear selects 0.
1604 def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1605 (ins Int1Regs:$a, Int1Regs:$b),
1606 !strconcat("{{\n\t",
1607 !strconcat(".reg .pred temp;\n\t",
1608 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1609 !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
1610 [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
// seteq: the selp operands are swapped, so temp set selects 0 and temp clear
// selects -1.
1611 def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1612 (ins Int1Regs:$a, Int1Regs:$b),
1613 !strconcat("{{\n\t",
1614 !strconcat(".reg .pred temp;\n\t",
1615 !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1616 !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
1617 [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
// Floating-point comparison families, each an instantiation of FSET_FORMAT
// with the setp.* mnemonic prefix, the set.*.u32 mnemonic prefix and the
// compare node to match.
//
// Ordered compare nodes (seto*) use the plain gt/lt/ge/le/eq/ne mnemonics.
1619 defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
1620 defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
1621 defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
1622 defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
1623 defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
1624 defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
// Unordered compare nodes (setu*) use the mnemonics with the "u" suffix.
1626 defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
1627 defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
1628 defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
1629 defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
1630 defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
1631 defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;
// seto maps to the "num" mnemonics and setuo to the "nan" mnemonics.
1633 defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
1634 defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
// Select between two i1 values. This is a pattern, not an instruction: the
// select is rewritten as (p AND a) OR ((NOT p) AND b) using ANDb1rr, NOT1 and
// ORb1rr.
1636 def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1637 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1638 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
// Select between two i8 values on predicate $p. The four definitions cover
// the register/immediate combinations of $a and $b (rr, ri, ir, ii). All of
// them print selp.b16 with operands in the order $dst, $a, $b, $p.
1639 def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
1640 (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
1641 "selp.b16 \t$dst, $a, $b, $p;",
1642 [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
1643 def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
1644 (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
1645 "selp.b16 \t$dst, $a, $b, $p;",
1646 [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
1647 def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
1648 (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
1649 "selp.b16 \t$dst, $a, $b, $p;",
1650 [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
1651 def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
1652 (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
1653 "selp.b16 \t$dst, $a, $b, $p;",
1654 [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// Select between two i16 values on predicate $p, printed as selp.b16.
// Variants: rr, ri, ir, ii (register or immediate for $a and $b).
1656 def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
1657 (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
1658 "selp.b16 \t$dst, $a, $b, $p;",
1659 [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
1660 def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
1661 (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
1662 "selp.b16 \t$dst, $a, $b, $p;",
1663 [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
1664 def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
1665 (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
1666 "selp.b16 \t$dst, $a, $b, $p;",
1667 [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
1668 def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
1669 (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
1670 "selp.b16 \t$dst, $a, $b, $p;",
1671 [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// Select between two i32 values on predicate $p, printed as selp.b32.
// Variants: rr, ri, ir, ii (register or immediate for $a and $b).
1673 def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
1674 (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
1675 "selp.b32 \t$dst, $a, $b, $p;",
1676 [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
1677 def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
1678 (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
1679 "selp.b32 \t$dst, $a, $b, $p;",
1680 [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
1681 def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
1682 (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
1683 "selp.b32 \t$dst, $a, $b, $p;",
1684 [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
1685 def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
1686 (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
1687 "selp.b32 \t$dst, $a, $b, $p;",
1688 [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// Select between two i64 values on predicate $p, printed as selp.b64.
// Variants: rr, ri, ir, ii (register or immediate for $a and $b).
1690 def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
1691 (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
1692 "selp.b64 \t$dst, $a, $b, $p;",
1693 [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
1694 def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
1695 (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
1696 "selp.b64 \t$dst, $a, $b, $p;",
1697 [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
1698 def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
1699 (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
1700 "selp.b64 \t$dst, $a, $b, $p;",
1701 [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
1702 def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
1703 (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
1704 "selp.b64 \t$dst, $a, $b, $p;",
1705 [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
// Select between two f32 values on predicate $p, printed as selp.f32.
// Variants: rr, ri, ir, ii; immediates are matched with fpimm.
1707 def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
1708 (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
1709 "selp.f32 \t$dst, $a, $b, $p;",
1710 [(set Float32Regs:$dst,
1711 (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
1712 def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
1713 (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
1714 "selp.f32 \t$dst, $a, $b, $p;",
1715 [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
1716 def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
1717 (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
1718 "selp.f32 \t$dst, $a, $b, $p;",
1719 [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
1720 def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
1721 (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
1722 "selp.f32 \t$dst, $a, $b, $p;",
1723 [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
// Select between two f64 values on predicate $p, printed as selp.f64.
// Variants: rr, ri, ir, ii; immediates are matched with fpimm.
1725 def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
1726 (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
1727 "selp.f64 \t$dst, $a, $b, $p;",
1728 [(set Float64Regs:$dst,
1729 (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
1730 def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
1731 (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
1732 "selp.f64 \t$dst, $a, $b, $p;",
1733 [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
1734 def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
1735 (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
1736 "selp.f64 \t$dst, $a, $b, $p;",
1737 [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
// NOTE(review): the assembly string below has a space after the tab, unlike
// every other SELECT definition in this group; it looks unintentional but only
// affects whitespace in the printed text.
1738 def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
1739 (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
1740 "selp.f64 \t $dst, $a, $b, $p;",
1741 [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1743 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1744 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Type profile used by the DeclareParam and DeclareRetParam nodes: no results
// and three operands, all constrained to integer types.
1746 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
1747 SDTCisInt<2>]>;
// Type profiles for the NVPTX parameter-passing and call-sequence nodes
// defined below. The SDTypeProfile arguments are the number of results, the
// number of operands, and the list of type constraints on them.
//
// Three integer operands, no result.
1748 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1749 SDTCisInt<1>, SDTCisInt<2>]>;
// One result and two operands; SDTCisInt<1> and SDTCisInt<2> constrain the
// two operands to integers, the result type is left unconstrained.
1750 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
// A single integer operand.
1751 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1752 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
// Three operands; the first two are integers, the third is unconstrained.
1753 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1754 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
// Two operands; the first is an integer, the second is unconstrained.
1755 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
// The remaining profiles only fix result/operand counts and place no type
// constraints.
1756 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1757 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1758 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1759 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
1760 def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
// Two operands; the first is an integer.
1761 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1762 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// Declaration nodes. Each binds an NVPTXISD opcode to one of the type profiles
// above and carries chain, in-glue, out-glue and side-effect properties.
// DeclareParam and DeclareRetParam share SDTDeclareParamProfile;
// DeclareScalarParam and DeclareRet share SDTDeclareScalarParamProfile.
1764 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
1765 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1766 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
1767 SDTDeclareScalarParamProfile,
1768 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1769 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
1770 SDTDeclareParamProfile,
1771 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1772 def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
1773 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter load/store and call-printing nodes.
// LoadParam is the only node in this group marked SDNPMayLoad and the only one
// without SDNPSideEffect.
1774 def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
1775 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1776 def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
1777 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1778 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
1779 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1780 def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
1781 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// StoreParamU32 / StoreParamS32 are matched below by instructions that
// convert the value to 32 bits (cvt.u32.* / cvt.s32.*) before storing it.
1782 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
1783 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1784 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
1785 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// MoveToParam reuses the StoreParam profile.
1786 def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
1787 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Call-sequence nodes. All are chained, glued on both sides and marked as
// having side effects. CallArgEnd, CallVoid and Prototype share
// SDTCallVoidProfile (one unconstrained operand).
1788 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
1789 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1790 def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
1791 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1792 def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
1793 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1794 def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
1795 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1796 def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
1797 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1798 def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
1799 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1800 def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
1801 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Node used by the MoveParam* instructions below (one result, one operand).
// NOTE(review): this definition is cut off here -- the property-list argument
// and the closing ">;" (listing line 1803) are missing, and their content
// cannot be determined from this section. Restore from upstream; TODO confirm.
1802 def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
// Return-value nodes. These carry only chain and side-effect properties (no
// glue). StoreRetval and MoveToRetval share SDTStoreRetvalProfile.
1804 def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
1805 [SDNPHasChain, SDNPSideEffect]>;
1806 def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
1807 [SDNPHasChain, SDNPSideEffect]>;
1808 def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
1809 [SDNPHasChain, SDNPSideEffect]>;
// PseudoUseParam is chained and glued on both sides.
1810 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
1811 SDTPseudoUseParamProfile,
1812 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// RETURNNode takes no operands (SDTCallArgMarkProfile) and is matched by
// RETURNInst.
1813 def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
1814 [SDNPHasChain, SDNPSideEffect]>;
// Loads a returned value from retval0 at byte offset $b.
//   regclass - register class of the destination.
//   opstr    - type suffix appended to "ld.param" (e.g. ".b32").
// Matches LoadParam whose first operand is the constant 1.
1816 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
1817 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1818 !strconcat(!strconcat("ld.param", opstr),
1819 "\t$dst, [retval0+$b];"),
1820 [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
// Copies a returned value out of the name "retval" followed by index $b,
// using "mov" plus the opstr type suffix.
// Matches LoadParam whose first operand is the constant 0.
1822 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
1823 NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1824 !strconcat(!strconcat("mov", opstr),
1825 "\t$dst, retval$b;"),
1826 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
// Stores $val into outgoing parameter number $a at byte offset $b, printed as
// "st.param" plus the opstr type suffix. Matches the StoreParam node.
1828 class StoreParamInst<NVPTXRegClass regclass, string opstr> :
1829 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1830 !strconcat(!strconcat("st.param", opstr),
1831 "\t[param$a+$b], $val;"),
1832 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// Moves $val into the name "param" followed by index $a, using "mov" plus the
// opstr type suffix. Operand $b is matched by the pattern but does not appear
// in the printed text. Matches the MoveToParam node.
1834 class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
1835 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1836 !strconcat(!strconcat("mov", opstr),
1837 "\tparam$a, $val;"),
1838 [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
// Stores $val to func_retval0 at byte offset $a, printed as "st.param" plus
// the opstr type suffix. Matches the StoreRetval node.
1840 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
1841 NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
1842 !strconcat(!strconcat("st.param", opstr),
1843 "\t[func_retval0+$a], $val;"),
1844 [(StoreRetval (i32 imm:$a), regclass:$val)]>;
// Moves $val into the name "func_retval" followed by index $num, using "mov"
// plus the opstr type suffix. Matches the MoveToRetval node.
1846 class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
1847 NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
1848 !strconcat(!strconcat("mov", opstr),
1849 "\tfunc_retval$num, $val;"),
1850 [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
// Moves $val into func_retval0, using "mov" plus the opstr type suffix.
// Matches the MoveRetval node.
1852 class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
1853 NVPTXInst<(outs), (ins regclass:$val),
1854 !strconcat(!strconcat("mov", opstr),
1855 "\tfunc_retval0, $val;"),
1856 [(MoveRetval regclass:$val)]>;
// Prints the start of a "call" that has one return value: the keyword followed
// by the parenthesised name retval0. Matches PrintCall with operand 1.
1858 def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
1859 "call (retval0), ",
1860 [(PrintCall (i32 1))]>;
// Start-of-call text for "call" with 2 to 8 return values. The integer operand
// of PrintCall selects how many retvalN names are listed.
1861 def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
1862 "call (retval0, retval1), ",
1863 [(PrintCall (i32 2))]>;
1864 def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
1865 "call (retval0, retval1, retval2), ",
1866 [(PrintCall (i32 3))]>;
1867 def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
1868 "call (retval0, retval1, retval2, retval3), ",
1869 [(PrintCall (i32 4))]>;
1870 def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
1871 "call (retval0, retval1, retval2, retval3, retval4), ",
1872 [(PrintCall (i32 5))]>;
1873 def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
1874 "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
1875 [(PrintCall (i32 6))]>;
1876 def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
1877 "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1878 [(PrintCall (i32 7))]>;
1879 def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
1880 !strconcat("call (retval0, retval1, retval2, retval3, retval4",
1881 ", retval5, retval6, retval7), "),
1882 [(PrintCall (i32 8))]>;
// PrintCall with operand 0: only the "call " keyword, no return-value list.
1884 def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
1885 [(PrintCall (i32 0))]>;
// "call.uni" counterparts of the PrintCall instructions: the integer operand
// of PrintCallUni (1 to 8) selects how many retvalN names are listed.
1887 def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
1888 "call.uni (retval0), ",
1889 [(PrintCallUni (i32 1))]>;
1890 def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
1891 "call.uni (retval0, retval1), ",
1892 [(PrintCallUni (i32 2))]>;
1893 def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
1894 "call.uni (retval0, retval1, retval2), ",
1895 [(PrintCallUni (i32 3))]>;
1896 def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
1897 "call.uni (retval0, retval1, retval2, retval3), ",
1898 [(PrintCallUni (i32 4))]>;
1899 def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
1900 "call.uni (retval0, retval1, retval2, retval3, retval4), ",
1901 [(PrintCallUni (i32 5))]>;
1902 def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
1903 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
1904 [(PrintCallUni (i32 6))]>;
1905 def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
1906 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1907 [(PrintCallUni (i32 7))]>;
1908 def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
1909 !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
1910 ", retval5, retval6, retval7), "),
1911 [(PrintCallUni (i32 8))]>;
// PrintCallUni with operand 0: only the "call.uni " keyword.
1913 def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
1914 [(PrintCallUni (i32 0))]>;
// Integer instantiations of LoadParamMemInst; the suffix gives the ld.param
// type for each register class.
1916 def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
1917 def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
1918 def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
1919 def LoadParamMemI8 : LoadParamMemInst<Int8Regs, ".b8">;
1921 //def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1922 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1923 // "cvt.u16.u32\t$dst, temp_param_reg;"),
1924 // [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1925 //def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1926 // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1927 // "cvt.u16.u32\t$dst, temp_param_reg;"),
1928 // [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
// Floating-point instantiations of LoadParamMemInst.
1930 def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
1931 def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
// LoadParamReg*: read a return value named retval$b.
// The 64- and 32-bit integer forms use LoadParamRegInst (mov).
1933 def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
1934 def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
// The 16- and 8-bit forms are written out by hand: both print cvt.u16.u32
// instead of mov.
1935 def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1936 "cvt.u16.u32\t$dst, retval$b;",
1937 [(set Int16Regs:$dst,
1938 (LoadParam (i32 0), (i32 imm:$b)))]>;
1939 def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1940 "cvt.u16.u32\t$dst, retval$b;",
1941 [(set Int8Regs:$dst,
1942 (LoadParam (i32 0), (i32 imm:$b)))]>;
1944 def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
1945 def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
// StoreParam for 64- and 32-bit integers via the StoreParamInst class.
1947 def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
1948 def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
// 16-bit form written out by hand; it prints st.param.b16 with the same
// operand layout as StoreParamInst.
1950 def StoreParamI16 : NVPTXInst<(outs),
1951 (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1952 "st.param.b16\t[param$a+$b], $val;",
1953 [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
// 8-bit StoreParam written out by hand: prints st.param.b8 and matches the
// StoreParam node with an Int8Regs value.
1955 def StoreParamI8 : NVPTXInst<(outs),
1956 (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1957 "st.param.b8\t[param$a+$b], $val;",
1958 [(StoreParam
1959 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// Widening parameter stores. Each converts a 16- or 8-bit value to 32 bits
// into a register named temp_param_reg and then stores that with
// st.param.b32. The S32 forms use a signed cvt, the U32 forms an unsigned one.
// NOTE(review): temp_param_reg is not declared inside these assembly strings;
// presumably it is declared elsewhere in the emitted output -- confirm.
1961 def StoreParamS32I16 : NVPTXInst<(outs),
1962 (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1963 !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
1964 "st.param.b32\t[param$a+$b], temp_param_reg;"),
1965 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1966 def StoreParamU32I16 : NVPTXInst<(outs),
1967 (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1968 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1969 "st.param.b32\t[param$a+$b], temp_param_reg;"),
1970 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1972 def StoreParamU32I8 : NVPTXInst<(outs),
1973 (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1974 !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
1975 "st.param.b32\t[param$a+$b], temp_param_reg;"),
1976 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1977 def StoreParamS32I8 : NVPTXInst<(outs),
1978 (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1979 !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
1980 "st.param.b32\t[param$a+$b], temp_param_reg;"),
1981 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// Floating-point StoreParam via the StoreParamInst class.
1983 def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
1984 def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
// MoveToParam for 64/32-bit integers and floats via the MoveToParamInst class.
1986 def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">;
1987 def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">;
1988 def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
1989 def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;
// 16- and 8-bit forms: convert to 32 bits into temp_param_reg, then mov.b32
// that into param$a.
// NOTE(review): MoveToParamI8 prints cvt.u32.u16 for an Int8Regs source while
// StoreParamU32I8 prints cvt.u32.u8 for the same register class -- confirm
// which source type is intended.
1990 def MoveToParamI16 : NVPTXInst<(outs),
1991 (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1992 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1993 "mov.b32\tparam$a, temp_param_reg;"),
1994 [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1995 def MoveToParamI8 : NVPTXInst<(outs),
1996 (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1997 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1998 "mov.b32\tparam$a, temp_param_reg;"),
1999 [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
// Integer instantiations of StoreRetvalInst (st.param to func_retval0).
2001 def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
2002 def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
2003 def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
2004 def StoreRetvalI8 : StoreRetvalInst<Int8Regs, ".b8">;
2006 //def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
2007 // !strconcat("\{\n\t",
2008 // !strconcat(".reg .b32 temp_retval_reg;\n\t",
2009 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2010 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2011 // [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
2012 //def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
2013 // !strconcat("\{\n\t",
2014 // !strconcat(".reg .b32 temp_retval_reg;\n\t",
2015 // !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2016 // "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2017 // [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
// Floating-point instantiations of StoreRetvalInst.
2019 def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
2020 def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
// MoveRetval: mov into func_retval0 for every register class.
2022 def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">;
2023 def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">;
2024 def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">;
2025 def MoveRetvalI8 : MoveRetvalInst<Int8Regs, ".b8">;
2026 def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
2027 def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
// MoveToRetval: mov into func_retval$num. The 64/32-bit integer and the float
// forms use the MoveToRetvalInst class.
2029 def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">;
2030 def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">;
2031 def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
2032 def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
// The 16- and 8-bit forms print cvt.u32.u16 directly into func_retval$num
// instead of a mov.
2033 def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
2034 "cvt.u32.u16\tfunc_retval$num, $val;",
2035 [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
2036 def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
2037 "cvt.u32.u16\tfunc_retval$num, $val;",
2038 [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
// Punctuation of the printed argument list: CallArgBegin prints "(",
// CallArgEnd prints ");" when its operand is 1 and ")" when it is 0.
2040 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
2041 def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
2042 def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
// Function return: prints "ret;".
2043 def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
// Prints one register call argument followed by ", ". Matches CallArg whose
// first operand is the constant 0.
2045 class CallArgInst<NVPTXRegClass regclass> :
2046 NVPTXInst<(outs), (ins regclass:$a), "$a, ",
2047 [(CallArg (i32 0), regclass:$a)]>;
// Same for the final argument: prints the register with no trailing
// separator. Matches LastCallArg whose first operand is the constant 0.
2049 class LastCallArgInst<NVPTXRegClass regclass> :
2050 NVPTXInst<(outs), (ins regclass:$a), "$a",
2051 [(LastCallArg (i32 0), regclass:$a)]>;
// Register call arguments, one instantiation per register class.
2053 def CallArgI64 : CallArgInst<Int64Regs>;
2054 def CallArgI32 : CallArgInst<Int32Regs>;
2055 def CallArgI16 : CallArgInst<Int16Regs>;
2056 def CallArgI8 : CallArgInst<Int8Regs>;
2058 def CallArgF64 : CallArgInst<Float64Regs>;
2059 def CallArgF32 : CallArgInst<Float32Regs>;
// Final register call argument, one instantiation per register class.
2061 def LastCallArgI64 : LastCallArgInst<Int64Regs>;
2062 def LastCallArgI32 : LastCallArgInst<Int32Regs>;
2063 def LastCallArgI16 : LastCallArgInst<Int16Regs>;
2064 def LastCallArgI8 : LastCallArgInst<Int8Regs>;
2066 def LastCallArgF64 : LastCallArgInst<Float64Regs>;
2067 def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// i32 immediate arguments (first operand 0): the immediate itself is printed.
2069 def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
2070 [(CallArg (i32 0), (i32 imm:$a))]>;
2071 def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
2072 [(LastCallArg (i32 0), (i32 imm:$a))]>;
// Parameter arguments (first operand 1): the immediate is printed as the
// index in a "param<N>" name.
2074 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
2075 [(CallArg (i32 1), (i32 imm:$a))]>;
2076 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
2077 [(LastCallArg (i32 1), (i32 imm:$a))]>;
// Call-target instructions matching the CallVoid node: one for a global
// address (Wrapper tglobaladdr), one for a 32-bit register address and one for
// a 64-bit register address.
// NOTE(review): each of the three definitions skips a listing line (2080,
// 2083, 2086) exactly where the assembly-string argument of NVPTXInst belongs,
// so they are incomplete as shown. Restore those lines from the upstream file
// before building; their content cannot be determined from this section.
2079 def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
2081 [(CallVoid (Wrapper tglobaladdr:$addr))]>;
2082 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
2084 [(CallVoid Int32Regs:$addr)]>;
2085 def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
2087 [(CallVoid Int64Regs:$addr)]>;
// Prints ", prototype_<val>;" for the Prototype node.
2088 def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
2089 ", prototype_$val;",
2090 [(Prototype (i32 imm:$val))]>;
// Return-value declarations.
// DeclareRetParam: a .param byte array named retval$num with explicit
// alignment and size.
2092 def DeclareRetMemInst : NVPTXInst<(outs),
2093 (ins i32imm:$align, i32imm:$size, i32imm:$num),
2094 ".param .align $align .b8 retval$num[$size];",
2095 [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
// DeclareRet with first operand 1 prints a scalar .param; with first operand
// 2 it prints a .reg. $size is printed as the bit width in the ".b" type.
2096 def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2097 ".param .b$size retval$num;",
2098 [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
2099 def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2100 ".reg .b$size retval$num;",
2101 [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Outgoing-parameter declarations.
// DeclareParam: a .param byte array named param$a with explicit alignment and
// size.
2103 def DeclareParamInst : NVPTXInst<(outs),
2104 (ins i32imm:$align, i32imm:$a, i32imm:$size),
2105 ".param .align $align .b8 param$a[$size];",
2106 [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
// DeclareScalarParam with last operand 0 prints a scalar .param; with last
// operand 1 it prints a .reg.
2107 def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2108 ".param .b$size param$a;",
2109 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
2110 def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2111 ".reg .b$size param$a;",
2112 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
// Register-to-register copy matching the MoveParam node, printed as "mov"
// plus the asmstr type suffix.
2114 class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
2115 NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
2116 !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
2117 [(set regclass:$dst, (MoveParam regclass:$src))]>;
2119 def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
2120 def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16- and 8-bit forms are written out by hand and print cvt.u16.u32
// instead of mov.
2121 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
2122 "cvt.u16.u32\t$dst, $src;",
2123 [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
2124 def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
2125 "cvt.u16.u32\t$dst, $src;",
2126 [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
2127 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
2128 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// Matches the PseudoUseParam node for a register of the given class. The
// printed text starts with "//", i.e. only a comment naming the register.
2130 class PseudoUseParamInst<NVPTXRegClass regclass> :
2131 NVPTXInst<(outs), (ins regclass:$src),
2132 "// Pseudo use of $src",
2133 [(PseudoUseParam regclass:$src)]>;
// One instantiation per register class.
2135 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
2136 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
2137 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
2138 def PseudoUseParamI8 : PseudoUseParamInst<Int8Regs>;
2139 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
2140 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2144 // Load / Store Handling
// Scalar load instructions for one destination register class.
//
// Every variant takes four LdStCode modifier operands (isVol, addsp, Vec,
// Sign), which are printed through the volatile/addsp/vec/sign operand
// modifiers, followed by the access width $fromWidth. The variants differ only
// in how the address is given. None of them carries a selection pattern (the
// pattern list is empty).
2146 multiclass LD<NVPTXRegClass regclass> {
// Address is a symbol (imem).
2147 def _avar : NVPTXInst<(outs regclass:$dst),
2148 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2149 i32imm:$fromWidth, imem:$addr),
2150 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2151 "$fromWidth \t$dst, [$addr];"), []>;
// Address is a 32-bit register.
2152 def _areg : NVPTXInst<(outs regclass:$dst),
2153 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2154 i32imm:$fromWidth, Int32Regs:$addr),
2155 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2156 "$fromWidth \t$dst, [$addr];"), []>;
// Address is a 64-bit register.
2157 def _areg_64 : NVPTXInst<(outs regclass:$dst),
2158 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2159 i32imm:$fromWidth, Int64Regs:$addr),
2160 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2161 " \t$dst, [$addr];"), []>;
// Address is a 32-bit register plus an immediate offset.
2162 def _ari : NVPTXInst<(outs regclass:$dst),
2163 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2164 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2165 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2166 "$fromWidth \t$dst, [$addr+$offset];"), []>;
// Address is a 64-bit register plus an immediate offset.
2167 def _ari_64 : NVPTXInst<(outs regclass:$dst),
2168 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2169 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2170 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2171 " \t$dst, [$addr+$offset];"), []>;
// Address is a symbol plus an immediate offset.
2172 def _asi : NVPTXInst<(outs regclass:$dst),
2173 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2174 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2175 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2176 "$fromWidth \t$dst, [$addr+$offset];"), []>;
2177 }
// Instantiate the LD multiclass for every scalar register class. All resulting
// instructions are flagged mayLoad and neverHasSideEffects.
2179 let mayLoad=1, neverHasSideEffects=1 in {
2180 defm LD_i8 : LD<Int8Regs>;
2181 defm LD_i16 : LD<Int16Regs>;
2182 defm LD_i32 : LD<Int32Regs>;
2183 defm LD_i64 : LD<Int64Regs>;
2184 defm LD_f32 : LD<Float32Regs>;
2185 defm LD_f64 : LD<Float64Regs>;
2186 }
// Scalar store instructions for one source register class.
//
// Every variant takes the value $src, four LdStCode modifier operands (isVol,
// addsp, Vec, Sign) printed through the volatile/addsp/vec/sign operand
// modifiers, and the access width $toWidth. The variants differ only in how
// the address is given. None of them carries a selection pattern.
2188 multiclass ST<NVPTXRegClass regclass> {
// Address is a symbol (imem).
2189 def _avar : NVPTXInst<(outs),
2190 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2191 LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2192 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2193 " \t[$addr], $src;"), []>;
// Address is a 32-bit register.
2194 def _areg : NVPTXInst<(outs),
2195 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2196 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2197 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2198 " \t[$addr], $src;"), []>;
// Address is a 64-bit register.
2199 def _areg_64 : NVPTXInst<(outs),
2200 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2201 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2202 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2203 "\t[$addr], $src;"), []>;
// Address is a 32-bit register plus an immediate offset.
2204 def _ari : NVPTXInst<(outs),
2205 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2206 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
2207 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2208 " \t[$addr+$offset], $src;"), []>;
// Address is a 64-bit register plus an immediate offset.
2209 def _ari_64 : NVPTXInst<(outs),
2210 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2211 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
2212 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2213 "\t[$addr+$offset], $src;"), []>;
// Address is a symbol plus an immediate offset.
2214 def _asi : NVPTXInst<(outs),
2215 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2216 LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
2217 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2218 " \t[$addr+$offset], $src;"), []>;
2219 }
// Instantiate the ST multiclass for every scalar register class. All resulting
// instructions are flagged mayStore and neverHasSideEffects.
2221 let mayStore=1, neverHasSideEffects=1 in {
2222 defm ST_i8 : ST<Int8Regs>;
2223 defm ST_i16 : ST<Int16Regs>;
2224 defm ST_i32 : ST<Int32Regs>;
2225 defm ST_i64 : ST<Int64Regs>;
2226 defm ST_f32 : ST<Float32Regs>;
2227 defm ST_f64 : ST<Float64Regs>;
2228 }
2230 // The following is used only in and after vector elementization.
2231 // Vector elementization happens at the machine instruction level, so the
2232 // following instructions
2233 // never appear in the DAG.
2234 multiclass LD_VEC<NVPTXRegClass regclass> {
// Two-element vector loads: two destination registers of the same class,
// printed as a braced pair. Operands and addressing variants mirror the scalar
// LD multiclass (symbol, 32/64-bit register, register+offset, symbol+offset).
// No selection patterns are attached.
2235 def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2236 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2237 i32imm:$fromWidth, imem:$addr),
2238 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2239 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2240 def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2241 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2242 i32imm:$fromWidth, Int32Regs:$addr),
2243 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2244 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2245 def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2246 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2247 i32imm:$fromWidth, Int64Regs:$addr),
2248 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2249 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2250 def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2251 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2252 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2253 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2254 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2255 def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2256 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2257 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2258 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2259 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2260 def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2261 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2262 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2263 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2264 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2265 def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2266 regclass:$dst3, regclass:$dst4),
2267 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2268 i32imm:$fromWidth, imem:$addr),
2269 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2270 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2271 def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2273 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2274 i32imm:$fromWidth, Int32Regs:$addr),
2275 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2276 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2277 def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2278 regclass:$dst3, regclass:$dst4),
2279 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2280 i32imm:$fromWidth, Int64Regs:$addr),
2281 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2282 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2283 def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2285 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2286 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2287 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2288 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2290 def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2291 regclass:$dst3, regclass:$dst4),
2292 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2293 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2294 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2295 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2297 def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2299 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2300 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2301 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2302 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
// Instantiate LD_VEC once per element register class.  Every record created
// inside this `let` has mayLoad=1 and neverHasSideEffects=1.
2305 let mayLoad=1, neverHasSideEffects=1 in {
2306 defm LDV_i8 : LD_VEC<Int8Regs>;
2307 defm LDV_i16 : LD_VEC<Int16Regs>;
2308 defm LDV_i32 : LD_VEC<Int32Regs>;
2309 defm LDV_i64 : LD_VEC<Int64Regs>;
2310 defm LDV_f32 : LD_VEC<Float32Regs>;
2311 defm LDV_f64 : LD_VEC<Float64Regs>;
// ST_VEC: vector store instruction records for one element register class.
//   regclass - register class used for every stored value ($src1..$src4).
// The records have no outputs.  Two-element (_v2_*) and four-element (_v4_*)
// forms are defined, each with the addressing variants _avar, _areg,
// _areg_64, _ari, _ari_64 and _asi; the suffix selects the type of $addr
// (imem, Int32Regs or Int64Regs) and whether "+$offset" is printed.
// The LdStCode operands ($isVol, $addsp, $Vec, $Sign) are printed through the
// ${operand:modifier} syntax and $fromWidth is appended to the mnemonic.
// The address is printed first, followed by the brace-enclosed source list.
2314 multiclass ST_VEC<NVPTXRegClass regclass> {
2315 def _v2_avar : NVPTXInst<(outs),
2316 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2317 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2318 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2319 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2320 def _v2_areg : NVPTXInst<(outs),
2321 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2322 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2323 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2324 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2325 def _v2_areg_64 : NVPTXInst<(outs),
2326 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2327 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2328 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2329 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2330 def _v2_ari : NVPTXInst<(outs),
2331 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2332 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2334 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2335 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2336 def _v2_ari_64 : NVPTXInst<(outs),
2337 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2338 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
2340 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2341 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2342 def _v2_asi : NVPTXInst<(outs),
2343 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2344 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2346 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2347 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
// Four-element forms: same operand layout, with $src1..$src4 as inputs.
2348 def _v4_avar : NVPTXInst<(outs),
2349 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2350 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2351 i32imm:$fromWidth, imem:$addr),
2352 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2353 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2354 def _v4_areg : NVPTXInst<(outs),
2355 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2356 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2357 i32imm:$fromWidth, Int32Regs:$addr),
2358 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2359 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2360 def _v4_areg_64 : NVPTXInst<(outs),
2361 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2362 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2363 i32imm:$fromWidth, Int64Regs:$addr),
2364 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2365 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2366 def _v4_ari : NVPTXInst<(outs),
2367 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2368 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2369 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2370 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2371 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2373 def _v4_ari_64 : NVPTXInst<(outs),
2374 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2375 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2376 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2377 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2378 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2380 def _v4_asi : NVPTXInst<(outs),
2381 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2382 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2383 i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2384 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2385 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
// Instantiate ST_VEC once per element register class.  Every record created
// inside this `let` has mayStore=1 and neverHasSideEffects=1.
2388 let mayStore=1, neverHasSideEffects=1 in {
2389 defm STV_i8 : ST_VEC<Int8Regs>;
2390 defm STV_i16 : ST_VEC<Int16Regs>;
2391 defm STV_i32 : ST_VEC<Int32Regs>;
2392 defm STV_i64 : ST_VEC<Int64Regs>;
2393 defm STV_f32 : ST_VEC<Float32Regs>;
2394 defm STV_f64 : ST_VEC<Float64Regs>;
2398 //---- Conversion ----
// CVT_INT_TO_FP: integer-to-floating-point conversion records.
//   OpStr  - signedness letter spliced into the PTX source type, giving
//            cvt.rn.<f32|f64>.<OpStr><8|16|32|64>.
//   OpNode - SelectionDAG node matched by every pattern in the multiclass.
// Int1Regs sources do not use cvt; they are materialised with
// selp.<f32|f64> choosing between 1.0 and 0.0.
2400 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
2401 // FIXME: need to add f16 support
2403 // NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
2404 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
2405 // [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
2407 // NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
2408 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
2409 // [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
2411 // NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
2412 // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
2413 // [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
// NOTE(review): the disabled i64 stub below declares a Float16Regs output but
// prints "cvt.rn.f32." and sets Float32Regs:$d; fix before enabling.
2415 // NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
2416 // !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2417 // [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
// ---- f32 results ----
2420 NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
2421 "selp.f32 \t$d, 1.0, 0.0, $a;",
2422 [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
2424 NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
2425 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
2426 [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
2428 NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
2429 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
2430 [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
2432 NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
2433 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
2434 [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
2436 NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
2437 !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2438 [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
// ---- f64 results ----
2441 NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
2442 "selp.f64 \t$d, 1.0, 0.0, $a;",
2443 [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
2445 NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
2446 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
2447 [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
2449 NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
2450 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
2451 [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
2453 NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
2454 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
2455 [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
2457 NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
2458 !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
2459 [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
// Signed ("s", sint_to_fp) and unsigned ("u", uint_to_fp) instantiations of
// CVT_INT_TO_FP.
2462 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
2463 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
// CVT_FP_TO_INT: floating-point-to-integer conversion records using cvt.rzi.
//   OpStr  - signedness letter spliced into the PTX destination type.
//   OpNode - SelectionDAG node matched by every pattern in the multiclass.
// Each f32 source has two records with the same pattern: a "cvt.rzi.ftz."
// form guarded by Requires<[doF32FTZ]>, followed by an unguarded "cvt.rzi."
// form.  f64 sources have a single unguarded record.
// Int8Regs results are printed with a 16-bit destination type
// ("<OpStr>16"), exactly like the Int16Regs results.
2465 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
2466 // FIXME: need to add f16 support
2468 // NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
2469 // !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
2470 // [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
2472 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2473 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2474 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2476 NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2477 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2478 [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
2480 NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
2481 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2482 [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
2484 // FIXME: need to add f16 support
2486 // NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
2487 // !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
2488 // [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
2490 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2491 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2492 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2494 NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2495 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2496 [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
2498 NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
2499 !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2500 [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
2502 // FIXME: need to add f16 support
2503 // def CVTi32f16:
2504 // NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
2505 // !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
2506 // [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
2508 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2509 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
2510 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2512 NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2513 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
2514 [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
2516 NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
2517 !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
2518 [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
2520 // FIXME: need to add f16 support
2522 // NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
2523 // !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
2524 // [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
2526 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2527 !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
2528 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2530 NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2531 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
2532 [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
2534 NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
2535 !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
2536 [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
// Signed ("s", fp_to_sint) and unsigned ("u", fp_to_uint) instantiations of
// CVT_FP_TO_INT.
2539 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2540 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
// INT_EXTEND_UNSIGNED_1: extend an Int1Regs predicate to an 8/16/32/64-bit
// integer register with selp, producing 1 when $a is true and 0 otherwise.
//   OpNode - SelectionDAG extension node matched by every pattern.
// The Int8Regs result is produced with selp.u16, like the Int16Regs result.
2542 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
2544 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2545 "selp.u16 \t$d, 1, 0, $a;",
2546 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2548 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2549 "selp.u16 \t$d, 1, 0, $a;",
2550 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2552 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2553 "selp.u32 \t$d, 1, 0, $a;",
2554 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2556 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2557 "selp.u64 \t$d, 1, 0, $a;",
2558 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
// INT_EXTEND_SIGNED_1: sign-extending counterpart of INT_EXTEND_UNSIGNED_1.
// selp produces -1 when $a is true and 0 otherwise.
//   OpNode - SelectionDAG extension node matched by every pattern.
// The Int8Regs result is produced with selp.s16, like the Int16Regs result.
2561 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
2563 NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2564 "selp.s16 \t$d, -1, 0, $a;",
2565 [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2567 NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2568 "selp.s16 \t$d, -1, 0, $a;",
2569 [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2571 NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2572 "selp.s32 \t$d, -1, 0, $a;",
2573 [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2575 NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2576 "selp.s64 \t$d, -1, 0, $a;",
2577 [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
// INT_EXTEND: widening integer conversions between Int8/16/32/64Regs.
//   OpStr  - signedness letter used for BOTH the destination and the source
//            PTX type, giving cvt.<OpStr><dst>.<OpStr><src>.
//   OpNode - SelectionDAG extension node matched by every pattern.
// Covered widenings: 8->16, 8->32, 8->64, 16->32, 16->64, 32->64.
2580 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
2581 // All Int8Regs are emitted as 16-bit registers in PTX.
2582 // And there is no selp.u8 in PTX.
2584 NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
2585 !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
2586 !strconcat(OpStr, "8 \t$d, $a;")))),
2587 [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
2589 NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
2590 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2591 !strconcat(OpStr, "8 \t$d, $a;")))),
2592 [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
2594 NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
2595 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2596 !strconcat(OpStr, "8 \t$d, $a;")))),
2597 [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
2599 NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2600 !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2601 !strconcat(OpStr, "16 \t$d, $a;")))),
2602 [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2604 NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2605 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2606 !strconcat(OpStr, "16 \t$d, $a;")))),
2607 [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2609 NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2610 !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2611 !strconcat(OpStr, "32 \t$d, $a;")))),
2612 [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
// Extensions from Int1Regs: sext uses the signed (-1/0) multiclass, while
// zext and anyext both use the unsigned (1/0) multiclass.
2615 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2616 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2617 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
// Extensions between wider integer types: sext prints signed ("s") types,
// while zext and anyext both print unsigned ("u") types.
2619 defm Sint_extend : INT_EXTEND <"s", sext>;
2620 defm Zint_extend : INT_EXTEND <"u", zext>;
2621 defm Aint_extend : INT_EXTEND <"u", anyext>;
// TRUNC_to1_asm: builds, in field `s`, the brace-enclosed multi-line asm
// string used by the TRUNC_*to1 records below.
//   sz - PTX type suffix spliced into the sequence (the users pass ".b64",
//        ".b32" and ".b16").
// NOTE(review): judging from the string fragments, the sequence declares a
// scratch "temp", combines $a with 1 into it, and sets $d from an ".eq"
// comparison of temp against 1 -- confirm against the complete definition.
2623 class TRUNC_to1_asm<string sz> {
2624 string s = !strconcat("{{\n\t",
2627 !strconcat(" temp;\n\t",
2630 !strconcat("\t temp, $a, 1;\n\t",
2632 !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
// Integer truncation records, each matching the `trunc` node.
// Truncations to 8/16/32-bit results are a single cvt between unsigned types
// (cvt.u<dst>.u<src>).
2635 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2636 "cvt.u32.u64 \t$d, $a;",
2637 [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2638 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2639 "cvt.u16.u64 \t$d, $a;",
2640 [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2641 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
2642 "cvt.u8.u64 \t$d, $a;",
2643 [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
2644 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2645 "cvt.u16.u32 \t$d, $a;",
2646 [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
2647 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
2648 "cvt.u8.u32 \t$d, $a;",
2649 [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
2650 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
2651 "cvt.u8.u16 \t$d, $a;",
2652 [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
// Truncations to Int1Regs take their asm text from TRUNC_to1_asm<sz>.s.
// TRUNC_8to1 passes ".b16", the same suffix as TRUNC_16to1.
2653 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2654 TRUNC_to1_asm<".b64">.s,
2655 [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2656 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2657 TRUNC_to1_asm<".b32">.s,
2658 [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2659 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2660 TRUNC_to1_asm<".b16">.s,
2661 [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2662 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
2663 TRUNC_to1_asm<".b16">.s,
2664 [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
2666 // Select instructions
// Patterns for `select` whose condition is held in an Int32Regs: the
// condition is first narrowed to a predicate with TRUNC_32to1 and then passed
// as the last operand of the SELECT<type>rr record for the value type.
2667 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
2668 (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
2669 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2670 (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2671 (TRUNC_32to1 Int32Regs:$pred))>;
2672 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2673 (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2674 (TRUNC_32to1 Int32Regs:$pred))>;
2675 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2676 (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2677 (TRUNC_32to1 Int32Regs:$pred))>;
2678 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2679 (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2680 (TRUNC_32to1 Int32Regs:$pred))>;
2681 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2682 (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2683 (TRUNC_32to1 Int32Regs:$pred))>;
// F_BITCONVERT: matches `bitconvert` from regclassIn to regclassOut and
// prints it as "mov.b<SzStr>".
//   SzStr       - bit width printed after "mov.b".
//   regclassIn  - register class of the source operand $a.
//   regclassOut - register class of the result $d.
2685 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2686 NVPTXRegClass regclassOut> :
2687 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2688 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2689 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
// Integer <-> floating-point bitcasts for 32-bit and 64-bit registers.
2691 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2692 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2693 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2694 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2696 // pack a set of smaller int registers to a larger int register
// The i8 forms first copy each source into a local .b8 temporary (%t<n>) with
// cvt.u8.u8 and then pack the temporaries with one vector mov; the wider
// forms pack their sources directly with a single mov.b32 / mov.b64.
// NOTE(review): the i8 forms write the %t list inside single braces
// ("{%t0, ...}") whereas every other vector operand list in this file uses
// doubled braces ("{{...}}") -- confirm the printed PTX is as intended.
2697 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
2698 (ins Int8Regs:$s1, Int8Regs:$s2,
2699 Int8Regs:$s3, Int8Regs:$s4),
2700 !strconcat("{{\n\t.reg .b8\t%t<4>;",
2701 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2702 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2703 !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
2704 !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
2705 "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
2707 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2708 (ins Int16Regs:$s1, Int16Regs:$s2,
2709 Int16Regs:$s3, Int16Regs:$s4),
2710 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2712 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
2713 (ins Int8Regs:$s1, Int8Regs:$s2),
2714 !strconcat("{{\n\t.reg .b8\t%t<2>;",
2715 !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2716 !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2717 "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
2719 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2720 (ins Int16Regs:$s1, Int16Regs:$s2),
2721 "mov.b32\t$d, {{$s1, $s2}};",
2723 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2724 (ins Int32Regs:$s1, Int32Regs:$s2),
2725 "mov.b64\t$d, {{$s1, $s2}};",
2727 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2728 (ins Float32Regs:$s1, Float32Regs:$s2),
2729 "mov.b64\t$d, {{$s1, $s2}};",
2732 // unpack a larger int register to a set of smaller int registers
// Mirror image of the pack records: the i8 forms split $s into local .b8
// temporaries (%t<n>) with one vector mov and then copy each temporary to its
// destination with cvt.u8.u8; the wider forms unpack with a single
// mov.b32 / mov.b64 straight into the destinations.
// NOTE(review): as in the pack records, the i8 forms write the %t list inside
// single braces while the other forms use doubled braces -- confirm.
2733 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
2734 Int8Regs:$d3, Int8Regs:$d4),
2736 !strconcat("{{\n\t.reg .b8\t%t<4>;",
2737 !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
2738 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2739 !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
2740 !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
2741 "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
2743 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2744 Int16Regs:$d3, Int16Regs:$d4),
2746 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2748 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
2750 !strconcat("{{\n\t.reg .b8\t%t<2>;",
2751 !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
2752 !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2753 "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
2755 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2757 "mov.b32\t{{$d1, $d2}}, $s;",
2759 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2761 "mov.b64\t{{$d1, $d2}}, $s;",
2763 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2764 (ins Float64Regs:$s),
2765 "mov.b64\t{{$d1, $d2}}, $s;",
// f64 -> f32 rounding (fround) and f32 -> f64 extension (fextend).
// Each operation has a ".ftz" record guarded by Requires<[doF32FTZ]>,
// followed by an unguarded record matching the same pattern.
// Rounding prints cvt.rn; extension prints cvt with no rounding modifier.
2768 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2769 "cvt.rn.ftz.f32.f64 \t$d, $a;",
2770 [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2772 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2773 "cvt.rn.f32.f64 \t$d, $a;",
2774 [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
2776 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2777 "cvt.ftz.f64.f32 \t$d, $a;",
2778 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2780 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2781 "cvt.f64.f32 \t$d, $a;",
2782 [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
// SelectionDAG node for NVPTXISD::RET_FLAG: no typed operands or results
// (SDTNone), chained, with an optional incoming glue.
2784 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2785 [SDNPHasChain, SDNPOptInGlue]>;
2787 //-----------------------------------
2789 //-----------------------------------
// Return and branch records; everything inside this `let` is a terminator.
2791 let isTerminator=1 in {
// "ret;" -- matches retflag; marked as both a return and a barrier.
2792 let isReturn=1, isBarrier=1 in
2793 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
// Conditional branch taken when predicate $a is true ("@$a bra").
2796 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2797 "@$a bra \t$target;",
2798 [(brcond Int1Regs:$a, bb:$target)]>;
// Conditional branch taken when predicate $a is false ("@!$a bra").
2800 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2801 "@!$a bra \t$target;",
// Unconditional branch, printed as "bra.uni".
2804 let isBranch=1, isBarrier=1 in
2805 def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2806 "bra.uni \t$target;",
// A brcond on an Int32Regs condition is lowered to CBranch on the predicate
// produced by ISetUNEi32ri_p comparing the condition with 0.
2810 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2811 (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2813 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2814 // conditional branch if
2815 // the target block is the next block so that the code can fall through to the
2817 // target. The inversion is done by 'xor condition, 1', which will be translated
2818 // to (setne condition, -1).
2819 // Since ptx supports '@!pred bra target', we should use it.
// The pattern therefore maps brcond on (setne $a, -1) to CBranchOther, which
// prints the negated-predicate branch.
2820 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2821 (CBranchOther Int1Regs:$a, bb:$target)>;
// Type profiles for the call-sequence markers: CALLSEQ_START takes one i32
// operand, CALLSEQ_END takes two i32 operands.
2824 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2825 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2826 SDTCisVT<1, i32> ]>;
// SelectionDAG nodes bound to ISD::CALLSEQ_START / ISD::CALLSEQ_END.
2828 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2829 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2830 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
2831 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
// NVPTXISD::CALL node: no results, one i32 operand (the callee), chained and
// glued on both sides.  `calltarget` is the matching i32 instruction operand.
2834 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2835 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2836 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
2837 def calltarget : Operand<i32>;
// CALL has no pattern of its own; the two Pat records below select it for
// global-address and external-symbol callees.
2839 def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2840 "call \t$dst, (1);", []>;
2843 def : Pat<(call tglobaladdr:$dst),
2844 (CALL tglobaladdr:$dst)>;
2845 def : Pat<(call texternalsym:$dst),
2846 (CALL texternalsym:$dst)>;
2848 // Pseudo instructions.
// Pseudo simply forwards all four arguments to NVPTXInst.
2849 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2850 : NVPTXInst<outs, ins, asmstr, pattern>;
2852 // @TODO: We use some tricks here to emit curly braces. Can we clean this up
2853 // a bit without TableGen modifications?
// Callseq_Start matches callseq_start and prints a "// Callseq Start" comment,
// an opening brace and a .b32 temp_param_reg declaration.
2854 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2855 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2856 [(callseq_start timm:$amt)]>;
// Callseq_End matches callseq_end and prints the closing brace followed by a
// "// Callseq End" comment; only $amt1 appears in the printed text.
2857 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2858 "\n\t//{{\n\t}}// Callseq End $amt1",
2859 [(callseq_end timm:$amt1, timm:$amt2)]>;
// trapinst: instruction record with no outputs and no inputs.
2863 def trapinst : NVPTXInst<(outs), (ins),
// Pull in the definitions from NVPTXIntrinsics.td.
2867 include "NVPTXIntrinsics.td"
2870 //-----------------------------------
2872 //-----------------------------------
2873 // BSWAP is currently expanded. The following is a more efficient
2874 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
2875 // - for sm_20, use prmt (use vector scalar mov to get the pack and
2876 // unpack). sm_20 supports native 32-bit register, but not native 16-bit